 ## 1. Import the libraries

In [None]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,classification_report,roc_curve, auc 
import seaborn as sns 
import matplotlib.pyplot as plt 


## 2. Load and Explore the data

In [None]:
# Read the dataset from the CSV file in the current working directory.
DATA_PATH = 'creditcard.csv'
df = pd.read_csv(DATA_PATH)

# Print column dtypes / non-null counts, then preview the first rows.
# `df.head()` stays last so the notebook renders it as the cell output.
df.info()
df.head()

## 3. Prepare Features and Labels

In [None]:
# Columns excluded from the model inputs. 'Class' is the target itself;
# errors='ignore' below means any listed column that is absent from the
# CSV is skipped silently instead of raising.
non_feature_cols = ['Time', 'Amount', 'Class', 'id']

# Target labels.
y = df['Class']

# Feature matrix: every remaining column.
x = df.drop(columns=non_feature_cols, errors='ignore')


## 4. Split the dataset 

In [None]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions the same in the train and test partitions, so the held-out
# metrics are not skewed by an unlucky draw of a rare class.
# `random_state` fixes the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## 5. Scale the Features

In [None]:
# Learn the per-feature mean and standard deviation from the training
# data only, then apply that same transformation to both partitions so
# no statistics from the test set leak into the scaling.
scaler = StandardScaler()
scaler.fit(x_train)

x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

## 6. Check the class distribution

In [None]:
# Relative frequency of each label in the training target
# (normalize=True yields fractions that sum to 1 rather than raw counts).
class_share = pd.Series(y_train).value_counts(normalize=True)
print(class_share)


## 7. Build and train the Random Forest model

In [None]:
# Random forest classifier configuration.
rf_model = RandomForestClassifier(
    n_estimators=10,      # number of trees in the forest
    max_depth=10,         # maximum depth of each tree
    min_samples_split=2,  # minimum number of samples required to split a node
    random_state=42,      # fixed seed so results are reproducible
)

# 5-fold cross-validation on the training data, scored with F1.
# Report the result: previously the scores were computed and then
# silently discarded, so the (expensive) CV run had no visible effect.
cv_scores = cross_val_score(rf_model, x_train_scaled, y_train, cv=5, scoring='f1')
print(f'Cross-validation F1 scores: {cv_scores}')
print(f'Mean CV F1: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})')

# Fit the final model on the full training partition.
rf_model.fit(x_train_scaled, y_train)

## 8. Make Predictions and evaluate 

In [None]:
# Predict class labels for the held-out (scaled) test features.
y_pred = rf_model.predict(x_test_scaled)

# Per-class precision / recall / F1 summary against the true test labels.
report = classification_report(y_test, y_pred)
print(report)

## 9. Plot the confusion matrix

In [None]:
# Counts of (true label, predicted label) pairs on the test set.
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap; fmt='d' prints the cell
# values as integers.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, cmap='Blues', annot=True, fmt='d')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

## 10. Plot the Feature Importance 

In [None]:
# Importance score of each input feature as reported by the fitted forest.
importance = rf_model.feature_importances_

# Pair each score with its column name, then rank from most to least
# important so the bar chart reads top-down.
feature_imp = pd.DataFrame({'Feature': x.columns, 'Importance': importance})
feature_imp = feature_imp.sort_values('Importance', ascending=False)

# Horizontal bar chart: one bar per feature.
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_imp)
plt.xlabel('Importance Score')
plt.tight_layout()
plt.show()

## 11. Plot the Correlation Matrix 

In [None]:
# Pairwise correlation between the (unscaled) feature columns.
plt.figure(figsize=(12, 8))
correlation_matrix = x.corr()

# center=0 anchors the diverging colormap at zero, so positive and
# negative correlations get opposite colours; annot/fmt print each
# coefficient with two decimals.
sns.heatmap(correlation_matrix, cmap='coolwarm', center=0, annot=True, fmt='.2f')
plt.title('Feature Correlation Matrix')  # fixed typo: was 'Matric'
plt.tight_layout()
plt.show()

## 12. Plot the Receiver Operating Characteristic (ROC) Curve

In [None]:
# Predicted probability of the second class (column index 1 of
# predict_proba) for each test sample.
y_pred_proba = rf_model.predict_proba(x_test_scaled)[:, 1]

# False/true positive rates across thresholds (thresholds themselves are
# not needed), and the area under the resulting curve.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

# Visualising the ROC curve

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve  (AUC = {roc_auc:.2f})')
# Diagonal reference line (TPR == FPR) for visual comparison.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

# Slight headroom above 1.0 so the curve is not clipped by the top edge.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

# Fixed spelling of the title (was 'Reciever Operating Characteristics').
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()