## LIBRARIES

In [None]:
!pip install --upgrade scikit-learn



In [None]:
!pip install --upgrade joblib



In [None]:
!pip install --upgrade imbalanced-learn
!pip install --upgrade scikit-learn
!pip install --upgrade joblib



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report, precision_recall_curve
import matplotlib.pyplot as plt

# Step 1: Upload Kaggle API Key

In [None]:
# Open Colab's upload dialog; choose the Kaggle API token file here.
# `uploaded` holds the result of the upload for use by later cells.
from google.colab import files
uploaded = files.upload()

Saving kaggle (6).json to kaggle (6).json


# Step 2: Install Kaggle Library and Set API Key

In [None]:
pip install --upgrade kaggle

In [None]:
!pip install kaggle
!mkdir -p ~/.kaggle
!mv "kaggle (6).json" ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

# Step 3: Download Dataset

In [None]:
# Download the mlg-ulb/creditcardfraud dataset archive with the Kaggle CLI
# (requires the API token configured in the previous step).
!kaggle datasets download -d mlg-ulb/creditcardfraud

creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)


# Step 4: Unzip the Dataset

In [None]:
!unzip creditcardfraud.zip

Archive:  creditcardfraud.zip
replace creditcard.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: creditcard.csv          


In [None]:
# List the working directory to confirm the archive was extracted.
!ls

creditcard.csv	     random_forest_model.joblib  voting_classifier_model.joblib
creditcardfraud.zip  sample_data		 xgboost_model.joblib


# Importing Dataset

In [None]:
# Load the extracted CSV; `df` is a working copy so `dataset` keeps the data as loaded.
dataset = pd.read_csv("/content/creditcard.csv")
df =dataset.copy()

In [None]:
# Preview the first rows of the working copy.
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
# Write the working copy to creditcard.csv without the index column.
# NOTE(review): if the working directory is /content, this overwrites the
# downloaded file that was just read — confirm this is intended.
df.to_csv('creditcard.csv', index=False)

# Data Preprocessing

# 1. Separating Features and Target:

In [None]:
# The label is the 'Class' column; every other column is used as a feature.
y = df['Class']
X = df.drop(columns=['Class'])

# 2. Splitting the dataset:

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — confirm that is acceptable
# given how few positive rows the test set contains.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Save the held-out features before they are scaled (labels are not included in this file).
X_test.to_csv('creditcardX_test.csv', index=False)

# 3. Normalization:

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
# After this cell X_train and X_test hold the scaler's output, not the original frames.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# After scaling, X_train is a NumPy array and has no .to_csv() method.
# Rebuild a DataFrame with the original feature names before saving.
pd.DataFrame(X_train, columns=X.columns).to_csv('creditcard_X_train.csv', index=False)

# 4. Handling Class Imbalance with SMOTE:

In [None]:
# Oversample with SMOTE using the training split only; the test split is not resampled.
# The fixed seed keeps the synthetic samples repeatable.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Model Building

# 1. XGBoost Model

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [None]:
# Train XGBoost on the SMOTE-resampled training data (X_resampled / y_resampled
# come from the resampling cell), then predict labels for the scaled test split.
xgb_model = xgb.XGBClassifier(random_state=42).fit(X_resampled, y_resampled)
xgb_predictions = xgb_model.predict(X_test)

# 2. Evaluation Metrics for XGBoost Model

In [None]:
# Score the XGBoost predictions against the held-out labels.
print("XGBoost Metrics:")

# (label, metric function) pairs, printed in the order listed.
xgb_metric_table = [
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
]
for metric_label, metric_fn in xgb_metric_table:
    print(metric_label, metric_fn(y_test, xgb_predictions))


XGBoost Metrics:
Accuracy: 0.9993679997191109
Precision: 0.7980769230769231
Recall: 0.8469387755102041
F1-Score: 0.821782178217822
Confusion Matrix:
 [[56843    21]
 [   15    83]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.80      0.85      0.82        98

    accuracy                           1.00     56962
   macro avg       0.90      0.92      0.91     56962
weighted avg       1.00      1.00      1.00     56962



# 3. Random Forest Model

In [None]:
# Train a random forest on the SMOTE-resampled training data, then predict
# labels for the scaled test split.
rf_model = RandomForestClassifier(random_state=42).fit(X_resampled, y_resampled)
rf_predictions = rf_model.predict(X_test)

# 4. Evaluation Metrics for Random Forest

In [None]:
# Score the random forest predictions against the held-out labels.
print("\nRandom Forest Metrics:")

# (label, metric function) pairs, printed in the order listed.
rf_metric_table = [
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
]
for metric_label, metric_fn in rf_metric_table:
    print(metric_label, metric_fn(y_test, rf_predictions))


Random Forest Metrics:
Precision: 0.9120879120879121
Recall: 0.8469387755102041
F1-Score: 0.8783068783068784
Confusion Matrix:
 [[56856     8]
 [   15    83]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.91      0.85      0.88        98

    accuracy                           1.00     56962
   macro avg       0.96      0.92      0.94     56962
weighted avg       1.00      1.00      1.00     56962



# 5. Ensemble Method - Voting Classifier

In [None]:
# Soft-voting ensemble over the XGBoost and random forest estimators,
# trained on the same resampled data and evaluated on the same test split.
voting_members = [('xgb', xgb_model), ('rf', rf_model)]
ensemble_model = VotingClassifier(estimators=voting_members, voting='soft')
ensemble_model.fit(X_resampled, y_resampled)
ensemble_predictions = ensemble_model.predict(X_test)

# 6. Evaluation Metrics for Ensemble Model

In [None]:
# Score the ensemble predictions against the held-out labels.
print("Ensemble Model Metrics:")

# (label, metric function) pairs, printed in the order listed.
ensemble_metric_table = [
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
]
for metric_label, metric_fn in ensemble_metric_table:
    print(metric_label, metric_fn(y_test, ensemble_predictions))

Ensemble Model Metrics:
Precision: 0.865979381443299
Recall: 0.8571428571428571
F1-Score: 0.8615384615384615
Confusion Matrix:
 [[56851    13]
 [   14    84]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.86      0.86        98

    accuracy                           1.00     56962
   macro avg       0.93      0.93      0.93     56962
weighted avg       1.00      1.00      1.00     56962



In [None]:


# joblib is imported here because no earlier cell imports it; without this
# import the cell raises NameError on a fresh kernel.
import joblib

# Save the three fitted models to the working directory.
joblib.dump(xgb_model, "xgboost_model.joblib")
joblib.dump(rf_model, "random_forest_model.joblib")
joblib.dump(ensemble_model, "voting_classifier_model.joblib")

['voting_classifier_model.joblib']

In [None]:
# Send each saved model file to the browser as a download.
# The files are expected in the current working directory.
from google.colab import files

for model_path in (
    "xgboost_model.joblib",
    "random_forest_model.joblib",
    "voting_classifier_model.joblib",
):
    files.download(model_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>