# Import Libraries

In [46]:
import pandas as pd
import numpy as np

# Suppressing Warnings

In [47]:
import warnings

warnings.filterwarnings("ignore")

## Importing Models

In [48]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load Dataset

In [49]:
df=pd.read_csv(r"D:\ML\DataSets\creditcard.csv")
df.head

<bound method NDFrame.head of             Time         V1         V2        V3        V4        V5  \
0            0.0  -1.359807  -0.072781  2.536347  1.378155 -0.338321   
1            0.0   1.191857   0.266151  0.166480  0.448154  0.060018   
2            1.0  -1.358354  -1.340163  1.773209  0.379780 -0.503198   
3            1.0  -0.966272  -0.185226  1.792993 -0.863291 -0.010309   
4            2.0  -1.158233   0.877737  1.548718  0.403034 -0.407193   
...          ...        ...        ...       ...       ...       ...   
284802  172786.0 -11.881118  10.071785 -9.834783 -2.066656 -5.364473   
284803  172787.0  -0.732789  -0.055080  2.035030 -0.738589  0.868229   
284804  172788.0   1.919565  -0.301254 -3.249640 -0.557828  2.630515   
284805  172788.0  -0.240440   0.530483  0.702510  0.689799 -0.377961   
284806  172792.0  -0.533413  -0.189733  0.703337 -0.506271 -0.012546   

              V6        V7        V8        V9  ...       V21       V22  \
0       0.462388  0.239599  0.

# X & Y

In [50]:
x=df.drop(['Class'],axis=1)
y=df['Class']

# Train Test Split

In [51]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=42)

# Model

In [52]:
models = {
    'LogisticRegression': LogisticRegression(),
    'DecisionTreeClassifier':DecisionTreeClassifier(),
    'RandomForestClassifier':RandomForestClassifier(),
    'SVC':SVC(),
    'GaussianNB': GaussianNB(),
    'KNN':KNeighborsClassifier(n_neighbors=10)
    
}


for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted for multiclass
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"Model: {name}: Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1: {f1:.2f}")

Model: LogisticRegression: Accuracy: 1.00, Precision: 1.00, Recall: 1.00, F1: 1.00
Model: DecisionTreeClassifier: Accuracy: 1.00, Precision: 1.00, Recall: 1.00, F1: 1.00
Model: RandomForestClassifier: Accuracy: 1.00, Precision: 1.00, Recall: 1.00, F1: 1.00
Model: SVC: Accuracy: 1.00, Precision: 1.00, Recall: 1.00, F1: 1.00
Model: GaussianNB: Accuracy: 0.99, Precision: 1.00, Recall: 0.99, F1: 1.00
Model: KNN: Accuracy: 1.00, Precision: 1.00, Recall: 1.00, F1: 1.00


In [53]:
from sklearn.model_selection import cross_val_score

# Perform k-fold cross-validation to validate model performance
for name, model in models.items():
    scores = cross_val_score(model, x, y, cv=2)
    print("Cross-validation score of {}:".format(name), scores) 

Cross-validation score of LogisticRegression: [0.98693857 0.99867278]
Cross-validation score of DecisionTreeClassifier: [0.03720401 0.99733152]
Cross-validation score of RandomForestClassifier: [0.05060251 0.9990871 ]
Cross-validation score of SVC: [0.99827252 0.99827251]
Cross-validation score of GaussianNB: [0.98297098 0.986475  ]
Cross-validation score of KNN: [0.01014719 0.99827251]
