In [1]:
#Import All Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Import the Dataset
df=pd.read_csv("creditcard.csv")

#Data in Underfitted. So,Change its shape
legit=df[df.Class==0]
fraud=df[df.Class==1]
legit_sample=legit.sample(n=492)
new_df=pd.concat([legit_sample,fraud],axis=0)

#Data-Preprocessing
scalar=StandardScaler()
X=new_df.drop('Class',axis=1)
y=new_df['Class']
x=pd.DataFrame(scalar.fit_transform(X),columns=X.columns)

#Train-Test Split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=42)

#Models Training and Accuracy
models=[]
models.append(('Logistic Regression',LogisticRegression(max_iter=1000)))
models.append(('KNN',KNeighborsClassifier()))
models.append(('RandomForestClassifier',RandomForestClassifier()))
models.append(('SVM',SVC(kernel='linear')))
for name,model in models:
    print()
    print("******",name,"*****")
    print()
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    print("Accuracy Score:",accuracy_score(y_test,y_pred))


****** Logistic Regression *****

Accuracy Score: 0.9644670050761421

****** KNN *****

Accuracy Score: 0.949238578680203

****** RandomForestClassifier *****

Accuracy Score: 0.9695431472081218

****** SVM *****

Accuracy Score: 0.9543147208121827


In [19]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [21]:
df.tail()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.01448,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.05508,2.03503,-0.738589,0.868229,1.058415,0.02433,0.294869,0.5848,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.24964,-0.557828,2.630515,3.03126,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.24044,0.530483,0.70251,0.689799,-0.377961,0.623708,-0.68618,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.0,0
284806,172792.0,-0.533413,-0.189733,0.703337,-0.506271,-0.012546,-0.649617,1.577006,-0.41465,0.48618,...,0.261057,0.643078,0.376777,0.008797,-0.473649,-0.818267,-0.002415,0.013649,217.0,0


In [45]:
#Data Underfits
df['Class'].value_counts()

Class
0    284315
1       492
Name: count, dtype: int64

In [35]:
new_df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
28772,35152.0,1.209563,-0.668772,0.700174,-0.539697,-1.269708,-0.869827,-0.543943,-0.097946,-0.796963,...,0.00181,-0.229183,0.179341,0.578224,0.057873,-0.49016,0.01491,0.034723,69.0,0
60038,49126.0,-1.078806,0.346709,0.937811,-1.586407,-0.997869,-1.030757,0.428815,0.14642,-1.585147,...,-0.251264,-0.515104,-0.008041,0.407922,-0.064366,0.880976,-0.225994,-0.002576,94.0,0
31693,36474.0,-4.564317,-4.877094,1.407992,0.780386,-0.120295,-0.408491,-0.736098,1.032603,-1.165996,...,0.410231,-0.735606,1.074129,-0.118351,0.309854,-0.499497,-0.134161,-0.519133,533.76,0
165545,117501.0,1.900509,0.053522,-1.579659,1.328364,0.404513,-0.857943,0.599104,-0.289807,-0.07164,...,0.132307,0.482308,-0.040288,0.026159,0.402324,-0.51661,-0.025987,-0.06415,49.66,0
35134,37982.0,-1.011912,-0.914176,1.855144,-1.09331,-1.480964,1.520371,-1.797317,-2.190257,-1.314727,...,-1.163952,0.496301,-0.36041,-0.240846,0.745803,0.748225,-0.015442,0.213922,293.34,0


In [33]:
new_df['Class'].value_counts()

Class
0    492
1    492
Name: count, dtype: int64