In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
import joblib
from sklearn.metrics import classification_report,confusion_matrix,roc_auc_score

In [2]:
df=pd.read_csv("creditcard.csv")
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
df.isnull().sum()
len(df)
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,...,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0,37400.0
mean,24644.281979,-0.206964,0.061457,0.717948,0.193094,-0.221222,0.098875,-0.114842,0.034855,0.238696,...,-0.03037,-0.112456,-0.041243,0.00637,0.136237,0.021956,0.009414,0.003913,85.577088,0.002754
std,12511.198859,1.827838,1.542136,1.52166,1.404416,1.37958,1.307203,1.250785,1.232131,1.231647,...,0.762854,0.639543,0.550749,0.59385,0.435879,0.50565,0.387073,0.299352,231.978944,0.052407
min,0.0,-30.55238,-40.978852,-31.103685,-5.172595,-42.147898,-23.496714,-26.548144,-41.484823,-7.175097,...,-20.262054,-8.593642,-26.751119,-2.836627,-7.495741,-1.43865,-8.567638,-9.617915,0.0,0.0
25%,13594.25,-0.960967,-0.512159,0.239844,-0.714708,-0.8254,-0.640593,-0.596026,-0.154778,-0.538333,...,-0.238616,-0.536077,-0.178626,-0.328045,-0.127557,-0.331298,-0.063197,-0.007026,7.14,0.0
50%,29702.5,-0.232374,0.108341,0.822814,0.189301,-0.259027,-0.159543,-0.073669,0.045361,0.113501,...,-0.079642,-0.085989,-0.052256,0.061018,0.175972,-0.064,0.008702,0.021236,22.75,0.0
75%,34827.25,1.16188,0.749137,1.452476,1.074294,0.301474,0.487298,0.435013,0.310303,0.967016,...,0.097372,0.298167,0.076457,0.398731,0.421309,0.301191,0.085846,0.075756,77.2975,0.0
max,38942.0,1.960497,16.713389,4.101716,13.143668,34.099309,22.529298,36.677268,20.007208,10.392889,...,22.614889,5.805795,17.297845,4.014444,5.525093,3.517346,11.13574,5.678671,7879.42,1.0


In [4]:
len(df)

37400

In [5]:
X =df.drop('Class',axis=1)
y=df['Class']

In [6]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)


In [7]:
scaler = StandardScaler()
X[['Time','Amount']]=scaler.fit_transform(X[['Time','Amount']])

In [8]:
X_train,X_test,y_train,y_test = train_test_split(X_res,y_res,test_size = 0.3,random_state=42)

In [9]:
model = RandomForestClassifier()
model.fit(X_train,y_train)

In [10]:
y_pred=model.predict(X_test)
y_prob=model.predict_proba(X_test)[:,1]


In [11]:
print("Confusion matrix : ",confusion_matrix(y_pred,y_test))
print("classification report : \n",classification_report(y_test,y_pred))
print('Roc Auc',roc_auc_score(y_test,y_prob))

Confusion matrix :  [[11237     0]
 [    1 11141]]
classification report : 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     11238
           1       1.00      1.00      1.00     11141

    accuracy                           1.00     22379
   macro avg       1.00      1.00      1.00     22379
weighted avg       1.00      1.00      1.00     22379

Roc Auc 0.9999998722070839


In [12]:
joblib.dump(model,'fraud_model_simple.joblib')

['fraud_model_simple.joblib']