**Importing the Required Libraries**

In [98]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.ensemble import BaggingClassifier, BaggingRegressor,RandomForestClassifier,RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor, AdaBoostClassifier, AdaBoostRegressor
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

**Importing the data**

In [99]:
data = pd.read_csv("creditcard.csv")

**Data preprocessing**

In [100]:
print(data.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [101]:
print(data.isnull().sum())

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [102]:

Total_transactions = len(data)
normal = len(data[data.Class == 0])
fraudulent = len(data[data.Class == 1])
fraud_percentage = round(fraudulent/normal*100, 2)
print('Total number of Trnsactions are ',Total_transactions)
print('Number of Normal Transactions are ',normal)
print('Number of fraudulent Transactions are ',fraudulent)
print('Percentage of fraud Transactions is ',fraud_percentage)

Total number of Trnsactions are  284807
Number of Normal Transactions are  284315
Number of fraudulent Transactions are  492
Percentage of fraud Transactions is  0.17


In [103]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [104]:
print(min(data.Amount))
print(max(data.Amount))

0.0
25691.16


**Since all the columns in data are transformed but Amount is not transformed we can can also do that**


In [105]:

sc = StandardScaler()
amount = data['Amount'].values
data['Amount'] = sc.fit_transform(amount.reshape(-1, 1))

In [106]:
print(min(data.Amount))
print(max(data.Amount))

-0.35322939296682354
102.36224270928423


In [107]:
data.drop(['Time'], axis=1, inplace=True)

In [108]:
print(data.shape)

(284807, 30)


In [109]:
data.drop_duplicates(inplace=True)

In [110]:
data.shape

(275663, 30)

**MODEL DEVELOPEMENT**

*Finding the independent and dependent variables from the data*

In [111]:
X=data.drop('Class',axis=1).values
y=data['Class'].values

*Making the Training and test data*

In [112]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1)

**Decision Tree Classifier Model**


In [113]:

model = DecisionTreeClassifier(max_depth=4,criterion='entropy')
model.fit(X_train,y_train)
y_hat=model.predict(X_test)

In [114]:
print('Accuracy score of the Decision Tree model is ',accuracy_score(y_test,y_hat))

Accuracy score of the Decision Tree model is  0.9991438853096524


In [115]:
print('F1 Score: ', f1_score(y_test,y_hat))

F1 Score:  0.7489361702127659


In [116]:
confusion_matrix(y_test, y_hat, labels = [0, 1])

array([[68769,    19],
       [   40,    88]], dtype=int64)

**Random Forest Algorithm**

In [117]:
rf = RandomForestClassifier(max_depth = 4)
rf.fit(X_train, y_train)
rf_yhat = rf.predict(X_test)

In [118]:
print('Accuracy score of the Decision Tree model is ',accuracy_score(y_test,rf_yhat))
print('F1 Score: ', f1_score(y_test,rf_yhat))
print('Confusion Matrix: ')
confusion_matrix(y_test, rf_yhat, labels = [0, 1])

Accuracy score of the Decision Tree model is  0.9991874165650937
F1 Score:  0.7454545454545454
Confusion Matrix: 


array([[68778,    10],
       [   46,    82]], dtype=int64)

**XGBoost Algorithm**

In [119]:
xgb = XGBClassifier(max_depth = 4)
xgb.fit(X_train, y_train)
xgb_yhat = xgb.predict(X_test)


In [120]:
print('Accuracy score of the Decision Tree model is ',accuracy_score(y_test,xgb_yhat))
print('F1 Score: ', f1_score(y_test,xgb_yhat))
print('Confusion Matrix: ')
confusion_matrix(y_test, xgb_yhat, labels = [0, 1])

Accuracy score of the Decision Tree model is  0.999506645771664
F1 Score:  0.8495575221238937
Confusion Matrix: 


array([[68786,     2],
       [   32,    96]], dtype=int64)

**_The End!_**