<h1>Import necessary libraries</h1>

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report


## Load the dataset

In [11]:
df = pd.read_csv('/content/creditcard.csv')

## Data exploration

In [12]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


In [13]:
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,11959.0,11959.0,11959.0,11959.0,11959.0,11959.0,11959.0,11959.0,11959.0,11959.0,...,11958.0,11958.0,11958.0,11958.0,11958.0,11958.0,11958.0,11958.0,11958.0,11958.0
mean,8009.996822,-0.21623,0.277097,0.889505,0.282606,-0.086585,0.139986,-0.121943,-0.048727,0.911579,...,-0.061474,-0.15385,-0.033709,0.017113,0.104794,0.062891,0.00832,0.000271,62.352617,0.004349
std,6204.332248,1.583914,1.308884,1.331824,1.478162,1.191776,1.306285,1.153899,1.246823,1.192349,...,0.918705,0.626631,0.505355,0.591394,0.429413,0.561603,0.4051,0.26123,178.24701,0.065803
min,0.0,-27.670569,-34.607649,-22.804686,-4.657545,-32.092129,-23.496714,-26.548144,-23.632502,-7.175097,...,-11.468435,-8.593642,-19.254328,-2.512377,-4.781606,-1.338556,-7.9761,-3.575312,0.0,0.0
25%,2542.0,-0.978944,-0.261503,0.417186,-0.622456,-0.688114,-0.622521,-0.591335,-0.185243,0.196741,...,-0.268151,-0.543613,-0.171833,-0.333251,-0.144092,-0.353864,-0.080064,-0.015461,5.0,0.0
50%,6662.0,-0.340742,0.256346,0.951223,0.213029,-0.183847,-0.146903,-0.094876,0.013616,0.894911,...,-0.130062,-0.1249,-0.044299,0.072099,0.143136,0.001972,-0.003039,0.015452,15.95,0.0
75%,12382.0,1.161273,0.883626,1.613678,1.159141,0.346298,0.508432,0.431657,0.26756,1.58912,...,0.021687,0.234029,0.075686,0.401798,0.380076,0.409231,0.106344,0.073171,50.0,0.0
max,20642.0,1.960497,9.092123,4.101716,11.927512,34.099309,21.393069,34.303177,5.499963,10.392889,...,22.614889,4.534454,13.876221,3.200201,5.525093,3.517346,8.254376,4.860769,7712.43,1.0


## Data preprocessing

In [14]:
scaler = StandardScaler()
df['normalizedAmount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))
df = df.drop(['Time', 'Amount'], axis=1)
df = df.dropna() # add this line to drop rows with NaN values

In [16]:
df.isnull().any()

V1                  False
V2                  False
V3                  False
V4                  False
V5                  False
V6                  False
V7                  False
V8                  False
V9                  False
V10                 False
V11                 False
V12                 False
V13                 False
V14                 False
V15                 False
V16                 False
V17                 False
V18                 False
V19                 False
V20                 False
V21                 False
V22                 False
V23                 False
V24                 False
V25                 False
V26                 False
V27                 False
V28                 False
Class               False
normalizedAmount    False
dtype: bool

In [17]:
df['Class'].dropna()
df['normalizedAmount'].dropna()

0        0.489607
1       -0.334733
2        1.774619
3        0.343063
4        0.042849
           ...   
11953   -0.283341
11954   -0.299331
11955   -0.344270
11956    0.155058
11957   -0.283341
Name: normalizedAmount, Length: 11958, dtype: float64

## Model fitting

In [19]:
X = df.drop(['Class'], axis=1)
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)


## Model evaluation

In [20]:
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)
print("Accuracy score:", accuracy_score(y_test, y_pred))
print("Classification report:\n", classification_report(y_test, y_pred))


Confusion matrix:
 [[2375    3]
 [   2   12]]
Accuracy score: 0.9979096989966555
Classification report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      2378
         1.0       0.80      0.86      0.83        14

    accuracy                           1.00      2392
   macro avg       0.90      0.93      0.91      2392
weighted avg       1.00      1.00      1.00      2392

