#### Model Development

In [32]:
# Libraries

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC 
from sklearn.metrics import accuracy_score

In [3]:
# Static data visualization

import seaborn as sns

In [4]:
# Dataset

data = pd.read_csv('../data/creditcard_smot.csv')
df = pd.DataFrame(data)

In [5]:
# Printing random 5 data rows from the dataset

data.sample(5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
28170,34934.0,-3.029966,2.62178,1.185,2.521261,-0.733559,2.221726,-3.90245,-10.245756,-1.01239,...,-1.434609,1.207432,0.968631,0.333968,-0.331814,0.126484,0.002243,0.003262,12.88,0
118068,75117.0,0.513775,-1.850612,-0.399994,-0.077888,-1.29149,-0.886463,0.44614,-0.464147,-0.976971,...,-0.159854,-0.861899,-0.422041,0.478578,0.291343,1.040059,-0.150619,0.080123,468.7,0
135189,81362.0,1.115443,0.13812,0.276809,0.908619,0.029089,0.14004,-0.040297,0.082255,-0.217475,...,0.197322,0.595561,-0.191105,-0.268059,0.649406,-0.224756,0.032832,0.011451,33.6,0
269261,163999.0,1.970548,-0.760623,-1.67157,-0.637687,1.66529,3.926531,-1.30441,1.058728,1.11059,...,0.160352,0.564189,0.202586,0.759793,-0.245286,0.589136,-0.000253,-0.052949,11.5,0
162402,115575.0,2.037753,0.119798,-1.725538,0.554275,0.060828,-1.549432,0.226124,-0.348185,0.725958,...,0.191522,0.664284,-0.016825,-0.041613,0.21811,-0.101997,-0.010025,-0.031317,12.9,0


In [6]:
# Size of the dataset

data.shape

(283726, 31)

In [9]:
# Statistical measure of data

data.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,...,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0,283726.0
mean,94811.0776,0.005917,-0.004135,0.001613,-0.002966,0.001828,-0.001139,0.001801,-0.000854,-0.001596,...,-0.000371,-1.5e-05,0.000198,0.000214,-0.000232,0.000149,0.001763,0.000547,88.472687,0.001667
std,47481.047891,1.948026,1.646703,1.508682,1.414184,1.377008,1.331931,1.227664,1.179054,1.095492,...,0.723909,0.72455,0.623702,0.605627,0.52122,0.482053,0.395744,0.328027,250.399437,0.040796
min,0.0,-56.40751,-72.715728,-48.325589,-5.683171,-113.743307,-26.160506,-43.557242,-73.216718,-13.434066,...,-34.830382,-10.933144,-44.807735,-2.836627,-10.295397,-2.604551,-22.565679,-15.430084,0.0,0.0
25%,54204.75,-0.915951,-0.600321,-0.889682,-0.850134,-0.68983,-0.769031,-0.552509,-0.208828,-0.644221,...,-0.228305,-0.5427,-0.161703,-0.354453,-0.317485,-0.326763,-0.070641,-0.052818,5.6,0.0
50%,84692.5,0.020384,0.063949,0.179963,-0.022248,-0.053468,-0.275168,0.040859,0.021898,-0.052596,...,-0.029441,0.006675,-0.011159,0.041016,0.016278,-0.052172,0.001479,0.011288,22.0,0.0
75%,139298.0,1.316068,0.800283,1.02696,0.739647,0.612218,0.396792,0.570474,0.325704,0.595977,...,0.186194,0.528245,0.147748,0.439738,0.350667,0.240261,0.091208,0.078276,77.51,0.0
max,172792.0,2.45493,22.057729,9.382558,16.875344,34.801666,73.301626,120.589494,20.007208,15.594995,...,27.202839,10.50309,22.528412,4.584549,7.519589,3.517346,31.612198,33.847808,25691.16,1.0


In [11]:
# Seperating the transaction types
# 0 -> Legitimate transactions & 1 -> Fraudulent transactions

data['Class'].value_counts()

Class
0    283253
1       473
Name: count, dtype: int64

In [12]:
# Mean values of all the features based on Class

data.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,94835.058093,0.013439,-0.009829,0.012853,-0.01044,0.006769,0.001251,0.010447,-0.002448,0.002613,...,-0.000489,-0.00115,-0.00016,0.00036,0.000393,-0.000301,6.5e-05,0.001409,0.000418,88.413575
1,80450.513742,-4.49828,3.405965,-6.729599,4.472591,-2.957197,-1.432518,-5.175912,0.953255,-2.522124,...,0.405043,0.46655,0.086639,-0.096464,-0.106643,0.040615,0.050456,0.213774,0.07827,123.87186


In [13]:
# Data labels

X = data.drop(columns='Class', axis=1)
Y = data['Class']

In [15]:
# View X 

print(X)

            Time         V1         V2        V3        V4        V5  \
0            0.0  -1.359807  -0.072781  2.536347  1.378155 -0.338321   
1            0.0   1.191857   0.266151  0.166480  0.448154  0.060018   
2            1.0  -1.358354  -1.340163  1.773209  0.379780 -0.503198   
3            1.0  -0.966272  -0.185226  1.792993 -0.863291 -0.010309   
4            2.0  -1.158233   0.877737  1.548718  0.403034 -0.407193   
...          ...        ...        ...       ...       ...       ...   
283721  172786.0 -11.881118  10.071785 -9.834783 -2.066656 -5.364473   
283722  172787.0  -0.732789  -0.055080  2.035030 -0.738589  0.868229   
283723  172788.0   1.919565  -0.301254 -3.249640 -0.557828  2.630515   
283724  172788.0  -0.240440   0.530483  0.702510  0.689799 -0.377961   
283725  172792.0  -0.533413  -0.189733  0.703337 -0.506271 -0.012546   

              V6        V7        V8        V9  ...       V20       V21  \
0       0.462388  0.239599  0.098698  0.363787  ...  0.25141

In [16]:
# View Y 

print(Y)

0         0
1         0
2         0
3         0
4         0
         ..
283721    0
283722    0
283723    0
283724    0
283725    0
Name: Class, Length: 283726, dtype: int64


In [21]:
# Process the X label to standarize X

scale = StandardScaler()
scale.fit(X)

In [20]:
s_data = scale.transform(X)

print(s_data)

[[-1.99682292 -0.70108232 -0.04168726 ...  0.33303251 -0.06584955
   0.24419951]
 [-1.99682292  0.60879165  0.16413764 ... -0.0271543   0.0432187
  -0.34258399]
 [-1.99680186 -0.7003364  -0.81133678 ... -0.14432548 -0.18382429
   1.15889967]
 ...
 [ 1.64227757  0.98235398 -0.18043304 ...  0.00680174 -0.08264021
  -0.0822395 ]
 [ 1.64227757 -0.12646526  0.32465977 ...  0.27052318  0.31700384
  -0.31339058]
 [ 1.64236181 -0.27686005 -0.1127094  ... -0.01055821  0.03994074
   0.51329005]]


In [22]:
X = s_data
Y = data['Class']

In [23]:
print(X)

[[-1.99682292 -0.70108232 -0.04168726 ...  0.33303251 -0.06584955
   0.24419951]
 [-1.99682292  0.60879165  0.16413764 ... -0.0271543   0.0432187
  -0.34258399]
 [-1.99680186 -0.7003364  -0.81133678 ... -0.14432548 -0.18382429
   1.15889967]
 ...
 [ 1.64227757  0.98235398 -0.18043304 ...  0.00680174 -0.08264021
  -0.0822395 ]
 [ 1.64227757 -0.12646526  0.32465977 ...  0.27052318  0.31700384
  -0.31339058]
 [ 1.64236181 -0.27686005 -0.1127094  ... -0.01055821  0.03994074
   0.51329005]]


In [24]:
print(Y)

0         0
1         0
2         0
3         0
4         0
         ..
283721    0
283722    0
283723    0
283724    0
283725    0
Name: Class, Length: 283726, dtype: int64


In [27]:
# Split the dataset (Testing datase = 20%)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Print the details splitted X

print(X.shape, X_train.shape, X_test.shape)

(283726, 30) (226980, 30) (56746, 30)


In [31]:
# Training the model

model = SVC(kernel = 'sigmoid')

model.fit(X_train, Y_train)


#### Model Evaluation

In [35]:
# Accuracy score of the training dataset

X_train_predi = model.predict(X_train)

training_accuracy = accuracy_score(X_train_predi, Y_train)

print('Accuracy score of training data : ', training_accuracy)


Accuracy score of training data :  0.9982333245219843


In [37]:
# Accuracy score of the testing dataset

X_test_predi = model.predict(X_test)

testing_accuracy = accuracy_score(X_test_predi, Y_test)

print('Accuracy score of testing data : ', testing_accuracy)

Accuracy score of testing data :  0.9981320269270081


#### Use the trained SVM model for preditions