In [2]:
import numpy as np
import pandas as pd

Context:
It is important that credit card companies are able to recognize fraudulent credit card transactions 
so that customers are not charged for items that they did not purchase.

Content:
The datasets contains transactions made by credit cards in September 2013 by european cardholders. 
This dataset presents transactions that occurred in two days, where we have 492 frauds out of 284,807 
transactions. The dataset is highly unbalanced, the positive class (frauds) account for 0.172% of all 
transactions.

It contains only numerical input variables which are the result of a PCA transformation. 
Unfortunately, due to confidentiality issues, we cannot provide the original features and more 
background information about the data. Features V1, V2, ... V28 are the principal components obtained
with PCA, the only features which have not been transformed with PCA are 'Time' and 'Amount'.
Feature 'Time' contains the seconds elapsed between each transaction and the first transaction 
in the dataset. The feature 'Amount' is the transaction Amount, this feature can be used for 
example-dependant cost-senstive learning. Feature 'Class' is the response variable and it takes 
value 1 in case of fraud and 0 otherwise.

In [3]:
# loading dataset
data=pd.read_csv(r"C:\Users\Pooja gupta\Downloads\archive\creditcard.csv")

In [4]:
data

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [5]:
data.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [7]:
data.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [8]:
data['Class'].value_counts()          # This dataset is highly unbalanced (0-> normal transaction, 1-> fraudulent transaction)

0    284315
1       492
Name: Class, dtype: int64

In [9]:
# separating the data for analysis
legit=data[data.Class==0]
fraud=data[data.Class==1]

In [10]:
legit

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [11]:
fraud

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [12]:
legit.shape

(284315, 31)

In [13]:
fraud.shape

(492, 31)

In [14]:
# statistical measures of data
legit.Amount.describe()

count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64

In [15]:
fraud.Amount.describe()

count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

In [16]:
# compare the values for both transactions
data.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,94838.202258,0.008258,-0.006271,0.012171,-0.00786,0.005453,0.002419,0.009637,-0.000987,0.004467,...,-0.000644,-0.001235,-2.4e-05,7e-05,0.000182,-7.2e-05,-8.9e-05,-0.000295,-0.000131,88.291022
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [17]:
# Under-Sampling
# Build a sample dataset containing similar distribution of normal transactions and fraudulent transactions
# Number of fraudulent transactions --> 492

In [18]:
legit_sample=legit.sample(n=492)

In [19]:
legit_sample

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
33493,37255.0,1.086227,-0.250572,-1.527851,0.275611,2.251396,3.438627,-0.123033,0.736383,-0.088167,...,-0.022998,-0.302619,-0.277836,1.024586,0.958461,-0.257033,-0.009039,0.025826,111.74,0
173990,121720.0,-0.173101,1.169769,-1.635046,-0.949761,2.856274,-1.039775,1.937962,-0.694640,-0.502598,...,-0.021629,0.324436,-0.503273,-0.081764,-0.217137,0.405988,-0.324014,-0.213117,0.76,0
128061,78585.0,-0.681784,1.026792,0.748128,-0.043177,-0.102370,-0.506139,0.346200,0.564692,-0.810443,...,0.182431,0.307385,-0.015496,0.200722,-0.501997,0.262822,-0.031493,0.069761,6.79,0
111221,72149.0,1.266683,0.028179,0.248348,-0.084655,-0.325273,-0.559762,-0.095764,-0.111756,-0.134363,...,0.103204,0.348333,-0.137905,0.107695,0.444917,1.115145,-0.080790,-0.010040,7.13,0
90900,63225.0,1.191914,-0.179986,0.215716,1.038623,0.303714,1.586511,-0.556064,0.460144,0.809559,...,-0.151584,-0.122255,-0.292772,-1.689098,0.795768,-0.166941,0.063030,-0.005693,10.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251920,155563.0,2.141218,-0.130452,-2.297188,-0.616817,0.729267,-0.853965,0.465777,-0.307762,-0.068641,...,0.063662,0.175872,0.038985,0.370171,0.109071,1.324006,-0.185004,-0.099504,9.99,0
93277,64330.0,-0.488293,0.752356,1.208187,0.755349,0.693514,-0.258960,0.785888,-0.149423,-0.814547,...,0.076220,0.163746,-0.090610,-0.431288,-0.045448,-0.371026,0.144386,0.154751,54.99,0
215854,140245.0,0.176272,0.658282,-0.031694,-0.507444,0.777988,-0.368106,1.158208,-0.215195,0.033963,...,0.049217,0.334242,-0.040510,0.720599,0.043376,-0.401916,0.027258,0.007089,43.05,0
102723,68331.0,-0.578088,-0.421281,0.303253,-3.642820,1.614361,3.285539,-0.658060,0.806311,-2.587802,...,-0.209653,-0.479732,-0.248219,0.968191,0.561827,-0.285321,-0.078634,0.052381,10.00,0


In [20]:
# concatenating two dataframes
new_dataset=pd.concat([legit_sample,fraud],axis=0)

In [21]:
new_dataset

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
33493,37255.0,1.086227,-0.250572,-1.527851,0.275611,2.251396,3.438627,-0.123033,0.736383,-0.088167,...,-0.022998,-0.302619,-0.277836,1.024586,0.958461,-0.257033,-0.009039,0.025826,111.74,0
173990,121720.0,-0.173101,1.169769,-1.635046,-0.949761,2.856274,-1.039775,1.937962,-0.694640,-0.502598,...,-0.021629,0.324436,-0.503273,-0.081764,-0.217137,0.405988,-0.324014,-0.213117,0.76,0
128061,78585.0,-0.681784,1.026792,0.748128,-0.043177,-0.102370,-0.506139,0.346200,0.564692,-0.810443,...,0.182431,0.307385,-0.015496,0.200722,-0.501997,0.262822,-0.031493,0.069761,6.79,0
111221,72149.0,1.266683,0.028179,0.248348,-0.084655,-0.325273,-0.559762,-0.095764,-0.111756,-0.134363,...,0.103204,0.348333,-0.137905,0.107695,0.444917,1.115145,-0.080790,-0.010040,7.13,0
90900,63225.0,1.191914,-0.179986,0.215716,1.038623,0.303714,1.586511,-0.556064,0.460144,0.809559,...,-0.151584,-0.122255,-0.292772,-1.689098,0.795768,-0.166941,0.063030,-0.005693,10.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [22]:
new_dataset.shape

(984, 31)

In [23]:
new_dataset['Class'].value_counts()

0    492
1    492
Name: Class, dtype: int64

In [24]:
new_dataset.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,92897.873984,-0.03351,0.088282,0.039881,-0.027944,-0.000355,0.023266,0.040073,0.055108,0.018733,...,-0.001399,0.000867,0.014068,-0.016739,-0.000787,-0.005849,0.028055,-0.018067,0.009348,84.10437
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [25]:
# Splitting the data into features ans targets
x=new_dataset.drop(columns='Class',axis=1)
y=new_dataset['Class']

In [26]:
x

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
33493,37255.0,1.086227,-0.250572,-1.527851,0.275611,2.251396,3.438627,-0.123033,0.736383,-0.088167,...,0.176125,-0.022998,-0.302619,-0.277836,1.024586,0.958461,-0.257033,-0.009039,0.025826,111.74
173990,121720.0,-0.173101,1.169769,-1.635046,-0.949761,2.856274,-1.039775,1.937962,-0.694640,-0.502598,...,-0.021409,-0.021629,0.324436,-0.503273,-0.081764,-0.217137,0.405988,-0.324014,-0.213117,0.76
128061,78585.0,-0.681784,1.026792,0.748128,-0.043177,-0.102370,-0.506139,0.346200,0.564692,-0.810443,...,-0.328571,0.182431,0.307385,-0.015496,0.200722,-0.501997,0.262822,-0.031493,0.069761,6.79
111221,72149.0,1.266683,0.028179,0.248348,-0.084655,-0.325273,-0.559762,-0.095764,-0.111756,-0.134363,...,0.009894,0.103204,0.348333,-0.137905,0.107695,0.444917,1.115145,-0.080790,-0.010040,7.13
90900,63225.0,1.191914,-0.179986,0.215716,1.038623,0.303714,1.586511,-0.556064,0.460144,0.809559,...,-0.160927,-0.151584,-0.122255,-0.292772,-1.689098,0.795768,-0.166941,0.063030,-0.005693,10.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,1.252967,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.226138,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.247968,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.306271,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00


In [27]:
y

33493     0
173990    0
128061    0
111221    0
90900     0
         ..
279863    1
280143    1
280149    1
281144    1
281674    1
Name: Class, Length: 984, dtype: int64

In [28]:
from sklearn.model_selection import train_test_split

In [29]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=2)

In [30]:
x_train

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
100623,67571.0,-0.758469,-0.045410,-0.168438,-1.313275,-1.901763,0.739433,3.071892,-0.483422,0.618203,...,-0.032500,0.042619,0.397224,0.072229,-0.242276,0.560916,-0.540955,0.150606,-0.117140,549.06
140009,83483.0,-0.993952,0.900974,1.284641,-1.264047,-0.444727,-0.691982,0.098900,0.591507,0.011620,...,-0.190257,-0.026560,-0.192468,0.026854,0.070927,-0.436897,0.755416,0.141212,0.102948,4.23
74781,55753.0,-2.074697,0.274564,1.253084,-1.207747,-0.097813,-1.222763,0.818072,-0.367138,0.942748,...,0.072263,-0.363485,-0.289169,-0.207897,0.447894,-0.228897,0.704708,0.345287,0.407605,119.40
130500,79366.0,-1.506009,1.378743,0.307322,-1.732518,0.092259,0.387406,-0.267715,1.151280,-0.430527,...,0.003177,-0.031651,-0.201772,-0.113592,-1.295410,-0.143229,0.822914,0.132547,0.075853,3.69
26329,34011.0,1.288920,-0.499192,-0.185763,-1.808442,-0.353872,-0.322013,-0.314075,0.034500,1.282265,...,0.003054,-0.222342,-0.705053,-0.097879,-0.965237,0.378142,-0.189517,-0.003799,0.003897,40.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203143,134671.0,2.279457,-0.552937,-2.426203,-0.808606,-0.042905,-1.997751,0.410793,-0.637362,-0.815361,...,-0.162032,0.464995,1.340944,-0.216020,0.106957,0.706699,0.301992,-0.107306,-0.094886,20.00
91317,63412.0,1.272468,0.084568,0.345833,0.638514,-0.159090,-0.150621,-0.059243,-0.099484,0.443974,...,-0.020106,-0.339739,-0.770828,-0.042766,-0.412176,0.478984,0.229205,-0.011063,0.011486,11.71
223618,143456.0,-2.006582,3.676577,-5.463811,7.232058,-1.627859,-0.996755,-4.299833,2.268867,-3.651067,...,0.474414,0.713907,-0.063868,0.167947,-0.449864,0.023702,0.536905,0.485864,-0.042393,1.00
43061,41353.0,-15.020981,8.075240,-16.298091,5.664820,-11.918153,-4.246957,-14.716668,9.435084,-6.795398,...,-0.995787,2.525115,-0.832074,-0.186117,0.429781,0.697103,0.056031,-1.310888,-0.707403,34.12


In [31]:
x_test

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
247995,153761.0,1.146259,1.403458,-4.159148,2.660107,-0.323217,-1.836071,-1.623740,0.259562,-1.132044,...,0.284831,0.564450,0.445744,-0.141136,-0.265517,0.362260,-0.416062,0.507370,0.243744,51.37
93277,64330.0,-0.488293,0.752356,1.208187,0.755349,0.693514,-0.258960,0.785888,-0.149423,-0.814547,...,0.318148,0.076220,0.163746,-0.090610,-0.431288,-0.045448,-0.371026,0.144386,0.154751,54.99
35658,38211.0,-0.285735,1.284516,0.535688,0.266447,0.473122,-1.365137,0.990818,-0.174606,-0.592249,...,-0.081685,-0.086843,-0.290061,-0.243744,0.238308,0.063232,0.330787,0.014893,0.081297,0.76
51724,45090.0,1.081514,-0.813124,0.045273,-0.587816,-0.739177,-0.529769,-0.170709,-0.072318,-1.027055,...,0.199931,-0.058864,-0.548278,0.129027,0.053457,0.089807,-0.501670,-0.009769,0.030862,135.00
15451,26833.0,-20.532751,12.373989,-23.009003,6.144821,-15.587296,-4.384491,-15.939003,13.696416,-3.948455,...,1.592754,1.754608,-1.466115,-0.856779,0.125777,1.402587,-0.223755,1.574249,0.469201,99.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108258,70828.0,0.196707,1.189757,0.704882,2.891388,0.045555,1.245730,-1.198714,-2.421616,-1.232089,...,0.646616,-1.328132,0.189311,-0.005524,-0.814708,0.400924,0.286281,0.135215,0.257315,0.76
12108,21046.0,-16.917468,9.669900,-23.736443,11.824990,-9.830548,-2.514829,-17.290657,1.820408,-6.264903,...,0.996745,-2.336111,0.972755,1.241866,-1.051086,0.038009,0.672317,2.108471,-1.421243,1.00
99506,67150.0,-1.824295,0.403327,-1.994122,2.756558,-3.139064,0.408185,-1.209045,1.095634,-1.447225,...,1.503688,0.838760,0.341727,0.947506,-0.145493,0.049326,0.831065,0.332421,0.252713,489.71
74496,55614.0,-7.347955,2.397041,-7.572356,5.177819,-2.854838,-1.795239,-8.783235,0.437157,-3.740598,...,0.073164,-0.175273,0.543325,-0.547955,-0.503722,-0.310933,-0.163986,1.197895,0.378187,0.83


In [32]:
y_train

100623    1
140009    0
74781     0
130500    0
26329     0
         ..
203143    0
91317     0
223618    1
43061     1
10526     0
Name: Class, Length: 787, dtype: int64

In [33]:
y_test

247995    1
93277     0
35658     0
51724     0
15451     1
         ..
108258    1
12108     1
99506     1
74496     1
199799    0
Name: Class, Length: 197, dtype: int64

In [34]:
# model training

In [35]:
from sklearn.linear_model import LogisticRegression

In [36]:
lo_model=LogisticRegression()

In [37]:
# training the logistic regression model with training data
lo_model.fit(x_train,y_train)

LogisticRegression()

In [38]:
train_pred=lo_model.predict(x_train)
train_pred

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,

In [39]:
test_pred=lo_model.predict(x_test)
test_pred

array([1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0],
      dtype=int64)

In [40]:
y_test

247995    1
93277     0
35658     0
51724     0
15451     1
         ..
108258    1
12108     1
99506     1
74496     1
199799    0
Name: Class, Length: 197, dtype: int64

In [41]:
from sklearn.metrics import *

In [42]:
train_acc=accuracy_score(train_pred,y_train)*100
train_acc

94.40914866581956

In [43]:
test_acc=accuracy_score(test_pred,y_test)*100
test_acc

94.41624365482234

In [44]:
cm=confusion_matrix(test_pred,y_test)

In [45]:
cm

array([[96,  8],
       [ 3, 90]], dtype=int64)

In [46]:
from sklearn.tree import DecisionTreeClassifier

In [47]:
dt_model=DecisionTreeClassifier()

In [48]:
#fitting of data for training 
dt_model.fit(x_train,y_train)

DecisionTreeClassifier()

In [50]:
#predictions by model
dt_pred=dt_model.predict(x_test)
dt_pred

array([1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0],
      dtype=int64)

In [51]:
y_test

247995    1
93277     0
35658     0
51724     0
15451     1
         ..
108258    1
12108     1
99506     1
74496     1
199799    0
Name: Class, Length: 197, dtype: int64

In [52]:
accuracy_score(dt_pred,y_test)*100

91.87817258883248

In [53]:
confusion_matrix(dt_pred,y_test)

array([[90,  7],
       [ 9, 91]], dtype=int64)

In [54]:
parameter_values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
df1 = pd.DataFrame(columns=['max_depth', 'accuracy'])
df1

Unnamed: 0,max_depth,accuracy


In [55]:
for X in parameter_values:
    model = DecisionTreeClassifier(max_depth=X)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    acc_score = accuracy_score(y_test,y_pred)*100
    df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True) 
df1

  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_depth' : X , 'accuracy' : acc_score}, ignore_index=True)
  df1= df1.append({'max_dept

Unnamed: 0,max_depth,accuracy
0,1.0,92.893401
1,2.0,92.385787
2,3.0,93.401015
3,4.0,93.401015
4,5.0,92.385787
5,6.0,92.385787
6,7.0,92.385787
7,8.0,92.385787
8,9.0,91.878173
9,10.0,90.862944


In [56]:
final_model = DecisionTreeClassifier(max_depth=3)
final_model.fit(x_train, y_train)
y_pred = final_model.predict(x_test)
acc_score = accuracy_score(y_test,y_pred)*100

In [57]:
acc_score

93.4010152284264

In [58]:
confusion_matrix(y_test,y_pred)

array([[97,  2],
       [11, 87]], dtype=int64)