**Importing the Libraries**

In [51]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

**Reading the Dataset**

In [52]:
# Load the credit-card transactions CSV into a DataFrame.
ccdata = pd.read_csv(filepath_or_buffer='/content/creditcard.csv')

In [53]:
# Preview the first five rows of the raw data.
ccdata.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [54]:
# Dataset information: column names, non-null counts and dtypes.
ccdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [55]:
# Count the missing values in each column of the dataset.
ccdata.isna().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

**Distribution of normal and fraud transactions**

In [56]:
# Number of rows per label in the Class column (0 and 1).
ccdata.Class.value_counts()

0    284315
1       492
Name: Class, dtype: int64

In [57]:
# Split the rows by label: Class == 0 goes to `normal`, Class == 1 to `fraud`.
normal = ccdata.loc[ccdata['Class'] == 0]
fraud = ccdata.loc[ccdata['Class'] == 1]

In [58]:
# (rows, columns) of the Class == 0 subset.
normal.shape

(284315, 31)

In [59]:
# (rows, columns) of the Class == 1 subset.
fraud.shape

(492, 31)

**Building a balanced dataset: undersampling the normal transactions to match the number of fraud transactions**

In [60]:
# Undersample the majority class: draw as many normal transactions as there are
# fraud transactions so the two classes end up balanced.
# The sample size is derived from `fraud` instead of being hard-coded, and
# random_state is fixed (same seed as the train/test split) so that a
# Restart & Run All reproduces the same subset and the same downstream results.
normal_sample=normal.sample(n=len(fraud),random_state=42)


In [61]:
# Stack the sampled normal rows on top of the fraud rows (row-wise concatenation).
new_dataset = pd.concat(objs=[normal_sample, fraud], axis='index')


In [62]:
# Preview the first five rows of the combined dataset.
new_dataset.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
23334,32705.0,-0.725619,0.214936,-0.098592,0.31687,3.068862,3.469588,-0.295951,1.021052,-0.371377,...,-0.067408,-0.187081,-0.102559,1.012307,0.166833,-0.237405,0.394012,0.212737,9.35,0
380,279.0,-0.857063,0.204627,2.186985,-0.045589,0.472689,-0.18745,0.435892,0.010475,-0.182091,...,-0.237293,-0.611724,-0.190796,0.003336,0.212666,0.295664,-0.17526,-0.165614,19.29,0
153530,99325.0,2.058972,0.128677,-0.902313,0.602133,0.052002,-1.058646,0.143832,-0.44614,1.67402,...,-0.422966,-0.779528,0.347533,-0.046982,-0.302482,0.159217,-0.0876,-0.064925,0.89,0
204679,135392.0,-0.146052,0.13799,0.500156,-0.288341,0.208516,-0.36296,1.007551,-0.381682,1.162779,...,0.353429,1.616194,-0.250993,-0.024513,-0.642798,-0.172211,0.031964,0.021414,80.63,0
212409,138843.0,1.988379,0.040464,-1.865702,0.325747,0.381941,-0.803038,0.077306,-0.140597,0.151496,...,0.216369,0.624263,0.034082,0.716302,0.03406,0.62293,-0.077114,-0.038578,26.73,0


In [63]:
# (rows, columns) of the combined dataset.
new_dataset.shape

(984, 31)

**Splitting new_dataset into features (X) and target (Y)**

In [64]:
# Target: the Class label. Features: every remaining column.
Y = new_dataset['Class']
X = new_dataset.drop(columns=['Class'])

In [65]:
# Display the feature matrix (the notebook truncates the rendering of long frames).
X

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
23334,32705.0,-0.725619,0.214936,-0.098592,0.316870,3.068862,3.469588,-0.295951,1.021052,-0.371377,...,0.443072,-0.067408,-0.187081,-0.102559,1.012307,0.166833,-0.237405,0.394012,0.212737,9.35
380,279.0,-0.857063,0.204627,2.186985,-0.045589,0.472689,-0.187450,0.435892,0.010475,-0.182091,...,0.175356,-0.237293,-0.611724,-0.190796,0.003336,0.212666,0.295664,-0.175260,-0.165614,19.29
153530,99325.0,2.058972,0.128677,-0.902313,0.602133,0.052002,-1.058646,0.143832,-0.446140,1.674020,...,-0.211876,-0.422966,-0.779528,0.347533,-0.046982,-0.302482,0.159217,-0.087600,-0.064925,0.89
204679,135392.0,-0.146052,0.137990,0.500156,-0.288341,0.208516,-0.362960,1.007551,-0.381682,1.162779,...,-0.206304,0.353429,1.616194,-0.250993,-0.024513,-0.642798,-0.172211,0.031964,0.021414,80.63
212409,138843.0,1.988379,0.040464,-1.865702,0.325747,0.381941,-0.803038,0.077306,-0.140597,0.151496,...,-0.105126,0.216369,0.624263,0.034082,0.716302,0.034060,0.622930,-0.077114,-0.038578,26.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,1.252967,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.226138,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.247968,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.306271,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00


In [66]:
# Display the target labels.
Y

23334     0
380       0
153530    0
204679    0
212409    0
         ..
279863    1
280143    1
280149    1
281144    1
281674    1
Name: Class, Length: 984, dtype: int64

**Splitting the dataset into Training Data and Testing Data**

In [67]:
# Hold out 20% of the rows for testing; stratify on Y so both splits keep the
# same class proportions, and fix the seed so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

In [68]:
# Dump all four train/test objects to stdout for a quick visual check.
# NOTE(review): print() uses the plain-text repr, so the wide frames wrap across
# many lines; showing each split's .shape would be a more compact sanity check.
print(X_train,X_test,Y_train,Y_test)

            Time         V1         V2         V3        V4         V5  \
62829    50448.0  -1.357418   0.293186   1.667634 -2.940500  -1.021758   
52533    45510.0  -0.806710   1.172413   1.762872  0.039449  -0.015919   
56703    47545.0   1.176716   0.557091  -0.490800  0.756424   0.249192   
150684   93888.0 -10.040631   6.139183 -12.972972  7.740555  -8.684705   
6882      8808.0  -4.617217   1.695694  -3.114372  4.328199  -1.873257   
...          ...        ...        ...        ...       ...        ...   
39183    39729.0  -0.964567  -1.643541  -0.187727  1.158253  -2.458336   
115476   73885.0  -1.201006  -1.687557  -0.492951 -0.267969  -2.314567   
143335   85285.0  -6.713407   3.921104  -9.746678  5.148263  -5.151563   
208837  137295.0   2.208285  -0.845871  -0.792760 -1.135225  -0.583822   
17317    28625.0 -27.848181  15.598193 -28.923756  6.418442 -20.346228   

              V6         V7         V8        V9  ...       V20       V21  \
62829  -0.456800   0.107914   0.66

**Model Training - Logistic Regression**

In [69]:
# Logistic regression classifier created with scikit-learn's defaults (no arguments passed).
# NOTE(review): no feature scaling is applied anywhere in this notebook before fitting —
# confirm the default solver converges on the raw Time/Amount columns.
model=LogisticRegression()

In [70]:
# Fit the classifier on the training split only; the test split is kept for evaluation.
model.fit(X_train,Y_train)

LogisticRegression()

**Model Evaluation - Accuracy Score**

In [71]:
# Predict class labels for the held-out test features.
y_predict=model.predict(X_test)

In [72]:
# Fraction of test rows whose predicted label matches the true label.
acc = accuracy_score(y_true=Y_test, y_pred=y_predict)

In [73]:
# Display the accuracy on the test split.
acc

0.9441624365482234