**Importing the Libraries**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

**Reading the Dataset**

In [3]:
ccd=pd.read_csv('/content/creditcard.csv')

In [4]:
ccd.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


In [7]:
#dataset infomation
ccd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3973 entries, 0 to 3972
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    3973 non-null   int64  
 1   V1      3973 non-null   float64
 2   V2      3973 non-null   float64
 3   V3      3973 non-null   float64
 4   V4      3973 non-null   float64
 5   V5      3973 non-null   float64
 6   V6      3973 non-null   float64
 7   V7      3973 non-null   float64
 8   V8      3973 non-null   float64
 9   V9      3973 non-null   float64
 10  V10     3973 non-null   float64
 11  V11     3973 non-null   float64
 12  V12     3973 non-null   float64
 13  V13     3973 non-null   float64
 14  V14     3973 non-null   float64
 15  V15     3973 non-null   float64
 16  V16     3973 non-null   float64
 17  V17     3973 non-null   float64
 18  V18     3973 non-null   float64
 19  V19     3973 non-null   float64
 20  V20     3973 non-null   float64
 21  V21     3973 non-null   float64
 22  

In [8]:
#missing value in the dataset
ccd.isnull().sum()

Unnamed: 0,0
Time,0
V1,0
V2,0
V3,0
V4,0
V5,0
V6,0
V7,0
V8,0
V9,0


**Distribution of normal and fraud transactions**

In [9]:
ccd['Class'].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0.0,3970
1.0,2


In [11]:
#separate the data
normal=ccd[ccd.Class==0]
fraud=ccd[ccd.Class==1]

In [12]:
normal.shape

(3970, 31)

In [13]:
fraud.shape

(2, 31)

**Building new Normal dataset containing similar distribution of normal and fraud transactions**

In [14]:
normal_sample=normal.sample(n=492)


In [15]:
#Concatenating the two dataset
new_dataset=pd.concat([normal_sample,fraud],axis=0)


In [16]:
new_dataset.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
3957,3583,-1.420369,1.094646,0.753289,0.034035,-0.121157,-0.86864,0.151573,0.482707,-0.466498,...,-0.250587,-0.604463,0.348927,0.262271,-0.056091,0.068879,0.18036,0.022647,8.95,0.0
2580,2112,-0.423787,0.727723,1.226385,-2.014555,0.267051,-1.203387,1.154494,-0.368496,0.692826,...,0.118235,0.791246,-0.235255,0.426641,-0.0176,-0.832516,0.260731,-0.012157,1.0,0.0
2501,2073,1.315678,0.460463,-0.345322,0.984692,0.263118,-0.896401,0.51292,-0.298582,-0.181788,...,-0.037291,-0.064481,-0.206346,-0.142845,0.911125,-0.261727,-0.011115,0.004553,0.99,0.0
3700,3168,-0.941724,0.501456,1.458467,-0.614358,-0.093612,0.306152,2.293719,-0.52482,-0.645434,...,-0.037664,0.131981,-0.252463,0.030032,0.413791,0.242909,-0.38918,-0.469416,244.5,0.0
1005,758,-3.059533,1.828599,-0.272524,-2.518533,-0.433718,-0.999147,-0.300953,-3.212499,2.258083,...,2.790298,-0.702683,0.626516,0.32694,0.131713,-0.104649,0.649376,0.11499,1.0,0.0


In [17]:
new_dataset.shape

(494, 31)

**Splitting the new_Dataset into X and Y**

In [18]:
X=new_dataset.drop(columns='Class',axis=1)
Y=new_dataset['Class']

In [19]:
X

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
3957,3583,-1.420369,1.094646,0.753289,0.034035,-0.121157,-0.868640,0.151573,0.482707,-0.466498,...,-0.057696,-0.250587,-0.604463,0.348927,0.262271,-0.056091,0.068879,0.180360,0.022647,8.95
2580,2112,-0.423787,0.727723,1.226385,-2.014555,0.267051,-1.203387,1.154494,-0.368496,0.692826,...,0.036293,0.118235,0.791246,-0.235255,0.426641,-0.017600,-0.832516,0.260731,-0.012157,1.00
2501,2073,1.315678,0.460463,-0.345322,0.984692,0.263118,-0.896401,0.512920,-0.298582,-0.181788,...,-0.164466,-0.037291,-0.064481,-0.206346,-0.142845,0.911125,-0.261727,-0.011115,0.004553,0.99
3700,3168,-0.941724,0.501456,1.458467,-0.614358,-0.093612,0.306152,2.293719,-0.524820,-0.645434,...,0.008989,-0.037664,0.131981,-0.252463,0.030032,0.413791,0.242909,-0.389180,-0.469416,244.50
1005,758,-3.059533,1.828599,-0.272524,-2.518533,-0.433718,-0.999147,-0.300953,-3.212499,2.258083,...,-0.400387,2.790298,-0.702683,0.626516,0.326940,0.131713,-0.104649,0.649376,0.114990,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3656,3125,-1.520063,1.447099,1.902492,-0.269103,-0.241576,0.056039,0.221126,0.305234,-0.509095,...,0.088616,0.181656,0.488271,-0.223435,0.270550,0.162425,0.315639,-0.556935,-0.164739,1.79
1947,1501,-1.930422,1.778782,0.411402,-0.869500,-0.615354,-0.534365,-0.149129,0.955953,0.037004,...,0.014952,-0.105609,-0.447741,0.000562,0.006215,-0.100401,0.284431,-0.026413,0.162983,1.00
2539,2099,-3.336341,-3.198254,3.107583,3.686053,3.148263,-1.489632,-2.936623,0.756123,-0.967325,...,1.352705,0.679383,0.696213,0.523777,0.018309,0.158771,0.396889,-0.027211,0.175223,46.94
541,406,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.126911,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00


In [20]:
Y

Unnamed: 0,Class
3957,0.0
2580,0.0
2501,0.0
3700,0.0
1005,0.0
...,...
3656,0.0
1947,0.0
2539,0.0
541,1.0


**Splitting the dataset into Training Data and Testing Data**

In [21]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,stratify=Y,random_state=42)

In [22]:
print(X_train,X_test,Y_train,Y_test)

      Time        V1        V2        V3        V4        V5        V6  \
1089   839  0.486338 -1.063650  1.950002  3.299857 -1.617433  1.093129   
3169  2744 -1.372275  0.068146 -0.753773 -3.377237  1.545895  3.313358   
2462  2029  1.073759  0.182407  0.322718  1.358284 -0.275214 -0.706538   
2339  1881  1.227842 -0.060914  0.842038  0.890731 -0.245296  0.969620   
2404  1964 -1.618062 -0.283936  2.441065  0.329787  0.221055 -1.262277   
...    ...       ...       ...       ...       ...       ...       ...   
1571  1229 -1.161665 -0.343536  0.707184  0.391784  2.159449 -2.187319   
1768  1364 -0.769547  0.882482  1.984986  0.987157 -0.260967  0.329474   
897    677 -0.371028  0.163148  2.731392  0.731217 -0.321304  0.840124   
3400  2922 -1.055570  0.667092  0.774202 -2.244574  0.419298 -1.087666   
457    335 -0.779988  1.395775  1.780828 -0.187325  0.953850 -0.511395   

            V7        V8        V9  ...       V20       V21       V22  \
1089 -0.811582  0.340450  1.022500  ..

**Model Training-Logistic Regression**

In [23]:
model=LogisticRegression()

In [24]:
model.fit(X_train,Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**Model Evaluation- Accuracy Score**

In [25]:
y_predict=model.predict(X_test)

In [26]:
acc=accuracy_score(Y_test,y_predict)

In [27]:
acc

1.0