In [1]:
# import required libraries
import os

import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix 

In [3]:
# load dataset
# The CSV location can be overridden through the CREDITCARD_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is not set, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "CREDITCARD_CSV",
    r"C:\Users\Barsha\Desktop\Credit-Card-Fraud-Detection\creditcard-dataset.csv",
)
data = pd.read_csv(DATA_PATH)

In [4]:
# Basic data analysis: print a plain-text preview of the first five rows.
print(data.head(n=5))

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [5]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [6]:
# Count how many rows fall into each value of the 'Class' target column.
print(data.loc[:, 'Class'].value_counts())

Class
0    284315
1       492
Name: count, dtype: int64


In [7]:
# Separate the target from the features:
#   y -> the 'Class' column
#   x -> every remaining column
y = data['Class']
x = data.drop(columns='Class')

In [8]:
# Feature scaling: learn the scaling statistics from x, then apply them to x.
# NOTE(review): the scaler is fit on every row of x, including rows that are
# later held out for testing.
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

In [9]:
# train-test split
# Split the raw features `x` first, then fit the scaler on the training rows
# only and apply it to both splits. Fitting the scaler on all rows before the
# split lets test-set statistics influence preprocessing (train/test leakage).
# The stratified 80/20 split and the seed are unchanged, so the same rows land
# in each split as before.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)
scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

In [10]:
# Train the classifier: logistic regression with the iteration cap set to
# 1000, fitted on the training split. fit() returns the estimator itself, so
# the trailing expression displays the same fitted model.
model = LogisticRegression(max_iter=1000).fit(x_train, y_train)
model

In [11]:
# Model evaluation: predict on the held-out split, then print each metric
# under its heading (accuracy, confusion matrix, classification report).
y_pred = model.predict(x_test)
for heading, metric in (
    ('Accuracy:', accuracy_score),
    ('\nConfusion Matrix:\n', confusion_matrix),
    ('\nClassification Report:\n', classification_report),
):
    print(heading, metric(y_test, y_pred))

Accuracy: 0.9991748885221726

Confusion Matrix:
 [[56851    13]
 [   34    64]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.83      0.65      0.73        98

    accuracy                           1.00     56962
   macro avg       0.92      0.83      0.87     56962
weighted avg       1.00      1.00      1.00     56962



In [12]:
# Classify a single transaction (row 0 of x_test) as legit or fraud.
# reshape(1, -1) turns the row into a one-row 2-D array before it is passed
# to predict(); a predicted label of 0 is reported as legit, anything else as fraud.
transaction = x_test[0].reshape(1, -1)
prediction = model.predict(transaction)
print('Legit Transaction' if prediction[0] == 0 else 'Fraud Transaction')

Legit Transaction


In [13]:
# Find the positions in the test split whose true label is fraud (Class == 1).
fraud_indices = np.flatnonzero(y_test == 1)
print('Fraud transactions are at these indices in x_test:', fraud_indices)

Fraud transactions are at these indices in x_test: [  840  1146  3287  4276  5077  5453  7164  7299  7337  9036  9156  9179
  9516  9730  9770 10130 10623 12266 12588 13968 15712 16303 16715 17046
 18076 19638 20216 20472 20687 20971 20992 22956 23090 23679 24570 24869
 25468 26685 26892 28390 28867 29865 30275 30513 30724 31208 31297 31804
 32115 32200 33111 34124 35880 37511 37564 39206 39433 39768 40860 41174
 41358 41517 42712 43032 43261 43479 43547 45600 46122 46403 46497 46841
 47658 48945 48975 49527 50025 50557 50636 50678 50818 51243 52728 52772
 52778 53376 53979 54064 54191 54701 54752 54894 54930 55071 55247 56143
 56287 56421]


In [14]:
# Run the same single-row check on test row 33111, one of the positions
# listed as fraud in the output of the previous cell.
transaction = x_test[33111].reshape(1, -1)
prediction = model.predict(transaction)
print('Legit Transaction' if prediction[0] == 0 else 'Fraud Transaction')

Fraud Transaction


In [15]:
# Keep the accuracy on the test split in a variable for later reuse.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9991748885221726


In [16]:
# Show the same accuracy expressed as a percentage.
print('Accuracy:', 100 * accuracy, '%')

Accuracy: 99.91748885221726 %
