In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import confusion_matrix,classification_report,f1_score,roc_auc_score

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset= pd.read_csv('credit_card_dataset_DP.csv')
dataset

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,is_fraud,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel
0,84.47,22,0,0,66,3,40,0,1,0,0,0
1,541.82,3,1,0,87,1,64,0,0,0,0,1
2,237.01,17,0,0,49,1,61,0,0,0,1,0
3,164.33,4,0,1,72,3,34,0,0,0,1,0
4,30.53,15,0,0,79,0,44,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,350.91,22,0,0,99,4,37,0,0,1,0,0
9996,410.04,5,0,0,70,3,25,0,0,0,0,0
9997,527.75,21,0,0,44,2,45,0,1,0,0,0
9998,91.20,2,0,0,38,0,37,0,1,0,0,0


In [4]:
# Split data into indep and dep
indep = dataset.drop('is_fraud',axis=1)
dep = dataset['is_fraud']

In [5]:
indep

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel
0,84.47,22,0,0,66,3,40,1,0,0,0
1,541.82,3,1,0,87,1,64,0,0,0,1
2,237.01,17,0,0,49,1,61,0,0,1,0
3,164.33,4,0,1,72,3,34,0,0,1,0
4,30.53,15,0,0,79,0,44,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,350.91,22,0,0,99,4,37,0,1,0,0
9996,410.04,5,0,0,70,3,25,0,0,0,0
9997,527.75,21,0,0,44,2,45,1,0,0,0
9998,91.20,2,0,0,38,0,37,1,0,0,0


In [6]:
dep

0       0
1       0
2       0
3       0
4       0
       ..
9995    0
9996    0
9997    0
9998    0
9999    0
Name: is_fraud, Length: 10000, dtype: int64

In [7]:
# train_test_split data 
X_train,X_test,y_train,y_test = train_test_split(indep,dep,test_size= 0.2,random_state=0)

#Standard scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#Smote for data imbalance
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)
print("Before SMOTE:\n", y_train.value_counts())
print("After SMOTE:\n", y_train_smote.value_counts())

Before SMOTE:
 is_fraud
0    7874
1     126
Name: count, dtype: int64
After SMOTE:
 is_fraud
0    7874
1    7874
Name: count, dtype: int64


In [8]:
from sklearn.model_selection import GridSearchCV

param_grid = {'solver':['lbfgs','liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
             'penalty':['l1', 'l2', 'elasticnet']} 

grid = GridSearchCV(LogisticRegression(), param_grid, refit = True, verbose = 3,n_jobs=-1,scoring='f1_weighted') 
   
# fitting the model for grid search 
grid.fit(X_train_smote, y_train_smote)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [9]:
re=grid.cv_results_
grid_predictions = grid.predict(X_test_scaled) 
cm = confusion_matrix(y_test, grid_predictions)
clf_report = classification_report(y_test, grid_predictions)
f1_macro=f1_score(y_test,grid_predictions,average='weighted')
roc_score = roc_auc_score(y_test,grid.predict_proba(X_test_scaled)[:,1])

In [10]:
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)
print("\nThe confusion Matrix:\n",cm)
print("\nThe report:\n",clf_report)
print("\nROC_AUC_Score:",roc_score)

The f1_macro value for best parameter {'penalty': 'l1', 'solver': 'saga'}: 0.9766595806996022

The confusion Matrix:
 [[1915   60]
 [   3   22]]

The report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.98      1975
           1       0.27      0.88      0.41        25

    accuracy                           0.97      2000
   macro avg       0.63      0.92      0.70      2000
weighted avg       0.99      0.97      0.98      2000


ROC_AUC_Score: 0.9878683544303798


In [11]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.013662,0.004925,0.0,0.0,l1,lbfgs,"{'penalty': 'l1', 'solver': 'lbfgs'}",,,,,,,,9
1,0.51318,0.012339,0.02309,0.01015,l1,liblinear,"{'penalty': 'l1', 'solver': 'liblinear'}",0.980634,0.979045,0.981267,0.983804,0.979355,0.980821,0.001699,3
2,0.008875,0.00199,0.0,0.0,l1,newton-cg,"{'penalty': 'l1', 'solver': 'newton-cg'}",,,,,,,,9
3,0.011055,0.002436,0.0,0.0,l1,newton-cholesky,"{'penalty': 'l1', 'solver': 'newton-cholesky'}",,,,,,,,9
4,0.011219,0.001005,0.0,0.0,l1,sag,"{'penalty': 'l1', 'solver': 'sag'}",,,,,,,,9
5,1.233685,0.12063,0.014028,0.003482,l1,saga,"{'penalty': 'l1', 'solver': 'saga'}",0.980634,0.979363,0.981267,0.983804,0.979355,0.980885,0.001636,1
6,0.137386,0.043878,0.016696,0.005705,l2,lbfgs,"{'penalty': 'l2', 'solver': 'lbfgs'}",0.980634,0.979045,0.981267,0.983804,0.979355,0.980821,0.001699,3
7,0.248345,0.107272,0.019797,0.005336,l2,liblinear,"{'penalty': 'l2', 'solver': 'liblinear'}",0.980633,0.978092,0.981266,0.983486,0.978719,0.980439,0.001922,8
8,0.303721,0.093763,0.013666,0.003396,l2,newton-cg,"{'penalty': 'l2', 'solver': 'newton-cg'}",0.980951,0.979045,0.981267,0.983804,0.979355,0.980884,0.001697,2
9,0.164671,0.018486,0.013812,0.003548,l2,newton-cholesky,"{'penalty': 'l2', 'solver': 'newton-cholesky'}",0.980634,0.979045,0.981267,0.983804,0.979355,0.980821,0.001699,3


In [None]:
amount_input = float(input("Enter amount:"))
transhr_input = int(input("Enter transaction hour:"))
foreigntrans_input = int(input("Enter foreign transaction (yes(1)/No(0)):"))
loc_mismatch_input = int(input("Enter Location mismatch (yes(1)/No(0)):"))
dev_ts_input = int(input("Enter Device trust score:"))
velocity24h_input = int(input("Enter frequency of transaction in 24 hrs:"))
cardholder_age_input = int(input("Enter card holder age:"))
MCE_input = int(input("Enter Merchant category - Electronics (yes(1)/No(0)):"))
MCF_input = int(input("Enter Merchant category - Food (yes(1)/No(0)):")) 
MCG_input = int(input("Enter Merchant category - Grocery (yes(1)/No(0)):"))
MCT_input = int(input("Enter Merchant category - Travel (yes(1)/No(0)):"))

In [None]:
user_data=[[amount_input,transhr_input,foreigntrans_input,loc_mismatch_input,dev_ts_input,velocity24h_input,cardholder_age_input,MCE_input,MCF_input,MCG_input,MCT_input]]
user_data_scaled = scaler.transform(user_data)
future_prediction = grid.predict(user_data_scaled)
print("Credit Card Fraud Prediction =", future_prediction[0])