In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,classification_report,f1_score,roc_auc_score

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset= pd.read_csv('credit_card_dataset_DP.csv')
dataset

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,is_fraud,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel
0,84.47,22,0,0,66,3,40,0,1,0,0,0
1,541.82,3,1,0,87,1,64,0,0,0,0,1
2,237.01,17,0,0,49,1,61,0,0,0,1,0
3,164.33,4,0,1,72,3,34,0,0,0,1,0
4,30.53,15,0,0,79,0,44,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,350.91,22,0,0,99,4,37,0,0,1,0,0
9996,410.04,5,0,0,70,3,25,0,0,0,0,0
9997,527.75,21,0,0,44,2,45,0,1,0,0,0
9998,91.20,2,0,0,38,0,37,0,1,0,0,0


In [4]:
# Split data into indep and dep
indep = dataset.drop('is_fraud',axis=1)
dep = dataset['is_fraud']

In [5]:
indep

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel
0,84.47,22,0,0,66,3,40,1,0,0,0
1,541.82,3,1,0,87,1,64,0,0,0,1
2,237.01,17,0,0,49,1,61,0,0,1,0
3,164.33,4,0,1,72,3,34,0,0,1,0
4,30.53,15,0,0,79,0,44,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,350.91,22,0,0,99,4,37,0,1,0,0
9996,410.04,5,0,0,70,3,25,0,0,0,0
9997,527.75,21,0,0,44,2,45,1,0,0,0
9998,91.20,2,0,0,38,0,37,1,0,0,0


In [6]:
dep

0       0
1       0
2       0
3       0
4       0
       ..
9995    0
9996    0
9997    0
9998    0
9999    0
Name: is_fraud, Length: 10000, dtype: int64

In [7]:
# train_test_split data 
X_train,X_test,y_train,y_test = train_test_split(indep,dep,test_size= 0.2,random_state=0)

#Standard scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#Smote for data imbalance
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)
print("Before SMOTE:\n", y_train.value_counts())
print("After SMOTE:\n", y_train_smote.value_counts())

Before SMOTE:
 is_fraud
0    7874
1     126
Name: count, dtype: int64
After SMOTE:
 is_fraud
0    7874
1    7874
Name: count, dtype: int64


In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

param_grid = {'kernel':['linear','rbf'],
             'gamma':['auto','scale'],
             'C':[1,10,100]} 

grid = GridSearchCV(SVC(probability=True), param_grid, refit = True, cv=3,verbose = 3,n_jobs=-1,scoring='f1_weighted') 
   
# fitting the model for grid search 
grid.fit(X_train_smote, y_train_smote) 

Fitting 3 folds for each of 12 candidates, totalling 36 fits


In [9]:
re=grid.cv_results_
grid_predictions = grid.predict(X_test_scaled) 
cm = confusion_matrix(y_test, grid_predictions)
clf_report = classification_report(y_test, grid_predictions)
f1_macro=f1_score(y_test,grid_predictions,average='weighted')
roc_score = roc_auc_score(y_test,grid.predict_proba(X_test_scaled)[:,1])

In [10]:
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)
print("\nThe confusion Matrix:\n",cm)
print("\nThe report:\n",clf_report)
print("\nROC_AUC_Score:",roc_score)

The f1_macro value for best parameter {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}: 0.989

The confusion Matrix:
 [[1964   11]
 [  11   14]]

The report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      1975
           1       0.56      0.56      0.56        25

    accuracy                           0.99      2000
   macro avg       0.78      0.78      0.78      2000
weighted avg       0.99      0.99      0.99      2000


ROC_AUC_Score: 0.9664506329113924


In [11]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,5.520876,0.815996,0.097167,0.002103,1,auto,linear,"{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}",0.983046,0.97942,0.980564,0.98101,0.001514,11
1,6.336358,0.321601,0.35435,0.009255,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.992952,0.991617,0.992951,0.992507,0.000629,5
2,4.885213,0.38518,0.0949,0.005573,1,scale,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",0.983046,0.97942,0.980564,0.98101,0.001514,11
3,6.773983,0.059938,0.361113,0.00708,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.99219,0.991236,0.991998,0.991808,0.000412,6
4,10.663271,0.248483,0.100758,0.002171,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.983236,0.97942,0.980564,0.981074,0.001599,9
5,4.093978,0.091724,0.181357,0.0038,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.996381,0.995428,0.99619,0.995999,0.000412,3
6,10.705721,0.243789,0.092494,0.00743,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.983236,0.97942,0.980564,0.981074,0.001599,9
7,4.415094,0.051506,0.199249,0.007914,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.99619,0.994856,0.995237,0.995428,0.000561,4
8,29.434109,0.840216,0.070272,0.010009,100,auto,linear,"{'C': 100, 'gamma': 'auto', 'kernel': 'linear'}",0.983427,0.97942,0.980564,0.981137,0.001685,7
9,3.836554,0.072469,0.148632,0.003643,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",0.99619,0.995809,0.99619,0.996063,0.00018,2


In [12]:
amount_input = float(input("Enter amount:"))
transhr_input = int(input("Enter transaction hour:"))
foreigntrans_input = int(input("Enter foreign transaction (yes(1)/No(0)):"))
loc_mismatch_input = int(input("Enter Location mismatch (yes(1)/No(0)):"))
dev_ts_input = int(input("Enter Device trust score:"))
velocity24h_input = int(input("Enter frequency of transaction in 24 hrs:"))
cardholder_age_input = int(input("Enter card holder age:"))
MCE_input = int(input("Enter Merchant category - Electronics (yes(1)/No(0)):"))
MCF_input = int(input("Enter Merchant category - Food (yes(1)/No(0)):")) 
MCG_input = int(input("Enter Merchant category - Grocery (yes(1)/No(0)):"))
MCT_input = int(input("Enter Merchant category - Travel (yes(1)/No(0)):"))

Enter amount: 10000
Enter transaction hour: 1
Enter foreign transaction (yes(1)/No(0)): 0
Enter Location mismatch (yes(1)/No(0)): 0
Enter Device trust score: 50
Enter frequency of transaction in 24 hrs: 4
Enter card holder age: 26
Enter Merchant category - Electronics (yes(1)/No(0)): 0
Enter Merchant category - Food (yes(1)/No(0)): 1
Enter Merchant category - Grocery (yes(1)/No(0)): 0
Enter Merchant category - Travel (yes(1)/No(0)): 0


In [14]:
user_data=[[amount_input,transhr_input,foreigntrans_input,loc_mismatch_input,dev_ts_input,velocity24h_input,cardholder_age_input,MCE_input,MCF_input,MCG_input,MCT_input]]
user_data_scaled = scaler.transform(user_data)
future_prediction = grid.predict(user_data_scaled)
print("Credit Card Fraud Prediction =", future_prediction[0])

Credit Card Fraud Prediction = 0
