In [49]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler


In [50]:
# Load the raw transactions table; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv("Banking_Fraud_Dataset.csv")

In [51]:
# Show the freshly loaded frame; pandas renders only the first and last rows
# of a long frame, with an ellipsis row in between.
df

Unnamed: 0,Transaction_ID,Customer_ID,Transaction_Amount,Transaction_Type,Transaction_Location,Transaction_Time,Device_Used,Account_Age,Credit_Score,Previous_Fraud,Is_Fraud
0,T1,C2539,8527.58,Deposit,"Hughesmouth, Mongolia",2025-01-20 03:17:34,Mobile,16,740,0,0
1,T2,C5318,9275.82,Deposit,"Huntville, Saint Pierre and Miquelon",2025-01-03 18:08:56,Web,5,848,0,1
2,T3,C8262,2202.49,Online Payment,"Patriciashire, Iceland",2025-01-24 05:54:01,ATM,2,616,1,1
3,T4,C3865,9352.32,Deposit,"Port Timothymouth, Palau",2025-01-19 01:15:30,ATM,9,437,1,0
4,T5,C7248,2081.75,Online Payment,"Lake Stevenfurt, Mauritius",2025-01-08 18:06:13,ATM,15,494,0,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,T9996,C2340,3678.03,Online Payment,"Port Matthew, Malawi",2025-01-14 05:47:08,Web,1,439,1,1
9996,T9997,C6185,6041.40,Transfer,"Lanceville, Swaziland",2025-01-08 04:40:34,POS Terminal,4,738,1,1
9997,T9998,C6114,4755.10,Online Payment,"North Scottmouth, Guinea-Bissau",2025-01-17 05:00:54,POS Terminal,18,649,1,1
9998,T9999,C4483,1649.96,Deposit,"North Miranda, Estonia",2025-01-23 09:44:47,POS Terminal,10,838,1,1


In [52]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each column, so that level is represented by all of its sibling
# indicator columns being False.
df = pd.get_dummies(
    df,
    columns=["Transaction_Type", "Device_Used"],
    drop_first=True,
)

In [53]:
# Inspect the frame after one-hot encoding (new indicator columns are appended
# at the end; the two original categorical columns are gone).
df

Unnamed: 0,Transaction_ID,Customer_ID,Transaction_Amount,Transaction_Location,Transaction_Time,Account_Age,Credit_Score,Previous_Fraud,Is_Fraud,Transaction_Type_Online Payment,Transaction_Type_Transfer,Transaction_Type_Withdrawal,Device_Used_Mobile,Device_Used_POS Terminal,Device_Used_Web
0,T1,C2539,8527.58,"Hughesmouth, Mongolia",2025-01-20 03:17:34,16,740,0,0,False,False,False,True,False,False
1,T2,C5318,9275.82,"Huntville, Saint Pierre and Miquelon",2025-01-03 18:08:56,5,848,0,1,False,False,False,False,False,True
2,T3,C8262,2202.49,"Patriciashire, Iceland",2025-01-24 05:54:01,2,616,1,1,True,False,False,False,False,False
3,T4,C3865,9352.32,"Port Timothymouth, Palau",2025-01-19 01:15:30,9,437,1,0,False,False,False,False,False,False
4,T5,C7248,2081.75,"Lake Stevenfurt, Mauritius",2025-01-08 18:06:13,15,494,0,0,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,T9996,C2340,3678.03,"Port Matthew, Malawi",2025-01-14 05:47:08,1,439,1,1,True,False,False,False,False,True
9996,T9997,C6185,6041.40,"Lanceville, Swaziland",2025-01-08 04:40:34,4,738,1,1,False,True,False,False,True,False
9997,T9998,C6114,4755.10,"North Scottmouth, Guinea-Bissau",2025-01-17 05:00:54,18,649,1,1,True,False,False,False,True,False
9998,T9999,C4483,1649.96,"North Miranda, Estonia",2025-01-23 09:44:47,10,838,1,1,False,False,False,False,True,False


In [54]:
# Parse the timestamp column into datetime values so the `.dt` accessor can be
# used in the following cells.
df["Transaction_Time"] = pd.to_datetime(df["Transaction_Time"])

In [55]:
# Hour of day (0-23) extracted from the parsed timestamp.
df["Hour"] = df["Transaction_Time"].dt.hour

In [56]:
# Day of the week as an integer (Monday=0 ... Sunday=6). The column keeps the
# name "DateOfWeek" even though it stores the weekday number.
df["DateOfWeek"] = df["Transaction_Time"].dt.dayofweek

In [57]:
# Calendar month (1-12) extracted from the parsed timestamp.
# NOTE(review): every row visible in the previews is from January 2025; if that
# holds for the whole file this column is constant — TODO confirm.
df["Month"] = df["Transaction_Time"].dt.month

In [58]:
# Remove the raw timestamp (Hour/DateOfWeek/Month were derived from it above)
# together with the two ID columns and the location text column.
# Rebinding `df` instead of using inplace=True leaves the frame object that an
# earlier display cell already returned untouched, and follows the pandas
# recommendation to avoid in-place mutation.
df = df.drop(
    columns=[
        "Transaction_Time",
        "Transaction_ID",
        "Customer_ID",
        "Transaction_Location",
    ]
)

In [59]:
# Inspect the final modelling frame: numeric/boolean feature columns plus the
# Is_Fraud label.
df

Unnamed: 0,Transaction_Amount,Account_Age,Credit_Score,Previous_Fraud,Is_Fraud,Transaction_Type_Online Payment,Transaction_Type_Transfer,Transaction_Type_Withdrawal,Device_Used_Mobile,Device_Used_POS Terminal,Device_Used_Web,Hour,DateOfWeek,Month
0,8527.58,16,740,0,0,False,False,False,True,False,False,3,0,1
1,9275.82,5,848,0,1,False,False,False,False,False,True,18,4,1
2,2202.49,2,616,1,1,True,False,False,False,False,False,5,4,1
3,9352.32,9,437,1,0,False,False,False,False,False,False,1,6,1
4,2081.75,15,494,0,0,True,False,False,False,False,False,18,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,3678.03,1,439,1,1,True,False,False,False,False,True,5,1,1
9996,6041.40,4,738,1,1,False,True,False,False,True,False,4,2,1
9997,4755.10,18,649,1,1,True,False,False,False,True,False,5,4,1
9998,1649.96,10,838,1,1,False,False,False,False,True,False,9,3,1


In [60]:
# Separate the label column from the predictors.
y = df["Is_Fraud"]
x = df.drop(columns=["Is_Fraud"])

In [61]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [62]:
# Report the dimensions of the full feature matrix and of the label vector,
# one per line.
print(x.shape, y.shape, sep="\n")

(10000, 13)
(10000,)


In [63]:
# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same transformation to both splits so no test-set statistics
# influence the scaling.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [64]:
# Random forest with library-default hyperparameters; only the seed is fixed so
# repeated runs build the same forest.
# NOTE(review): the recorded evaluation lists 1432 vs 568 test samples for
# classes 0 and 1 — class weighting may be worth evaluating; TODO confirm.
model = RandomForestClassifier(random_state=42)


In [65]:
# Train the classifier on the scaled training split.
model.fit(x_train_scaled, y_train)

In [66]:
# Predict labels for the scaled test split and print both shapes as a quick
# check that predictions and ground truth line up one-to-one.
y_pred = model.predict(x_test_scaled)
print(f"y_pred shape: {y_pred.shape}")
print(f"y_test shape: {y_test.shape}")

y_pred shape: (2000,)
y_test shape: (2000,)


In [67]:
# Score the held-out predictions. `confusion_matrix` is imported in the
# notebook's top import cell rather than here, so all dependencies are declared
# in one place.
# In the confusion matrix, rows are the true classes and columns the predicted
# classes (class 0 first, then class 1).
print("Confusion Matrix", confusion_matrix(y_test, y_pred))
# Per-class precision, recall, F1 and support, plus overall accuracy.
print("Classification Report", classification_report(y_test, y_pred))

Confusion Matrix [[1297  135]
 [ 426  142]]
Classification Report               precision    recall  f1-score   support

           0       0.75      0.91      0.82      1432
           1       0.51      0.25      0.34       568

    accuracy                           0.72      2000
   macro avg       0.63      0.58      0.58      2000
weighted avg       0.68      0.72      0.68      2000

