In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [3]:
data = pd.read_csv("Fraud.csv")

In [4]:
data.info

<bound method DataFrame.info of          step      type     amount     nameOrig  oldbalanceOrg  \
0           1   PAYMENT    9839.64  C1231006815      170136.00   
1           1   PAYMENT    1864.28  C1666544295       21249.00   
2           1  TRANSFER     181.00  C1305486145         181.00   
3           1  CASH_OUT     181.00   C840083671         181.00   
4           1   PAYMENT   11668.14  C2048537720       41554.00   
...       ...       ...        ...          ...            ...   
1048570    95  CASH_OUT  132557.35  C1179511630      479803.00   
1048571    95   PAYMENT    9917.36  C1956161225       90545.00   
1048572    95   PAYMENT   14140.05  C2037964975       20545.00   
1048573    95   PAYMENT   10020.05  C1633237354       90605.00   
1048574    95   PAYMENT   11450.03  C1264356443       80584.95   

         newbalanceOrig     nameDest  oldbalanceDest  newbalanceDest  isFraud  \
0             160296.36  M1979787155            0.00            0.00        0   
1            

In [5]:
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [6]:
data_cleaned = data.drop(columns=["nameOrig", "nameDest", "isFlaggedFraud"])


In [7]:
label_encoder = LabelEncoder()
data_cleaned["type"] = label_encoder.fit_transform(data_cleaned["type"])

In [8]:
X = data_cleaned.drop(columns=["isFraud"])
y = data_cleaned["isFraud"]


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [11]:
X_train

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
70606,9,3,2265.26,20950.00,18684.74,0.00,0.00
701854,37,0,364162.66,50066.00,414228.66,0.00,0.00
261358,15,3,4338.74,0.00,0.00,0.00,0.00
412026,18,0,299163.26,820.00,299983.26,0.00,0.00
897616,42,0,25652.02,3522.00,29174.02,219292.18,193640.17
...,...,...,...,...,...,...,...
717386,37,1,86601.56,0.00,0.00,432043.48,518645.05
150120,12,3,52658.94,0.00,0.00,0.00,0.00
346883,16,1,91086.97,51915.00,0.00,119456.90,210543.87
211070,13,1,404737.88,0.00,0.00,16374358.12,16779096.00


In [12]:
y_train

70606     0
701854    0
261358    0
412026    0
897616    0
         ..
717386    0
150120    0
346883    0
211070    0
332322    0
Name: isFraud, Length: 838860, dtype: int64

In [20]:
smote = SMOTE(random_state=42)


In [21]:
model = RandomForestClassifier(n_estimators=50, random_state=42, class_weight="balanced")


In [22]:
X_train_resampled, y_train_resampled = smote.fit_resample(X_train.sample(50000, random_state=42), y_train.sample(50000, random_state=42))


In [23]:
model.fit(X_train_resampled, y_train_resampled)

In [24]:
y_pred = model.predict(X_test)

In [25]:
report = classification_report(y_test, y_pred)

In [26]:
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    209487
           1       0.55      0.69      0.61       228

    accuracy                           1.00    209715
   macro avg       0.78      0.84      0.81    209715
weighted avg       1.00      1.00      1.00    209715



In [27]:
import numpy as np


In [28]:
def predict_fraud(input_data):
    input_array = np.array(input_data).reshape(1, -1)
    prediction = model.predict(input_array)
    return "Fraud" if prediction[0] == 1 else "Not Fraud"

In [29]:
sample_input = X_test.iloc[0].values

In [30]:
print("Prediction for sample input:", predict_fraud(sample_input))

Prediction for sample input: Not Fraud


