In [81]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

In [82]:
df = pd.read_csv(r'C:\Dataset\PROJECTS\creditcard.csv')
df.drop(columns=df.loc[:, 'V6':'V28'].columns, inplace=True)


In [83]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,69.99,0


In [84]:
df.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
Amount    0
Class     0
dtype: int64

In [85]:
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.168375e-15,3.416908e-16,-1.379537e-15,2.074095e-15,9.604066e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,25691.16,1.0


In [86]:
df.shape

(284807, 8)

In [87]:
print(f"Number of duplicate rows: {df.duplicated().sum()}")

Number of duplicate rows: 1081


In [88]:
df.drop_duplicates(inplace=True)

In [89]:
print(f"Number of duplicate rows: {df.duplicated().sum()}")

Number of duplicate rows: 0


In [90]:
X = df.drop("Class", axis=1)
y = df["Class"]  # 1 = fraud, 0 = genuine


In [91]:
from sklearn.preprocessing import StandardScaler

In [92]:
sc = StandardScaler()
sc.fit_transform(df[['Time', 'Amount']])

array([[-1.99682292,  0.24419951],
       [-1.99682292, -0.34258399],
       [-1.99680186,  1.15889967],
       ...,
       [ 1.64227757, -0.0822395 ],
       [ 1.64227757, -0.31339058],
       [ 1.64236181,  0.51329005]])

In [93]:
from sklearn.model_selection import train_test_split

In [94]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [95]:
from sklearn.ensemble import RandomForestClassifier

In [96]:
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)


In [97]:
rfc.score(X_test, y_test)*100

99.9066013463504

In [98]:
from sklearn.metrics import classification_report

In [99]:
y_pred = rfc.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56656
           1       0.93      0.44      0.60        90

    accuracy                           1.00     56746
   macro avg       0.96      0.72      0.80     56746
weighted avg       1.00      1.00      1.00     56746



In [100]:
def predict_transaction(transaction_data, rfc, sc):
    # transaction_data: list or dict of 30 features including scaled 'Time' and 'Amount'
    df = pd.DataFrame([transaction_data])
    
    prediction = rfc.predict(df)[0]
    return "Fraudulent" if prediction == 1 else "Genuine"



In [106]:
sample_transaction = {
    'Time': 50000,
    'V1': -1.3598071336738,
    'V2': -0.0727811733098497,
    'V3': 2.53634673796914,
    'V4': 1.37815522427443,
    'V5': -0.338320769942518,
    'Amount': 149.62
}

print(predict_transaction(sample_transaction, rfc, sc))


Genuine


In [107]:
sample_transaction = {
    'Time': 40660,
    'V1': -2.3122265423263,
    'V2': 1.95199201064158,
    'V3': -1.60985073222,
    'V4': 3.9979055875468,
    'V5': -0.522187864667764,
    'Amount': 0.00
}

print(predict_transaction(sample_transaction, rfc, sc))


Fraudulent


In [103]:
import pickle

# Save model to file
with open('creditcrad_model.pkl', 'wb') as f:
    pickle.dump(rfc, f)

print("Model saved as creditcrad_model.pkl")


Model saved as creditcrad_model.pkl
