In [3]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from imblearn.over_sampling import SMOTE

In [4]:
data = pd.read_csv("creditcard.csv")

In [5]:
print(data.isnull().sum())

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [6]:
print(data["Class"].value_counts())

Class
0    284315
1       492
Name: count, dtype: int64


In [7]:
data["hour"] = data["Time"] % 24

In [8]:
X = data.drop("Class", axis=1)
Y = data["Class"]

In [9]:
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_resampled, Y_resampled = smote.fit_resample(X,Y)

[WinError 2] The system cannot find the file specified
  File "c:\Users\sarth\AppData\Local\Programs\Python\Python313\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
        "wmic CPU Get NumberOfCores /Format:csv".split(),
        capture_output=True,
        text=True,
    )
  File "c:\Users\sarth\AppData\Local\Programs\Python\Python313\Lib\subprocess.py", line 554, in run
    with Popen(*popenargs, **kwargs) as process:
         ~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sarth\AppData\Local\Programs\Python\Python313\Lib\subprocess.py", line 1036, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                        pass_fds, cwd, env,
                        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
                        gid, gids, uid, umask,
                        ^^^^^^^^^^^^^^^^^^^^^^
                   

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(X_resampled, Y_resampled, test_size=0.2, random_state=42)

In [11]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight="balanced")
rf_model.fit(X_train, Y_train)

In [12]:
rf_model.feature_names_in_ = X_train.columns

In [13]:
Y_pred = rf_model.predict(X_test)
print("Classification Report:\n", classification_report(Y_test, Y_pred))
print("Accuracy:", accuracy_score(Y_test, Y_pred))
print("ROC-AUC Score:", roc_auc_score(Y_test, Y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56777
           1       1.00      1.00      1.00     28518

    accuracy                           1.00     85295
   macro avg       1.00      1.00      1.00     85295
weighted avg       1.00      1.00      1.00     85295

Accuracy: 0.9998710358168709
ROC-AUC Score: 0.9998944033850268


In [14]:
joblib.dump(rf_model, "rf_fraud_detection.pkl")
print("✅ Model trained and saved as rf_fraud_detection.pkl")

✅ Model trained and saved as rf_fraud_detection.pkl


In [15]:
import joblib
import pandas as pd

In [16]:
rf_model = joblib.load("rf_fraud_detection.pkl")

In [17]:
new_transaction = {
    "Time": 0,
    "V1": -1.3598071336738,
    "V2": -0.0727811733098497,
    "V3": 2.53634673796914,
    "V4": 1.37815522427443,
    "V5": -0.338320769942518,
    "V6": 0.462387777762292,
    "V7": 0.239598554061257,
    "V8": 0.0986979012610507,
    "V9": 0.363786969611213,
    "V10": 0.0907941719789316,
    "V11": -0.551599533260813,
    "V12": -0.617800855762348,
    "V13": -0.991389847235408,
    "V14": -0.311169353699879,
    "V15": 1.46817697209427,
    "V16": -0.470400525259478,
    "V17": 0.207971241929242,
    "V18": 0.0257905801985591,
    "V19": 0.403992960255733,
    "V20": 0.251412098239705,
    "V21": -0.018306777944153,
    "V22": 0.277837575558899,
    "V23": -0.110473910188767,
    "V24": 0.0669280749146731,
    "V25": 0.128539358273528,
    "V26": -0.189114843888824,
    "V27": 0.133558376740387,
    "V28": -0.0210530534538215,
    "Amount": 149.62
}

In [18]:
new_transaction["hour"] = new_transaction["Time"] % 24  

new_transaction_df = pd.DataFrame([new_transaction])

In [19]:
expected_features = rf_model.feature_names_in_
new_transaction_df = new_transaction_df[expected_features]
prediction = rf_model.predict(new_transaction_df)

if prediction == 1:
    print("🚨 Fraud detected!")
else:
    print("✅ Safe transaction.")

✅ Safe transaction.


In [20]:
Y_pred = rf_model.predict(X_test)
print("Classification Report:\n", classification_report(Y_test, Y_pred))
print("Accuracy:", accuracy_score(Y_test, Y_pred))
print("ROC-AUC Score:", roc_auc_score(Y_test, Y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56777
           1       1.00      1.00      1.00     28518

    accuracy                           1.00     85295
   macro avg       1.00      1.00      1.00     85295
weighted avg       1.00      1.00      1.00     85295

Accuracy: 0.9998710358168709
ROC-AUC Score: 0.9998944033850268
