In [None]:
!pip install -q pandas scikit-learn imbalanced-learn

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

In [None]:
# Read creditcard.csv (resolved relative to the current working directory)
# into a DataFrame; later cells expect it to contain a "Class" column.
df = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [None]:
# Keep only the rows whose "Class" label is present; rows with a missing
# label cannot be used for supervised training or evaluation.
df = df.loc[df["Class"].notna()]

In [None]:
# "Class" is the prediction target; every other column is used as a feature.
y = df.loc[:, "Class"]
X = df.drop(columns="Class")

In [None]:
# Hold out 25% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.25,
)

In [None]:
# Oversample with SMOTE using the training split only. The test split is
# never passed to the sampler, so evaluation still runs on original rows.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X=X_train, y=y_train)

In [None]:
# Train a 50-tree random forest on the SMOTE-resampled training data.
# The seed is fixed so repeated runs build the same forest.
model = RandomForestClassifier(random_state=42, n_estimators=50)
model.fit(X=X_resampled, y=y_resampled)

In [None]:
# Predict on both evaluation sets in one pass. Note that the "train"
# predictions are made on the resampled training data, not on X_train.
y_train_pred, y_test_pred = (
    model.predict(features) for features in (X_resampled, X_test)
)


In [None]:
# Train accuracy is scored against the SMOTE-resampled labels, whereas test
# accuracy is scored on the untouched hold-out rows, so the two numbers are
# measured on different class distributions.
train_acc = accuracy_score(y_resampled, y_train_pred)
test_acc = accuracy_score(y_test, y_test_pred)
print("🔹 Train Accuracy:", train_acc)
print("🔹 Test Accuracy:", test_acc)

🔹 Train Accuracy: 0.9758975970286443
🔹 Test Accuracy: 0.9943681357265245


In [None]:
# Heading and matrix are emitted by one print call; sep="\n" puts the matrix
# on the line after the heading. Rows are true labels, columns are predictions.
print("\n📊 Confusion Matrix:", confusion_matrix(y_test, y_test_pred), sep="\n")


📊 Confusion Matrix:
[[70695   384]
 [   17   106]]


In [None]:
# Per-class precision / recall / F1 on the hold-out set.
# NOTE(review): target_names are applied in sorted-label order, so this
# assumes the smaller Class value means "Genuine" and the larger "Fraud" - confirm.
report = classification_report(
    y_test,
    y_test_pred,
    target_names=["Genuine", "Fraud"],
)
print("\n📄 Classification Report:")
print(report)


📄 Classification Report:
              precision    recall  f1-score   support

     Genuine       1.00      0.99      1.00     71079
       Fraud       0.22      0.86      0.35       123

    accuracy                           0.99     71202
   macro avg       0.61      0.93      0.67     71202
weighted avg       1.00      0.99      1.00     71202



In [None]:
# Serialise the fitted model to fraud_model.pkl in the working directory.
# The context manager closes the file even if pickling fails.
with open("fraud_model.pkl", "wb") as model_file:
    pickle.dump(obj=model, file=model_file)

In [None]:
# Build a small file of "new" transactions for the inference demo.
# The rows are drawn from the held-out test features rather than from the
# full dataset: sampling from `df` would mostly pick rows the model was
# trained on, so the demo predictions would not reflect unseen data.
# X_test already excludes the "Class" column, and .sample() returns a new
# frame, so adding columns to new_data later does not touch X_test.
new_data = X_test.sample(n=5, random_state=42)
new_data.to_csv("new_transactions.csv", index=False)
print("'new_transactions.csv' created with 5 sample rows.")

'new_transactions.csv' created with 5 sample rows.


In [None]:
# Reload the model from disk to mimic a separate inference session.
# pickle.load can execute arbitrary code, so only unpickle files you trust;
# this one was written by this notebook a few cells earlier.
with open("fraud_model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Score the sample rows and store the result in a "Prediction" column.
# The model input explicitly excludes "Prediction" so this cell can be
# re-run: without that, a second run would pass the previously added column
# to predict() as an extra feature and raise.
feature_frame = new_data.drop(columns="Prediction", errors="ignore")
new_data["Prediction"] = loaded_model.predict(feature_frame)

In [None]:
# Write the sample rows together with their predictions; index=False leaves
# the DataFrame index out of the CSV.
new_data.to_csv(path_or_buf="predicted_transactions.csv", index=False)
print("'predicted_transactions.csv' saved with predictions.")

'predicted_transactions.csv' saved with predictions.
