In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

from imblearn.under_sampling import RandomUnderSampler
import joblib


In [None]:
# Read the transactions CSV into a DataFrame and preview its first five rows.
# 'creditcard.csv' is a relative path, so it is resolved against the kernel's
# working directory (normally the folder this notebook lives in).
df = pd.read_csv(filepath_or_buffer='creditcard.csv')
df.head(n=5)


In [None]:
# Per-column descriptive statistics (count, mean, std, min, quartiles, max).
# The quartiles are spelled out explicitly; they are the same ones describe()
# reports by default.
df.describe(percentiles=[0.25, 0.5, 0.75])


In [None]:
# Bar chart of how many rows carry each value of the 'Class' label.
sns.countplot(data=df, x='Class')
# Title the axes that countplot just drew on (the current axes).
plt.gca().set_title("Class Distribution (0 = Legit, 1 = Fraud)")
plt.show()


In [None]:
# Features and target
X = df.drop('Class', axis=1)
y = df['Class']

# Normalize 'Amount' and 'Time'
scaler = StandardScaler()
X[['Amount', 'Time']] = scaler.fit_transform(X[['Amount', 'Time']])


In [None]:
# Under-sample the majority class
rus = RandomUnderSampler(random_state=42)
X_res, y_res = rus.fit_resample(X, y)

# Check new class balance
print(pd.Series(y_res).value_counts())


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.3, random_state=42)


In [None]:
# Random forest of 100 trees; the fixed seed makes the fitted model reproducible.
model = RandomForestClassifier(random_state=42, n_estimators=100)
# Kept as the cell's last expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)


In [None]:
# Predict labels for the held-out features, then print per-class
# precision / recall / F1 / support against the true labels.
y_pred = model.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# Save the model
joblib.dump(model, '../api/model.pkl')
print("Model saved to '../api/model.pkl'")

# Save the fitted scaler next to it. The model was trained on standardized
# 'Amount' and 'Time', so any code that loads model.pkl has to apply this same
# fitted scaler to raw inputs before calling predict(); without it the
# preprocessing cannot be reproduced outside this notebook.
joblib.dump(scaler, '../api/scaler.pkl')
print("Scaler saved to '../api/scaler.pkl'")
