# FraudGuard – Model Training and Risk Scoring


In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score


In [2]:
df = pd.read_csv("../data/creditcard.csv")

# Recreate engineered features
# The rolling and diff features below depend on row order, and many rows can
# share the same Time value. A stable sort keeps tied rows in their original
# file order so the derived features are reproducible across runs/platforms.
df = df.sort_values("Time", kind="mergesort").reset_index(drop=True)

# Hour-of-day bucket; assumes Time is an offset in seconds — TODO confirm.
df['hour'] = (df['Time'] // 3600) % 24
# 1 when hour < 6, else 0 (vectorized instead of a per-row Python lambda).
df['is_night_txn'] = (df['hour'] < 6).astype(int)

# log(1 + Amount) compresses the amount scale and is defined for Amount == 0.
df['log_amount'] = np.log1p(df['Amount'])

# Trailing mean over up to 10 rows of the time-ordered stream. The window
# includes the current row, and min_periods=1 avoids NaN for the first rows.
df['rolling_mean_amount'] = df['Amount'].rolling(window=10, min_periods=1).mean()
df['amount_deviation'] = df['Amount'] - df['rolling_mean_amount']

# Gap to the previous row in the whole stream (no grouping key is used);
# the first row has no predecessor, so its gap is set to 0.
df['time_since_last_txn'] = df['Time'].diff().fillna(0)
# 1 when the gap is below 60 Time units, else 0.
df['high_velocity'] = (df['time_since_last_txn'] < 60).astype(int)

# Catch-all for any remaining missing values; reassigned rather than mutated in place.
df = df.fillna(0)


In [3]:
# Engineered columns used as model inputs, in the order the models will see them.
features = [
    'hour', 'is_night_txn',                   # time-of-day signals
    'log_amount', 'amount_deviation',         # amount signals
    'time_since_last_txn', 'high_velocity',   # transaction-spacing signals
]

# Design matrix and target label.
X = df.loc[:, features]
y = df.loc[:, 'Class']


In [4]:
# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the
# class ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
    test_size=0.2,
)


In [5]:
# Logistic regression baseline.
# The scaler's statistics are learned from the training split only and then
# reused unchanged on the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# class_weight='balanced' reweights classes inversely to their frequency.
lr = LogisticRegression(max_iter=1000, class_weight='balanced').fit(
    X_train_scaled, y_train
)

# Hard labels for the report; second predict_proba column (label 1 here) for ROC-AUC.
y_pred_lr = lr.predict(X_test_scaled)
y_prob_lr = lr.predict_proba(X_test_scaled)[:, 1]

print(classification_report(y_test, y_pred_lr))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_lr))


              precision    recall  f1-score   support

           0       1.00      0.76      0.86     56864
           1       0.00      0.50      0.01        98

    accuracy                           0.76     56962
   macro avg       0.50      0.63      0.43     56962
weighted avg       1.00      0.76      0.86     56962

ROC-AUC: 0.6568021588207597


In [6]:
# Random forest, fitted on the unscaled feature columns.
# n_jobs=-1 uses all available cores; random_state pins the result.
rf = RandomForestClassifier(
    n_estimators=100, class_weight='balanced', n_jobs=-1, random_state=42
).fit(X_train, y_train)

# Hard labels for the report; second predict_proba column (label 1 here) for ROC-AUC.
y_pred_rf = rf.predict(X_test)
y_prob_rf = rf.predict_proba(X_test)[:, 1]

print(classification_report(y_test, y_pred_rf))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_rf))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.50      0.05      0.09        98

    accuracy                           1.00     56962
   macro avg       0.75      0.53      0.55     56962
weighted avg       1.00      1.00      1.00     56962

ROC-AUC: 0.651049352985426


In [7]:
# Attach the random forest's fraud probability to a copy of the held-out
# features and rescale it to a 0–100 risk score.
df_test = (
    X_test
    .copy()
    .assign(fraud_probability=y_prob_rf)
    .assign(risk_score=lambda scored: scored['fraud_probability'] * 100)
)

df_test[['fraud_probability', 'risk_score']].head()


Unnamed: 0,fraud_probability,risk_score
263020,0.0,0.0
11378,0.0,0.0
147283,0.0,0.0
219439,0.0,0.0
36939,0.0,0.0


In [8]:
def risk_band(score, low_threshold=30, high_threshold=70):
    """Map a numeric risk score to a named risk band.

    Parameters
    ----------
    score : float
        Risk score to classify (the notebook uses a 0–100 scale).
    low_threshold : float, default 30
        Scores strictly below this value are "Low Risk".
    high_threshold : float, default 70
        Scores at or above ``low_threshold`` and strictly below this value
        are "Medium Risk"; scores at or above it are "High Risk".

    Returns
    -------
    str
        One of "Low Risk", "Medium Risk", or "High Risk".

    Raises
    ------
    ValueError
        If ``low_threshold`` is greater than ``high_threshold``.

    Notes
    -----
    A NaN score compares False against both thresholds and therefore falls
    through to "High Risk".
    """
    if low_threshold > high_threshold:
        raise ValueError("low_threshold must not exceed high_threshold")

    if score < low_threshold:
        return "Low Risk"
    if score < high_threshold:
        return "Medium Risk"
    return "High Risk"

# Label every scored transaction with its band, then preview score next to band.
df_test['risk_band'] = [risk_band(score) for score in df_test['risk_score']]
df_test.loc[:, ['risk_score', 'risk_band']].head()


Unnamed: 0,risk_score,risk_band
263020,0.0,Low Risk
11378,0.0,Low Risk
147283,0.0,Low Risk
219439,0.0,Low Risk
36939,0.0,Low Risk


Instead of relying only on the hard binary prediction, we take the random forest's predicted fraud probability and scale it to a risk score (0–100). Transactions are then grouped into low (score < 30), medium (30 ≤ score < 70), and high (score ≥ 70) risk bands to mimic real-world fraud decision systems.
