In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score


In [None]:
# Load data
df = pd.read_csv("data/customers.csv")

# Predictors for the risk model.
# 'transaction_count' is deliberately NOT listed here: the 'high_risk' target
# is defined in a later cell as a threshold on that very column, so using it
# as an input leaks the label into the model and makes every reported metric
# (classification report, ROC-AUC) meaningless.
# NOTE(review): 'total_spend' may itself be derived from transaction_count
# (e.g. count * average value) — confirm, and drop it as well if so.
features = [
    'avg_transaction_value',
    'total_spend',
    'account_age_days'
]

X = df[features]


In [None]:
# Binary target: 1 ("high risk") when a customer's transaction count is
# strictly below the median over all rows, 0 otherwise.
# The label is a deterministic function of 'transaction_count', so that
# column must not also be used as a predictor.
median_count = df['transaction_count'].median()
below_median = df['transaction_count'] < median_count
df['high_risk'] = below_median.astype(int)

y = df['high_risk']


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Standardise the predictors. The scaler is fitted on the training partition
# only and then applied unchanged to the test partition, so the held-out rows
# never influence the scaling statistics.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Fit a logistic regression with library-default settings on the scaled
# training data (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train_scaled, y_train)

# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ — the label 1 (high risk) for this 0/1 target.
class_probabilities = model.predict_proba(X_test_scaled)
y_prob = class_probabilities[:, 1]


In [None]:
# Per-class precision / recall / F1 on the held-out rows.
report = classification_report(y_test, y_pred)
print(report)

# Threshold-independent score computed from the predicted probabilities.
roc_auc = roc_auc_score(y_test, y_prob)
print("ROC-AUC:", roc_auc)


In [None]:
# Pair each fitted coefficient with its feature name and order them from most
# negative to most positive. The inputs were standardised before fitting, so
# the coefficients are on a comparable scale across features.
coefficients = model.coef_[0]
importance = pd.Series(coefficients, index=features).sort_values()

# Horizontal bar chart of the signed coefficients.
ax = importance.plot.barh()
ax.set_title("Feature Importance for Risk Prediction")
ax.set_xlabel("Coefficient Value")
plt.show()
