In [None]:
# 1. Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


In [None]:
# 2. Load Dataset (Replace with your own file)
# For demonstration, we'll use a synthetic credit dataset from sklearn
from sklearn.datasets import make_classification

# Size of the synthetic data, named once so the generator call and the column
# labels below cannot drift apart.
N_SAMPLES = 1000
N_FEATURES = 10

X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_informative=6,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)

# Convert to DataFrame. Column labels are derived from the generated array's
# width instead of repeating the feature count as a second literal.
df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])])
df['creditworthy'] = y

df.head()


In [None]:
# 3. Explore Data
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
df.info()
print(df.describe())

# Bar chart of how many rows fall into each target class.
sns.countplot(x='creditworthy', data=df)
plt.title('Class Distribution')
plt.show()


In [None]:
# 4. Feature Engineering (here, already numeric - scale features)
X = df.drop('creditworthy', axis=1)
y = df['creditworthy']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:
# 5. Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [None]:
# 6. Model Training
# Build the three classifiers up front; the tree-based ones are seeded.
log_model = LogisticRegression()                     # Logistic Regression
dt_model = DecisionTreeClassifier(random_state=42)   # Decision Tree
rf_model = RandomForestClassifier(random_state=42)   # Random Forest

# Fit each one on the same training split, in the order listed above.
for estimator in (log_model, dt_model, rf_model):
    estimator.fit(X_train, y_train)

# Leave the fitted forest as the cell's displayed value.
rf_model


In [None]:
# 7. Model Evaluation Function
def evaluate_model(model, name, X_eval=None, y_eval=None, ax=None):
    """Print classification metrics for a fitted model and draw its ROC curve.

    Parameters
    ----------
    model
        Fitted classifier exposing ``predict`` and either ``predict_proba``
        or ``decision_function``.
    name : str
        Label used in the printed header and in the ROC legend entry.
    X_eval, y_eval : optional
        Features and true labels to evaluate on. When omitted, the
        notebook-level ``X_test`` / ``y_test`` are used.
    ax : matplotlib Axes, optional
        Axes to draw the ROC curve on. When omitted, the current pyplot axes
        are used.

    Returns
    -------
    float
        The ROC-AUC score computed on the evaluation data.
    """
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    y_pred = model.predict(X_eval)

    # Prefer the positive-class probability; fall back to the raw decision
    # score for estimators that do not expose predict_proba.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_eval)[:, 1]
    else:
        y_score = model.decision_function(X_eval)

    # Computed once and reused for both the printout and the legend label.
    auc = roc_auc_score(y_eval, y_score)

    print(f"\n=== {name} ===")
    print("Confusion Matrix:\n", confusion_matrix(y_eval, y_pred))
    print("\nClassification Report:\n", classification_report(y_eval, y_pred))
    print(f"ROC-AUC Score: {auc:.2f}")

    fpr, tpr, _ = roc_curve(y_eval, y_score)
    if ax is None:
        ax = plt.gca()
    ax.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
    return auc

# Plot ROC Curves
plt.figure(figsize=(10, 6))

# Each call prints that model's metrics and adds its ROC line to the figure
# opened above.
for fitted, label in (
    (log_model, "Logistic Regression"),
    (dt_model, "Decision Tree"),
    (rf_model, "Random Forest"),
):
    evaluate_model(fitted, label)

# Decorate the axes the curves were drawn on.
roc_ax = plt.gca()
roc_ax.plot([0, 1], [0, 1], 'k--')  # dashed y = x reference line
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve Comparison')
roc_ax.legend()
roc_ax.grid(True)
plt.show()
