In [24]:
!python -m pip install --upgrade pip

!pip install lightgbm==3.3.2

!pip install xgboost
!pip install imbalanced-learn



















In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

In [26]:
df = pd.read_csv('german_credit_data.csv')
df.fillna('unknown', inplace=True)


In [27]:
# Encoding categorical variables
le = LabelEncoder()
for col in ['Sex', 'Housing', 'Saving accounts', 'Checking account', 'Purpose']:
    df[col] = le.fit_transform(df[col])


In [28]:
# Define features and target variable
X = df.drop('Credit amount', axis=1)
y = (df['Credit amount'] < 5000).astype(int)  # Simplified risk labeling

In [29]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [30]:

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [31]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=42)

# Define models and parameter grids
models = {
    'Logistic Regression': (LogisticRegression(class_weight='balanced'), {'C': [0.1, 1, 10]}),
    'Decision Tree': (DecisionTreeClassifier(class_weight='balanced'), {'max_depth': [3, 5, 10]}),
    'Random Forest': (RandomForestClassifier(class_weight='balanced'), {'n_estimators': [50, 100], 'max_depth': [5, 10]}),
    'XGBoost': (XGBClassifier(use_label_encoder=False, eval_metric='logloss'), {'n_estimators': [50, 100]}),
    'LightGBM': (LGBMClassifier(), {'n_estimators': [50, 100]}),
    'SVM': (SVC(), {'C': [1, 10]}),
    'ANN': (MLPClassifier(max_iter=300), {'hidden_layer_sizes': [(50,), (100,)], 'alpha': [0.0001, 0.001]})
}

In [32]:
# Perform training and evaluation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
results = {}

for name, (model, params) in models.items():
    print(f"Training {name}...")
    grid = GridSearchCV(model, params, cv=cv, scoring='f1', n_jobs=-1)
    grid.fit(X_train, y_train)
    y_pred = grid.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[name] = report
    print(classification_report(y_test, y_pred))


Training Logistic Regression...
              precision    recall  f1-score   support

           0       0.44      0.74      0.55        38
           1       0.93      0.78      0.85       162

    accuracy                           0.78       200
   macro avg       0.69      0.76      0.70       200
weighted avg       0.84      0.78      0.79       200

Training Decision Tree...
              precision    recall  f1-score   support

           0       0.43      0.68      0.53        38
           1       0.91      0.79      0.85       162

    accuracy                           0.77       200
   macro avg       0.67      0.74      0.69       200
weighted avg       0.82      0.77      0.79       200

Training Random Forest...
              precision    recall  f1-score   support

           0       0.56      0.53      0.54        38
           1       0.89      0.90      0.90       162

    accuracy                           0.83       200
   macro avg       0.72      0.71      0.72 

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


              precision    recall  f1-score   support

           0       0.55      0.45      0.49        38
           1       0.88      0.91      0.89       162

    accuracy                           0.82       200
   macro avg       0.71      0.68      0.69       200
weighted avg       0.81      0.82      0.82       200

Training LightGBM...
              precision    recall  f1-score   support

           0       0.62      0.42      0.50        38
           1       0.87      0.94      0.90       162

    accuracy                           0.84       200
   macro avg       0.74      0.68      0.70       200
weighted avg       0.82      0.84      0.83       200

Training SVM...
              precision    recall  f1-score   support

           0       0.60      0.39      0.48        38
           1       0.87      0.94      0.90       162

    accuracy                           0.83       200
   macro avg       0.73      0.67      0.69       200
weighted avg       0.82      0.83    

