<a href="https://colab.research.google.com/github/Nitinsen001/CodeAlpha-Credit-Scoring-Model/blob/main/Credit_Scoring_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
!pip install ucimlrepo




In [43]:
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from ucimlrepo import fetch_ucirepo

# 1. Dataset Load
statlog = fetch_ucirepo(id=144)  # German Credit Data
X = statlog.data.features

# `data.targets` comes back as a single-column DataFrame. Keep just that
# column as a 1-D Series so scikit-learn receives a proper 1-D target and
# does not emit a DataConversionWarning ("column-vector y was passed") on fit.
y = statlog.data.targets.iloc[:, 0]  # Labels: 1 = good, 2 = bad

# Map to 0/1 (1 = good credit, 0 = bad credit)
y = y.replace({1: 1, 2: 0})

# 2. EDA (quick look)
print(X.head())
# DataFrame.info() prints its own report and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
X.info()
print(y.value_counts())

# 3. Split Data
# 70/30 split, stratified on the label, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 4. Preprocessing: Identify categorical and numerical features, then apply transformations
# Partition the columns into "numeric" and "everything else" instead of
# listing specific dtypes. ColumnTransformer drops any column that is not
# assigned to a transformer (remainder='drop' is the default), so a column
# with an unlisted dtype (e.g. int32 or category) would otherwise vanish
# from the model inputs without any error.
numerical_features = X.select_dtypes(include="number").columns
categorical_features = X.select_dtypes(exclude="number").columns

preprocessor = ColumnTransformer(
    transformers=[
        # Standardise numeric columns to zero mean / unit variance.
        ('num', StandardScaler(), numerical_features),
        # One-hot encode the rest; categories unseen during fit are encoded
        # as all-zeros at predict time rather than raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

# 5. Train Models using Pipelines
# Pipeline fits its steps in place and does not copy them, so handing the same
# `preprocessor` object to both pipelines would make them share one
# transformer: fitting the second pipeline would refit the transformer that
# the first pipeline relies on. Each pipeline therefore gets its own unfitted
# clone.

# Logistic Regression Pipeline
# class_weight="balanced" reweights the classes inversely to their frequency.
log_reg_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', LogisticRegression(class_weight="balanced", max_iter=1000)),
])

log_reg_pipeline.fit(X_train, y_train)

# Random Forest Pipeline
rf_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42)),
])

rf_pipeline.fit(X_train, y_train)


# 6. Evaluate Models
# Report per-class precision/recall/F1 and the ROC-AUC of each fitted
# pipeline on the held-out test split.
fitted_models = {
    "Logistic Regression": log_reg_pipeline,
    "Random Forest": rf_pipeline,
}

for name, model in fitted_models.items():
    # Hard class predictions feed the classification report ...
    y_pred = model.predict(X_test)
    # ... while ROC-AUC needs a score: the probability in column 1 of
    # predict_proba's output.
    y_proba = model.predict_proba(X_test)[:, 1]

    print(f"\n{name} Results:")
    report = classification_report(y_test, y_pred)
    print(report)
    auc = roc_auc_score(y_test, y_proba)
    print("ROC-AUC:", auc)

# 7. Make simplified predictions (using Random Forest as an example)
predictions = rf_pipeline.predict(X_test)


def _credit_label(pred):
    """Return the human-readable label for a single model prediction."""
    return "Creditworthy" if pred == 1 else "Not Creditworthy"


# Translate every numeric prediction into its readable label.
simplified_predictions = [_credit_label(pred) for pred in predictions]

print("\nSimplified Predictions on Test Data (using Random Forest):")
# Only the first 10 samples are shown to keep the output short.
for sample_no, prediction in enumerate(simplified_predictions[:10], start=1):
    print(f"Sample {sample_no}: {prediction}")

  Attribute1  Attribute2 Attribute3 Attribute4  Attribute5 Attribute6  \
0        A11           6        A34        A43        1169        A65   
1        A12          48        A32        A43        5951        A61   
2        A14          12        A34        A46        2096        A61   
3        A11          42        A32        A42        7882        A61   
4        A11          24        A33        A40        4870        A61   

  Attribute7  Attribute8 Attribute9 Attribute10  Attribute11 Attribute12  \
0        A75           4        A93        A101            4        A121   
1        A73           2        A92        A101            2        A121   
2        A74           2        A93        A101            3        A121   
3        A74           2        A93        A103            4        A122   
4        A73           3        A93        A101            4        A124   

   Attribute13 Attribute14 Attribute15  Attribute16 Attribute17  Attribute18  \
0           67        A1

  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)



Logistic Regression Results:
              precision    recall  f1-score   support

           0       0.53      0.69      0.60        90
           1       0.85      0.73      0.79       210

    accuracy                           0.72       300
   macro avg       0.69      0.71      0.69       300
weighted avg       0.75      0.72      0.73       300

ROC-AUC: 0.777883597883598

Random Forest Results:
              precision    recall  f1-score   support

           0       0.66      0.37      0.47        90
           1       0.77      0.92      0.84       210

    accuracy                           0.75       300
   macro avg       0.72      0.64      0.66       300
weighted avg       0.74      0.75      0.73       300

ROC-AUC: 0.7633597883597883

Simplified Predictions on Test Data (using Random Forest):
Sample 1: Creditworthy
Sample 2: Not Creditworthy
Sample 3: Creditworthy
Sample 4: Creditworthy
Sample 5: Creditworthy
Sample 6: Creditworthy
Sample 7: Creditworthy
Sample 8: Cr