In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the credit-score dataset; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer="credit_score.csv")


In [None]:
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

In [None]:
# Print a summary of the frame: column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column labels of the loaded frame.
df.columns

In [None]:
# Descriptive statistics per column (pandas summarises the numeric columns by default).
df.describe()

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Count the rows that exactly repeat an earlier row.
df.duplicated(keep='first').sum()

In [None]:
# Missing-value handling: keep it simple and discard every row
# that contains at least one missing entry.
df = df.dropna(axis=0, how='any')

In [None]:
# Check the class distribution of 'Credit_Score' (this runs before the label-encoding cell below)
df['Credit_Score'].value_counts()

In [None]:
 #Encode Categorical Variables
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column in place. Each fitted encoder is
# kept in `label_encoders` (keyed by column name) so the code -> label mapping
# can be inspected via `.classes_`, inverted, or reused on new data; without
# this only the encoder of the last column survives the loop.
# LabelEncoder numbers the classes in sorted order of their labels.
# NOTE(review): every object column is encoded, which would include the
# 'Credit_Score' target if it is stored as text — confirm its code mapping.
categorical_cols = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Cast to str first so the column is uniformly typed before encoding.
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

In [None]:
# Feature Engineering
# Example ratio features (adjust the column names to match the dataset).
# Each feature is only built when both of its source columns are present.
ratio_features = {
    'Debt_to_Income': ('Debt', 'Income'),
    'Credit_Utilization': ('Current_Balance', 'Credit_Limit'),
}
for feature, (numerator, denominator) in ratio_features.items():
    if numerator in df.columns and denominator in df.columns:
        # A zero denominator yields +/-inf (or NaN for 0/0). scikit-learn's
        # scaler and logistic regression used later reject such values, so
        # mark them as missing here.
        ratio = df[numerator] / df[denominator]
        df[feature] = ratio.replace([np.inf, -np.inf], np.nan)

# Drop rows whose engineered ratio is undefined — the same policy the notebook
# already applies to missing values in the raw data.
engineered_cols = [name for name in ratio_features if name in df.columns]
if engineered_cols:
    df = df.dropna(subset=engineered_cols)


In [None]:
# Build a binary target from 'Credit_Score': rows whose value equals 2 become 1 (Good), all others 0 (Bad).
# NOTE(review): if 'Credit_Score' was label-encoded, its codes follow the sorted
# order of the original labels — confirm that code 2 really is the "Good" class.
df['Target'] = df['Credit_Score'].eq(2).astype('int64')

# The label is the new binary column; the features are every other column
# except the original score it was derived from.
y = df['Target']
X = df.drop(columns=['Credit_Score', 'Target'])

In [None]:
# Train test split
from sklearn.model_selection import train_test_split, GridSearchCV

# Hold out 20% of the rows for testing. `stratify=y` keeps the proportion of
# each Target class the same in both partitions, so the evaluation metrics are
# not distorted by a split that happens to under-sample one class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features for Logistic Regression. The scaler learns its
# statistics from the training rows only and is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Default-configured model, trained and evaluated on the standardised features.
lr = LogisticRegression().fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Seeded tree trained on the unscaled features.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Seeded forest trained on the unscaled features.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [None]:
# Evaluate Models
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# One classification report per model, each computed against the held-out labels.
for heading, predictions in (
    ("Logistic Regression Report:\n", y_pred_lr),
    ("Decision Tree Report:\n", y_pred_dt),
    ("Random Forest Report:\n", y_pred_rf),
):
    print(heading, classification_report(y_test, predictions))


In [None]:
# ROC-AUC Scores
# Each model is scored on column 1 of its predict_proba output.
# Logistic Regression receives the scaled test features it was trained on;
# the tree-based models receive the unscaled ones.
for label, model, features in (
    ("Logistic Regression ROC-AUC:", lr, X_test_scaled),
    ("Decision Tree ROC-AUC:", dt, X_test),
    ("Random Forest ROC-AUC:", rf, X_test),
):
    positive_proba = model.predict_proba(features)[:, 1]
    print(label, roc_auc_score(y_test, positive_proba))


In [None]:
# Confusion matrix for the random forest predictions on the test split.
print("Random Forest Confusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred_rf))

In [None]:
# Persist the fitted random forest to disk.
# NOTE(review): the random forest is written unconditionally — nothing in this
# cell compares it against the other models — and only the estimator itself is
# saved, not the label encoders used to prepare its inputs.
import joblib
joblib.dump(value=rf, filename="CreditScore_RF_Model.pkl")