# **ETHImpulse Credit Score Prediction Model**

In [15]:
# Install any required packages (if not already available in the Colab environment)
# !pip install -U scikit-learn pandas numpy

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [16]:
class CreditScoreModel:
    """Thin wrapper around a scikit-learn ``LogisticRegression`` classifier.

    The wrapper tracks whether the estimator has been fitted and exposes
    helpers for prediction and coefficient-based feature importance.
    It performs no feature scaling itself; callers pass in whatever
    feature representation they want the estimator to see.
    """

    def __init__(self, max_iter=1000, solver='saga', random_state=None):
        """
        Create an untrained model.

        Args:
        - max_iter (int): Maximum solver iterations (default 1000).
        - solver (str): Solver name passed to LogisticRegression (default 'saga').
        - random_state (int or None): Seed forwarded to LogisticRegression.
          The default ``None`` keeps the estimator's own default behaviour;
          pass an integer to make the fit independent of global RNG state.
        """
        self.model = LogisticRegression(
            max_iter=max_iter,
            solver=solver,
            random_state=random_state,
        )
        self.is_trained = False

    def _ensure_trained(self):
        """Raise ValueError if `train_model()` has not completed yet."""
        if not self.is_trained:
            raise ValueError("The model is not trained. Call `train_model()` first.")

    def train_model(self, X, y):
        """
        Train the credit score model.

        Args:
        - X (pd.DataFrame or np.array): Features dataframe or array.
        - y (pd.Series): Target labels (creditworthy: 1 or 0).
        """
        self.model.fit(X, y)
        # Only flipped after a successful fit, so a failed fit leaves the
        # model flagged as untrained.
        self.is_trained = True
        print("Model trained successfully!")

    def predict(self, features):
        """
        Predict creditworthiness using the trained model.

        Args:
        - features (pd.DataFrame or np.array): Feature data to predict on.

        Returns:
        - predictions (np.array): Predicted labels (1 for creditworthy, 0 otherwise).
        - probabilities (np.array): Probability of being creditworthy.

        Raises:
        - ValueError: If the model has not been trained.
        """
        self._ensure_trained()
        predictions = self.model.predict(features)
        # Column 1 of predict_proba is the second class; with 0/1 labels
        # that is the "creditworthy" class.
        probabilities = self.model.predict_proba(features)[:, 1]
        return predictions, probabilities

    def get_feature_importance(self, feature_names):
        """
        Get feature importance based on the model coefficients.

        Args:
        - feature_names (iterable of str): Feature names, in the same order
          as the columns the model was trained on. Any iterable is accepted
          (list, pandas Index, generator, ...).

        Returns:
        - importance_df (pd.DataFrame): Columns "Feature", "Coefficient" and
          "Importance" (absolute coefficient), sorted by "Importance"
          in descending order.

        Raises:
        - ValueError: If the model has not been trained, or if the number of
          names does not match the number of coefficients.
        """
        self._ensure_trained()

        # Materialize the names so one-shot iterables work and the length
        # can be checked; a bare string is treated as a single name.
        if isinstance(feature_names, str):
            names = [feature_names]
        else:
            names = list(feature_names)

        coefficients = self.model.coef_[0]
        if len(names) != len(coefficients):
            raise ValueError(
                f"Expected {len(coefficients)} feature names, got {len(names)}."
            )

        importance_df = pd.DataFrame({
            "Feature": names,
            "Coefficient": coefficients,
        })
        importance_df["Importance"] = importance_df["Coefficient"].abs()
        return importance_df.sort_values(by="Importance", ascending=False)

In [17]:
# Build a reproducible synthetic applicant table.
np.random.seed(42)
data_size = 1000

# The draws below consume the global NumPy RNG in this exact order;
# reordering them would change every generated value.
data = dict(
    income=np.random.normal(50000, 15000, data_size),
    debt_to_income=np.random.uniform(0.1, 0.8, data_size),
    credit_utilization=np.random.uniform(0.1, 0.9, data_size),
    payment_history=np.random.uniform(0.5, 1.0, data_size),
    length_of_credit_history=np.random.uniform(1, 30, data_size),
)

# Label rule: a weighted score of the five features, thresholded at 0.7
# and stored as 0/1 integers.
df = pd.DataFrame(data).assign(
    creditworthy=lambda frame: (
        0.3 * frame["income"] / 100000
        + 0.2 * (1 - frame["debt_to_income"])
        + 0.25 * (1 - frame["credit_utilization"])
        + 0.15 * frame["payment_history"]
        + 0.1 * frame["length_of_credit_history"] / 30
    ).gt(0.7).astype(int)
)

# Features are exactly the generated columns (in generation order);
# the target is the derived label.
X = df[list(data)]
y = df["creditworthy"]

In [18]:
# Hold out 30% of the rows as a test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply
# the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
# Create the classifier wrapper and fit it on the standardized training split
model = CreditScoreModel()
model.train_model(X=X_train_scaled, y=y_train)

Model trained successfully!


In [20]:
# Score the held-out split: hard labels plus positive-class probabilities
predictions, probabilities = model.predict(features=X_test_scaled)

# Rank the input columns by the absolute size of their fitted coefficient
feature_importance = model.get_feature_importance(feature_names=X.columns)
print("\nFeature Importance:\n", feature_importance)


Feature Importance:
                     Feature  Coefficient  Importance
2        credit_utilization    -2.719903    2.719903
0                    income     2.432078    2.432078
1            debt_to_income    -2.418196    2.418196
4  length_of_credit_history     1.670255    1.670255
3           payment_history     1.295397    1.295397


In [21]:
# Example: score a single, hand-written applicant.
# The keys must match the training feature columns, in the same order.
new_user = pd.DataFrame([
    {
        "income": 60000,
        "debt_to_income": 0.1,
        "credit_utilization": 0.4,
        "payment_history": 0.9,
        "length_of_credit_history": 10,
    }
])

# Reuse the scaler fitted earlier so the applicant is standardized with
# the same statistics as the training data.
new_user_scaled = scaler.transform(new_user)

# Predicted label and positive-class probability for the one applicant
pred, prob = model.predict(features=new_user_scaled)
print(f"\nNew User Prediction: {pred[0]} (Probability: {prob[0]:.2f})")


New User Prediction: 0 (Probability: 0.19)
