In [24]:
import io

import numpy as np
import pandas as pd
from google.colab import files
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the dataset.
# Upload first: on a fresh runtime the CSV is not on disk until the upload
# completes, so reading before uploading fails.
uploaded = files.upload()

if uploaded:
    # Colab renames an upload when the name is already taken (the cell output
    # shows "credit_data (5).csv"), so a fixed path may point at an older
    # copy. Parse the bytes that were just uploaded instead.
    # If several files were selected, the first one is used.
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a copy on disk.
    df = pd.read_csv("credit_data.csv")


# Drop non-relevant columns
df = df.drop(columns=["CustomerID"])

# Handle missing values: fill gaps in each numeric column with that column's
# mean. numeric_only=True restricts the mean to numeric columns, so the step
# keeps working if the frame also contains text columns (recent pandas raises
# TypeError from df.mean() otherwise). Non-numeric columns are left as-is.
# NOTE(review): the means are computed over all rows, including rows that
# later land in the test split — confirm this is acceptable for evaluation.
df = df.fillna(df.mean(numeric_only=True))

# Define features and target variable
features = ["Age", "Income", "LoanAmount"]
target = "CreditScore"

# Train-test split (90% training, 10% testing).
# Split BEFORE scaling so the scaler never sees the held-out rows; fitting it
# on the full dataset would leak test-set statistics into the training data.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target], test_size=0.1, random_state=42
)

# Normalize numerical features using statistics from the training rows only.
scaler = StandardScaler()
scaler.fit(X_train)

# Rebuild the splits as DataFrames so column names and row indices survive
# the transform (the model is later queried with a named-column DataFrame).
X_train = pd.DataFrame(scaler.transform(X_train), columns=features, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=features, index=X_test.index)

# Keep df's feature columns scaled, as before, but now with the
# training-only statistics.
df[features] = scaler.transform(df[features])

# Fit a gradient-boosted regression ensemble (500 trees, depth 5,
# learning rate 0.1, fixed seed) on the training split.
model = GradientBoostingRegressor(
    n_estimators=500,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
model.fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)

# Report each regression metric on the test predictions
for metric_label, metric_fn in (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("R² Score:", r2_score),
):
    print(metric_label, metric_fn(y_test, y_pred))


# One hand-written input row, with values given in the same order as `features`
sample_data = pd.DataFrame([dict(zip(features, (30, 50000, 10000)))])

# Apply the already-fitted scaler, then re-wrap the result so the model
# receives the same column names it was trained with
sample_data_scaled = pd.DataFrame(
    data=scaler.transform(sample_data),
    columns=features,
)

# Predict Credit Score for the sample row
predicted_score = model.predict(sample_data_scaled)
print("Predicted Credit Score:", predicted_score[0])

Saving credit_data.csv to credit_data (5).csv
Mean Absolute Error: 230.97352828563066
Mean Squared Error: 82218.78233651051
R² Score: -7.304717793642636
Predicted Credit Score: 618.4428649768716
