In [69]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import xgboost as xgb
import joblib

In [70]:
# Load the raw dataset; the path is relative, so the CSV must sit in the
# notebook's working directory.
cs_data = pd.read_csv("credit_score_dataset.csv")

In [71]:
# Pull the label column out first, then keep every other column as a predictor
target = cs_data["credit_score"]
features = cs_data.drop(columns=["credit_score"])

In [72]:
# Columns to one-hot encode: the notebook treats every predictor as categorical,
# so the list is simply all feature columns.
categorical_features = list(features.columns)

In [73]:
# Route the listed columns through a one-hot encoder. With
# handle_unknown="ignore", categories not seen during fit do not raise at
# transform time. Any column not listed is passed through unchanged.
preprocessor = ColumnTransformer(
    [
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ],
    remainder="passthrough",
)

In [74]:
# Fit the preprocessor and encode all rows in one step.
# NOTE(review): the fit uses the full feature frame, i.e. it happens before
# the train/test split, so the encoder's category vocabulary also includes
# rows that later land in the test set.
X_encoded = preprocessor.fit_transform(features)

In [75]:
# Serialize the fitted preprocessor with joblib so the same encoding can be
# reloaded outside this notebook.
joblib.dump(preprocessor, "preprocessor_credit_score.pkl")

['preprocessor_credit_score.pkl']

In [76]:
# Hold out 20% of the encoded rows for evaluation; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    target,
    test_size=0.2,
    random_state=42,
)

## Linear Regression

In [77]:
# Create the linear regression estimator with default settings
# (it is fitted in the following cell).
lr_model = LinearRegression()

In [78]:
# Fit the linear regression on the training split.
lr_model.fit(X_train, y_train)

In [79]:
# Serialize the linear regression model to disk with joblib.
joblib.dump(lr_model, "lr_score_model.pkl")

['lr_score_model.pkl']

In [80]:
# Predict the target for the held-out test rows.
lr_pred = lr_model.predict(X_test)

In [81]:
# Print regression metrics for one set of predictions
def evaluate_model(y_true, y_pred, model_name):
    """Print MAE, MSE and R-squared for predictions against true values.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned with ``y_true``.
    model_name : str
        Label interpolated into the report's header line.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    print(f"=== {model_name} Model Evaluation ===")

    # (label, scorer) pairs, listed in the order they are reported.
    reported_metrics = (
        ("Mean Absolute Error", mean_absolute_error),
        ("Mean Squared Error", mean_squared_error),
        ("R-squared Score", r2_score),
    )
    for label, scorer in reported_metrics:
        print(f"{label}: {scorer(y_true, y_pred):.2f}")

    # print() appends its own newline, so this leaves two blank lines.
    print("\n")

In [82]:
# Report test-set metrics for the linear regression predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, lr_pred, "Linear Regression")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 11.25
Mean Squared Error: 207.08
R-squared Score: 0.95




## Random Forest Regressor

In [83]:
# Random forest of 100 trees; the seed fixes the forest's randomness so
# repeated runs build the same model. Fitted in the following cell.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

In [84]:
# Fit the random forest on the training split.
rf_model.fit(X_train, y_train)

In [85]:
# Serialize the random forest model to disk with joblib.
joblib.dump(rf_model, "rf_score_model.pkl")

['rf_score_model.pkl']

In [86]:
# Predict the target for the held-out test rows.
rf_pred = rf_model.predict(X_test)

In [87]:
# Report test-set metrics for the random forest predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, rf_pred, "Random Forest")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 13.87
Mean Squared Error: 313.21
R-squared Score: 0.93




## XGBoost

In [88]:
# Gradient-boosted trees via XGBoost: 200 boosting rounds at a 0.1 learning
# rate, seeded for repeatability. Fitted in the following cell.
xgb_model = xgb.XGBRegressor(
    n_estimators=200,
    learning_rate=0.1,
    random_state=42,
)

In [89]:
# Fit the XGBoost regressor on the training split.
# The trailing semicolon stops Jupyter from rendering the returned estimator.
# That rendering step is what raised "AttributeError: 'super' object has no
# attribute '__sklearn_tags__'" in this cell; the fit itself completes and the
# model predicts normally afterwards.
# NOTE(review): presumably the installed xgboost predates scikit-learn's
# __sklearn_tags__ estimator API - confirm the versions and upgrade xgboost
# (or pin scikit-learn) rather than relying on the suppressed display.
xgb_model.fit(X_train, y_train);

AttributeError: 'super' object has no attribute '__sklearn_tags__'

AttributeError: 'super' object has no attribute '__sklearn_tags__'

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=200, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)

In [90]:
# Serialize the XGBoost model to disk with joblib.
joblib.dump(xgb_model, "xgb_score_model.pkl")

['xgb_score_model.pkl']

In [91]:
# Predict the target for the held-out test rows.
xgb_pred = xgb_model.predict(X_test)

In [92]:
# Report test-set metrics for the XGBoost predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, xgb_pred, "XGBoost")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 11.92
Mean Squared Error: 230.00
R-squared Score: 0.95




## Ridge Regression

In [93]:
# Ridge (L2-regularized) linear regression with regularization strength alpha=1.0.
ridge_model = Ridge(alpha=1.0)

In [94]:
# Fit the ridge model on the training split.
ridge_model.fit(X_train, y_train)

In [95]:
# Predict the target for the held-out test rows.
ridge_pred = ridge_model.predict(X_test)

In [96]:
# Report test-set metrics for the ridge predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, ridge_pred, "Ridge Regression")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 11.25
Mean Squared Error: 207.07
R-squared Score: 0.95




## Lasso Regression

In [97]:
# Lasso (L1-regularized) linear regression with regularization strength alpha=0.1.
lasso_model = Lasso(alpha=0.1)

In [98]:
# Fit the lasso model on the training split.
lasso_model.fit(X_train, y_train)

In [99]:
# Predict the target for the held-out test rows.
lasso_pred = lasso_model.predict(X_test)

In [100]:
# Report test-set metrics for the lasso predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, lasso_pred, "Lasso Regression")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 11.30
Mean Squared Error: 207.85
R-squared Score: 0.95




## Decision Tree Regression

In [101]:
# Single regression tree capped at depth 5; seeded so tie-breaking between
# equally good splits is repeatable.
tree_model = DecisionTreeRegressor(
    max_depth=5,
    random_state=42,
)

In [102]:
# Fit the decision tree on the training split.
tree_model.fit(X_train, y_train)

In [103]:
# Predict the target for the held-out test rows.
tree_pred = tree_model.predict(X_test)

In [104]:
# Report test-set metrics for the decision tree predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, tree_pred, "Decision Tree")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 27.95
Mean Squared Error: 1215.63
R-squared Score: 0.72




## Gradient Boosting Regressor

In [105]:
# scikit-learn gradient boosting: 100 boosting stages at a 0.1 learning rate,
# seeded for repeatability.
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)

In [106]:
# Fit the gradient boosting model on the training split.
gb_model.fit(X_train, y_train)

In [107]:
# Predict the target for the held-out test rows.
gb_pred = gb_model.predict(X_test)

In [108]:
# Report test-set metrics for the gradient boosting predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, gb_pred, "Gradient Boosting")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 12.20
Mean Squared Error: 239.26
R-squared Score: 0.95




## Support Vector Regression

In [109]:
# Support vector regressor with an RBF kernel; C and gamma are fixed by hand
# here rather than tuned.
svr_model = SVR(
    kernel="rbf",
    C=100,
    gamma=0.1,
)

In [110]:
# Fit the support vector regressor on the training split.
svr_model.fit(X_train, y_train)

In [111]:
# Predict the target for the held-out test rows.
svr_pred = svr_model.predict(X_test)

In [112]:
# Report test-set metrics for the support vector regression predictions.
# The label is the estimator's name so the report header identifies the model.
evaluate_model(y_test, svr_pred, "Support Vector Regression")

=== Credit Score Prediction Model Evaluation ===
Mean Absolute Error: 11.80
Mean Squared Error: 224.74
R-squared Score: 0.95


