In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

# Load the raw dataset from disk.
dataset_path = '/content/Updated_Credit_Score_Classification_Dataset.csv'
df = pd.read_csv(dataset_path)

# Column groups: numeric columns get standardised, categorical columns get
# one-hot encoded.
numeric_features = ['Age', 'Income', 'Number of Children']
categorical_features = ['Gender', 'Education', 'Marital Status', 'Home Ownership']

# handle_unknown='ignore' lets the encoder accept categories at prediction
# time that were not present when it was fitted.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Separate the regression target from the remaining columns.
target_column = 'Final Credit Score'
y = df[target_column]
X = df.drop(columns=target_column)

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create a pipeline that preprocesses the data and then applies the RandomForestRegressor.
# Bundling both steps means the saved artifact carries its own preprocessing.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
])

# Training the model on the training split only
model.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split before persisting it, so the
# saved artifact has a known generalisation error rather than being untested.
test_mse = mean_squared_error(y_test, model.predict(X_test))
print(f"Test MSE: {test_mse:.3f} (RMSE: {test_mse ** 0.5:.3f})")

# Saving the model to disk (kept as the last expression so the cell still
# displays the list of written file names)
joblib.dump(model, 'credit_score_model.pkl')

['credit_score_model.pkl']

In [5]:
# Restore the fitted pipeline that was persisted to disk
model = joblib.load('credit_score_model.pkl')

# A single example applicant, one value per input column.
applicant = {
    'Age': 35,
    'Gender': 'Male',
    'Income': 50000,
    'Education': "Bachelor's Degree",
    'Marital Status': 'Married',
    'Number of Children': 2,
    'Home Ownership': 'Owned',
}

# Wrap every value in a one-element list so the frame has exactly one row.
data = {column: [value] for column, value in applicant.items()}
input_data = pd.DataFrame(data)

# predict() returns one value per input row; show the only one.
predicted_score = model.predict(input_data)
print("Predicted Credit Score:", predicted_score[0])

Predicted Credit Score: 652.6410120948237
