In [None]:

import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC

In [None]:
# Read the credit-score records from the Kaggle input directory into a DataFrame
csv_path = '/kaggle/input/credit-scores-csv/credit_scores.csv'
df = pd.read_csv(csv_path)
# Bare expression so the notebook renders the loaded frame
df

In [None]:
# Preprocessing the data
# Drop the identifier columns. `df` is rebound here, so on a second run of this
# cell the columns are already gone; errors="ignore" turns that re-run into a
# no-op instead of a KeyError.
df = df.drop(columns=["Name", "SSN", "ID", "Customer_ID"], errors="ignore")

# Features are every remaining column except the target; the target is Credit_Score
X = df.drop(columns='Credit_Score')
y = df['Credit_Score']
df

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Identifying numerical and categorical columns
# (everything that is not a numeric dtype is treated as categorical)
num_cols = X_train.select_dtypes(include=np.number).columns.tolist()
cat_cols = X_train.select_dtypes(exclude=np.number).columns.tolist()

# Numerical columns: fill missing values with the column mean, then standardize
num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown='ignore' encodes a category that was not seen
# during fit as all zeros; the default ('error') would raise at transform time
# whenever a CV validation fold or the test split contains such a category.
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))])

# Combining preprocessing steps: each transformer is applied to its own column list
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, num_cols),
        ('cat', cat_transformer, cat_cols)])

In [None]:
# Support Vector Machine classifier with library-default settings
model = SVC()

# Chain preprocessing and the classifier so both are fitted together
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', model),
]
clf = Pipeline(steps=pipeline_steps)

In [None]:
# Fine-tuning hyperparameters
# Candidate values for the SVC step of the pipeline (the `classifier__` prefix
# routes each parameter to that step)
param_grid = {
    'classifier__kernel': ['rbf', 'linear'],
    'classifier__C': [0.01, 10, 20],
}

# Randomized search: 4 sampled parameter combinations, 5-fold cross-validation
random_search = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_grid,
    n_iter=4,
    cv=5,
    random_state=1,
)
random_search.fit(X_train, y_train)

# Report the winning parameters and their mean cross-validated score
print("Best parameters: ", random_search.best_params_)
print("Best score: ", random_search.best_score_)

In [None]:
# Retraining the best model using the whole dataset
best_model = random_search.best_estimator_
best_model.fit(X, y)

In [None]:
# Calculating the score of the best model on training data
train_score = best_model.score(X_train, y_train)
print("Training score: ", train_score)

# Calculating the score of the best model on test data
test_score = best_model.score(X_test, y_test)
print("Test score: ", test_score)

In [None]:
# Name of the pickle file, written to the notebook's working directory
filename = 'credit_scores_MLproj.pkl'

# Serialize the fitted pipeline; the context manager closes the file handle
with open(filename, mode='wb') as model_file:
    pickle.dump(best_model, model_file)