In [None]:
# Requires scikit-optimize (provides skopt.BayesSearchCV); if it is missing, run: pip install scikit-optimize

In [None]:
'''
 -----------------------------------------------------------
          Artificial Intelligence Workshop RUG
 -----------------------------------------------------------
            R.M. (Rolando) Gonzales Martinez
 -----------------------------------------------------------
 ~~~~~~~ Credit scoring model with Machine Learning ~~~~~~~~
 support vector machines (with RBF kernel) vs. logistic model
'''
import pandas as pd
# Read the loan records from the Excel workbook and print the first rows as a
# quick check of what was loaded.
df = pd.read_excel(io="bankloans.xlsx")
print(df.head(n=5))
# Data dictionary (one entry per column):
#   age        - age in years
#   education  - level of education: (1) did not complete high school,
#                (2) high school degree, (3) some college,
#                (4) college degree, (5) postundergraduate degree
#   employears - years with current employer
#   address    - years at current address
#   salary     - salary in thousands
#   creddebt   - credit card debt in thousands
#   othdebt    - other debt in thousands
#   default    - credit default

In [None]:
# Hyper-parameter tuning
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV
from skopt.space import Real, Categorical
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# 1. Target variable: the 'default' column cast to integers
#    (1 = default, 0 = no default)
y = df['default'].astype(int)

# 2. Predictor columns, grouped by the preprocessing each group receives
categorical_cols = ['education']
numerical_cols = ['age', 'employears', 'salary', 'creddebt']

# 3. Preprocessing pipelines
# Numeric predictors: fill missing values with the column mean, then
# standardize.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical predictors: fill missing values with the most frequent level,
# then one-hot encode into a dense array.
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
])

# Send each column group through its own pipeline; columns not listed in
# either group are not passed on to the model.
preprocessor = ColumnTransformer(transformers=[
    ('num', num_pipeline, numerical_cols),
    ('cat', cat_pipeline, categorical_cols),
])

# 4. Split into train/test
# Workshop parameters -- change them to experiment:
#   RANDOM_STATE: integer seed, so that the split below (and every other
#                 component that is given this seed) is reproducible.
#   TRAIN_SIZE:   fraction of the rows used for training; the remaining rows
#                 are held out as the test set.
RANDOM_STATE = 42
TRAIN_SIZE = 0.8

# stratify=y keeps the share of defaults the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    df[numerical_cols + categorical_cols],
    y,
    train_size=TRAIN_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,
)

# 5. Full model: the preprocessing step ('pre') followed by a support vector
#    classifier ('clf'). The step names are what the 'clf__...' keys of the
#    hyperparameter searches refer to.
pipe = Pipeline(steps=[
    ('pre', preprocessor),
    ('clf', SVC(random_state=RANDOM_STATE, probability=True)),
])

# 6. Grid Search hyperparameters for SVC
# Keys follow the '<step>__<parameter>' convention so that each value is
# routed to the 'clf' step of the pipeline. The candidate values are workshop
# parameters -- change them to experiment.
param_grid = {
    # Regularization parameter (larger C = weaker regularization).
    'clf__C': [0.01, 0.1, 1, 10, 100],
    # Kernel coefficient; it affects the 'rbf' kernel and is ignored by
    # 'linear'.
    'clf__gamma': [0.001, 0.01, 0.1, 1],
    # Must be valid SVC kernel names ('r' is not one).
    'clf__kernel': ['linear', 'rbf'],
}

# Exhaustive search: every combination in param_grid is evaluated with
# 5-fold cross-validation on the training set.
grid_search = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Evaluate the best configuration (refit on the whole training set) on the
# held-out test set.
y_pred = grid_search.predict(X_test)
print("=== Grid Search Results ===")
print("Best params:", grid_search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# 7. Randomized Search
# Instead of trying every combination, evaluate n_iter=10 combinations drawn
# from the same candidate values as the grid search (param_grid), each with
# 5-fold cross-validation. random_state makes the draw reproducible.
random_search = RandomizedSearchCV(
    pipe,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    verbose=2,
    random_state=RANDOM_STATE,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# Evaluate the best configuration found on the held-out test set.
y_pred = random_search.predict(X_test)
print("=== Random Search Results ===")
print("Best params:", random_search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# 8. Bayesian Optimization Search
# Continuous ranges instead of fixed candidate lists. C and gamma are searched
# on a log scale ('log-uniform'); the kernel is a categorical choice.
search_spaces = {
    'clf__C': Real(1e-3, 1e3, prior='log-uniform'),
    'clf__gamma': Real(1e-4, 1e1, prior='log-uniform'),
    'clf__kernel': Categorical(['linear', 'rbf']),
}

# Evaluate n_iter=32 parameter settings, each with 5-fold cross-validation.
# random_state makes the search reproducible.
bayes_search = BayesSearchCV(
    pipe,
    search_spaces=search_spaces,
    n_iter=32,
    cv=5,
    n_jobs=-1,
    verbose=2,
    random_state=RANDOM_STATE,
)
bayes_search.fit(X_train, y_train)

# Score the best configuration found on the held-out test set.
print("=== Bayesian Optimization Results ===")
print("Best params:", bayes_search.best_params_)
print("Test Accuracy:", bayes_search.score(X_test, y_test))
