In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import optuna

In [2]:
df = pd.read_csv("./processed_data.csv")
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values

# Perform train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0, stratify=Y)


In [3]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Define the objective function for SVM and hyperparameter tuning
def objective(trial):
    C = trial.suggest_loguniform('C', 1e-5, 1e2)
    gamma = trial.suggest_loguniform('gamma', 1e-5, 1e2)

    svm = SVC(C=C, gamma=gamma)
    svm.fit(X_train, Y_train)
    
    preds = svm.predict(X_test)
    acc = accuracy_score(Y_test, preds)
    return acc


In [5]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train SVM using the best hyperparameters
best_svm = SVC(C=best_params['C'], gamma=best_params['gamma'])
best_svm.fit(X_train, Y_train)

# Evaluate the best SVM model
preds = best_svm.predict(X_test)
acc = accuracy_score(Y_test, preds)
print(f"Accuracy: {acc}")

[I 2023-12-15 18:27:00,740] A new study created in memory with name: no-name-bbf45a79-575b-4755-bbef-176e660db828
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e2)
[I 2023-12-15 19:52:22,025] Trial 0 finished with value: 0.539093206064009 and parameters: {'C': 55.54306181011332, 'gamma': 38.35844008937296}. Best is trial 0 with value: 0.539093206064009.
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e2)
[I 2023-12-15 20:12:33,149] Trial 1 finished with value: 0.539093206064009 and parameters: {'C': 0.004209140359022083, 'gamma': 1.2084103222790683}. Best is trial 0 with value: 0.539093206064009.
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e2)
[I 2023-12-15 20:42:45,043] Trial 2 finished with value: 0.545199326221224 and parameters: {'C': 0.1585029365329744, 'gamma': 0.21584698459088567}. Best is trial 2 with value: 0.54519932622122

Best hyperparameters: {'C': 0.1585029365329744, 'gamma': 0.21584698459088567}
Accuracy: 0.545199326221224


In [6]:
test_df = pd.read_csv("./test_data.csv")
test_df.shape

(30530, 136)

In [7]:
test_pred = best_svm.predict(test_df)
test_pred



array([2, 2, 2, ..., 2, 2, 2])

In [None]:
df_output = pd.read_csv("./canadian-hospital-re-admittance-challenge/sample_submission.csv")
df_output["readmission_id"] = test_pred
df_output.to_csv("submission_svm.csv", index=False)