In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit, train_test_split
from sklearn.svm import SVC

In [None]:
# Base name (without extension) of the joblib file the training cell writes
# under models/.
model_name: str = "test"

In [None]:
# NOTE Load the dataset and carve out a hold-out test set.
# "Outcome" is the target column; every other column is used as a feature.
data = pd.read_csv("diabetes.csv")
y = data["Outcome"]
X = data.drop(columns=["Outcome"])

# Fixed seed keeps the 80/20 split reproducible across runs.
# No `stratify` argument is passed, so the split is not stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# NOTE Train model
# Grid-search an SVC over `params` using 5 stratified shuffle splits of the
# training data, then persist the best estimator found to
# models/<model_name>.joblib.
svm = SVC(random_state=37)
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=37)
params = {"kernel": ["linear"], "C": [0.1]}
grid_search = GridSearchCV(svm, params, n_jobs=-1, cv=sss)

grid_search.fit(X_train, y_train)

# joblib.dump does not create missing parent directories; make sure the
# target folder exists so a fresh checkout does not fail after training.
model_path = Path("models") / f"{model_name}.joblib"
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(grid_search.best_estimator_, model_path)

In [None]:
# NOTE Output stats
# Report the searched grid, the winning estimator, and its scores.

print("Parameter:", params)
print("Estimator:", grid_search.best_estimator_)
# best_score_ is the mean cross-validated score of the best parameter
# setting, not the accuracy on the training set, so it is labelled as such.
print("CV_Score:", grid_search.best_score_)
# Training accuracy, computed the same way as in the model comparison cell.
print("Train_Score:", grid_search.best_estimator_.score(X_train, y_train))
print("Test_Score:", grid_search.best_estimator_.score(X_test, y_test))

In [None]:
# Load previously saved models and print their hyperparameters together with
# train/test scores on the current split.
models_path = ["models/svm_model.joblib", "models/svm_model_with_gamma.joblib"]

for path in models_path:
    # NOTE(review): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files from a trusted source.
    model = joblib.load(path)
    name = model.__class__.__name__
    # Path.stem yields the file name without its extension regardless of how
    # many directory components precede it.
    filename = Path(path).stem
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)

    # Fetch the parameter dict once instead of once per value.
    hyperparams = model.get_params()
    c = hyperparams["C"]
    gamma = hyperparams["gamma"]
    kernel = hyperparams["kernel"]

    text = f"""
        {name} - {filename}
        -----------
        Parameter:
            C: {c}
            Gamma: {gamma}
            Kernel: {kernel}
        Scores:
            Train: {train_score}
            Test: {test_score}
        """
    print(text)