In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import (LabelEncoder, PolynomialFeatures, StandardScaler, OneHotEncoder, LabelEncoder)
import numpy as np
from sklearn.pipeline import Pipeline
import seaborn as sb
from sklearn.compose import ColumnTransformer
from sklearn.metrics import f1_score

In [None]:
d = sb.load_dataset("titanic")
d["age"].fillna(d["age"].mean(), inplace=True)
d.dropna(inplace=True)

y = d['alive']


d.drop(columns=["deck", "adult_male", "who", "embark_town", "alive"], inplace=True)

x_train, x_test, y_train, y_test = train_test_split(d, y, shuffle=True, stratify=y)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  d["age"].fillna(d["age"].mean(), inplace=True)


In [None]:
x_train

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,alone
689,1,1,female,15.000000,0,1,211.3375,S,First,False
544,0,1,male,50.000000,1,0,106.4250,C,First,False
751,1,3,male,6.000000,0,1,12.4750,S,Third,False
310,1,1,female,24.000000,0,0,83.1583,C,First,True
715,0,3,male,19.000000,0,0,7.6500,S,Third,True
...,...,...,...,...,...,...,...,...,...,...
31,1,1,female,29.699118,1,0,146.5208,C,First,False
765,1,1,female,51.000000,1,0,77.9583,S,First,False
369,1,1,female,24.000000,0,0,69.3000,C,First,True
621,1,1,male,42.000000,1,0,52.5542,S,First,False


In [None]:
preprocessing = ColumnTransformer([
    ("num", StandardScaler(), ["age", "fare"]),
    ("cat", OneHotEncoder(), ["sex", "class", "alone"])
])
pipeline = Pipeline([
    ("pre", preprocessing),
    ("model" , SVC())
])
param_grid = {
    "model__C": [250, 300, 350],  # Regularization strength
    "model__kernel": ["linear", "poly", "rbf", "sigmoid"],  # Different kernel types
    "model__degree": [1],  # Only relevant for 'poly' kernel
    "model__gamma": [0.003, 0.004, 0.005, 0.006],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
}

GSV = GridSearchCV(estimator=pipeline,param_grid=param_grid,  cv = 4, scoring="f1_macro")

In [None]:
GSV.fit(x_train,y_train)

In [None]:
GSV.best_estimator_

In [None]:
y_test_pd = pd.DataFrame(y_test)
y_pred_pd = pd.DataFrame(GSV.best_estimator_.predict(x_test))
encoder = OneHotEncoder()
y_test_encoded = encoder.fit_transform(y_test_pd)
y_pred_encoded = encoder.fit_transform(y_pred_pd)
f1_score(y_test_encoded, y_pred_encoded, average= "macro")

0.7415204678362572