In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

data = pd.read_csv('dataset.csv', delimiter=';')

X = data.drop(columns=['Target'])
y = data['Target']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [14]:
svc = SVC(kernel='linear', random_state=42)
svc.fit(X_train, y_train)

train_predictions = svc.predict(X_train)
test_predictions = svc.predict(X_test)

train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

train_f1_macro = f1_score(y_train, train_predictions, average='macro')
test_f1_macro = f1_score(y_test, test_predictions, average='macro')

train_conf_matrix = confusion_matrix(y_train, train_predictions)
test_conf_matrix = confusion_matrix(y_test, test_predictions)

results = {
    "Train Accuracy": train_accuracy,
    "Test Accuracy": test_accuracy,
    "Train F1 Macro": train_f1_macro,
    "Test F1 Macro": test_f1_macro,
    "Train Confusion Matrix": train_conf_matrix,
    "Test Confusion Matrix": test_conf_matrix
}

results

{'Train Accuracy': 0.7803617571059431,
 'Test Accuracy': 0.7643072289156626,
 'Train F1 Macro': 0.7039734158519528,
 'Test F1 Macro': 0.6814289561080694,
 'Train Confusion Matrix': array([[ 731,   99,  150],
        [ 103,  212,  234],
        [  27,   67, 1473]], dtype=int64),
 'Test Confusion Matrix': array([[342,  36,  63],
        [ 61,  79, 105],
        [ 13,  35, 594]], dtype=int64)}

In [15]:
results

{'Train Accuracy': 0.7803617571059431,
 'Test Accuracy': 0.7643072289156626,
 'Train F1 Macro': 0.7039734158519528,
 'Test F1 Macro': 0.6814289561080694,
 'Train Confusion Matrix': array([[ 731,   99,  150],
        [ 103,  212,  234],
        [  27,   67, 1473]], dtype=int64),
 'Test Confusion Matrix': array([[342,  36,  63],
        [ 61,  79, 105],
        [ 13,  35, 594]], dtype=int64)}

## Grid Search for Hyperparameter Tuning

In [4]:
from sklearn.model_selection import GridSearchCV

param_grid_reduced = {
    'C': [0.1, 1, 10],
    'gamma': [0.1, 0.01],
    'kernel': ['linear', 'rbf']
}

# Initializing GridSearchCV with reduced parameter grid
grid_reduced = GridSearchCV(SVC(), param_grid_reduced, refit=True, verbose=2, cv=3)

# Fitting the grid search to the data
grid_reduced.fit(X_train, y_train)

# Best parameters and estimator
best_params_reduced = grid_reduced.best_params_
best_estimator_reduced = grid_reduced.best_estimator_

# Predictions with the best estimator
train_predictions_best_reduced = best_estimator_reduced.predict(X_train)
test_predictions_best_reduced = best_estimator_reduced.predict(X_test)

# Model evaluation with the best estimator
train_accuracy_best_reduced = accuracy_score(y_train, train_predictions_best_reduced)
test_accuracy_best_reduced = accuracy_score(y_test, test_predictions_best_reduced)

# F1-score calculation for the best estimator
train_f1_macro_best_reduced = f1_score(y_train, train_predictions_best_reduced, average='macro')
test_f1_macro_best_reduced = f1_score(y_test, test_predictions_best_reduced, average='macro')

# Confusion matrices with the best estimator
train_conf_matrix_best_reduced = confusion_matrix(y_train, train_predictions_best_reduced)
test_conf_matrix_best_reduced = confusion_matrix(y_test, test_predictions_best_reduced)

# Collecting the results
grid_search_results_reduced = {
    "Best Parameters": best_params_reduced,
    "Train Accuracy with Best Estimator": train_accuracy_best_reduced,
    "Test Accuracy with Best Estimator": test_accuracy_best_reduced,
    "Train F1 Macro with Best Estimator": train_f1_macro_best_reduced,
    "Test F1 Macro with Best Estimator": test_f1_macro_best_reduced,
    "Train Confusion Matrix with Best Estimator": train_conf_matrix_best_reduced,
    "Test Confusion Matrix with Best Estimator": test_conf_matrix_best_reduced
}

grid_search_results_reduced


Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END ....................C=0.1, gamma=0.1, kernel=linear; total time=   0.4s
[CV] END ....................C=0.1, gamma=0.1, kernel=linear; total time=   0.5s
[CV] END ....................C=0.1, gamma=0.1, kernel=linear; total time=   0.4s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   1.5s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   1.4s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   1.6s
[CV] END ...................C=0.1, gamma=0.01, kernel=linear; total time=   0.5s
[CV] END ...................C=0.1, gamma=0.01, kernel=linear; total time=   0.6s
[CV] END ...................C=0.1, gamma=0.01, kernel=linear; total time=   0.5s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   1.7s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   2.3s
[CV] END ......................C=0.1, gamma=0.01

{'Best Parameters': {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'},
 'Train Accuracy with Best Estimator': 0.7816537467700259,
 'Test Accuracy with Best Estimator': 0.7605421686746988,
 'Train F1 Macro with Best Estimator': 0.704224683395518,
 'Test F1 Macro with Best Estimator': 0.6775267971262046,
 'Train Confusion Matrix with Best Estimator': array([[ 730,   98,  152],
        [  92,  208,  249],
        [  21,   64, 1482]], dtype=int64),
 'Test Confusion Matrix with Best Estimator': array([[336,  41,  64],
        [ 51,  78, 116],
        [ 12,  34, 596]], dtype=int64)}