In [None]:
import sys
import os
sys.path.append(os.path.abspath(".."))

# --- hide warnings ---
# Installs process-wide "ignore" filters for two warning categories.
# NOTE(review): the hyper-parameter grid further down caps SVC's `max_iter`;
# ConvergenceWarning is presumably how scikit-learn reports that cap being hit,
# so muting it may hide fits that stopped before converging -- confirm.
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=ConvergenceWarning)

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from src.preprocessing import preprocessor, X, Y
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix


# --- hold out 20 % of the samples as a test set (fixed seed for repeatability) ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=42,
)

# --- collapse both target splits to 1-D arrays to avoid DataConversionWarning ---
y_train, y_test = (part.values.ravel() for part in (y_train, y_test))

# --- three-stage pipeline: preprocessing -> PCA -> support vector classifier ---
# The last step is called "linear_SVC", but its kernel is chosen by the grid
# search, so it is not necessarily linear.
chain = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("pca", PCA(random_state=42)),
        ("linear_SVC", SVC()),
    ]
)

# --- parameters for GridSearchCV ---
# Settings that are searched for every kernel.
# NOTE(review): `max_iter` is a solver iteration cap rather than a model
# hyper-parameter; it is kept as in the original search -- confirm it is wanted.
_shared_grid = {
    'linear_SVC__C': [1, 10],
    'linear_SVC__max_iter': [1000, 5000],
    'linear_SVC__class_weight': ['balanced'],
    'pca__n_components': [2, 5],
}

# GridSearchCV accepts a list of grids and explores each of them in turn.
# `gamma` is the kernel coefficient for 'rbf', 'poly' and 'sigmoid' only, so
# crossing it with the linear kernel would merely repeat identical fits; the
# linear kernel therefore gets its own grid without `gamma`.
param_grid = [
    {**_shared_grid, 'linear_SVC__kernel': ['linear']},
    {
        **_shared_grid,
        'linear_SVC__kernel': ['rbf', 'poly', 'sigmoid'],
        'linear_SVC__gamma': ['auto', 'scale'],
    },
]

# --- 5-fold cross-validated search over the grid, scored by accuracy ---
grid_search = GridSearchCV(
    estimator=chain,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

# --- run the search on the training split ---
grid_search.fit(X_train, y_train)

# --- label the held-out samples with the fitted search object ---
y_pred = grid_search.predict(X_test)

# --- score the test-set predictions ---
accuracy_values = accuracy_score(y_test, y_pred)

# Precision and recall are both macro-averaged over the classes.
precision_values, recall_values = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score)
)

# Rows and columns follow the explicit order low, middle, high.
confusionmatrix_values = confusion_matrix(
    y_test,
    y_pred,
    labels=['low', 'middle', 'high'],
)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

0.44594594594594594
0.4912828519385896
0.4155991831430428
[[26  0  1]
 [16  2  1]
 [19  4  5]]
              precision    recall  f1-score   support

        high      0.714     0.179     0.286        28
         low      0.426     0.963     0.591        27
      middle      0.333     0.105     0.160        19

    accuracy                          0.446        74
   macro avg      0.491     0.416     0.346        74
weighted avg      0.511     0.446     0.365        74

['low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'high' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'high' 'low' 'middle' 'low' 'low' 'low' 'low' 'middle' 'low' 'low'
 'middle' 'low' 'low' 'low' 'low' 'low' 'low' 'high' 'low' 'low' 'low'
 'low' 'high' 'low' 'low' 'low' 'middle' 'low' 'low' 'high' 'low' 'low'
 'high' 'low' 'low' 'high' 'middle' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'middle' 'low' 'low' 'low']
