In [1]:
import numpy as np

In [None]:
x_test = np.load("../data/classification/X_test.npy")
y_test = np.load("../data/classification/y_test.npy")
x_train = np.load("../data/classification/X_train.npy")
y_train = np.load("../data/classification/y_train.npy")

In [57]:
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)
print("x_train first sample:", x_train[0, :10])

x_test shape: (2000, 30)
y_test shape: (2000,)
x_train first sample: [-0.27430128  0.20091783 -0.59955357 -1.33564259 -3.12191075  1.28998558
 -0.37995092  3.14831389 -2.08409739  0.25375361]


In [40]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Logistic Regression
from sklearn.linear_model import LogisticRegression
param_grid_lr = {
    'logreg__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'logreg__penalty': ['l2'],
    'logreg__solver': ['lbfgs', 'saga'],
    'logreg__max_iter': [500, 1000, 2000]
}
pipe_lr = Pipeline([
    ('scaler', StandardScaler()), 
    ('logreg', LogisticRegression(random_state=42))
])
gs_lr = GridSearchCV(pipe_lr, param_grid_lr, cv=5, n_jobs=-1)
gs_lr.fit(x_train, y_train)
print("Best Logistic Regression test score:", gs_lr.score(x_test, y_test))
print("Best Logistic Regression params:", gs_lr.best_params_)

# SVC
from sklearn.svm import SVC
param_grid_svc = {
    'svc__C': [0.01, 0.1, 1, 10, 100],
    'svc__kernel': ['linear', 'rbf', 'poly'],
    'svc__gamma': ['scale', 'auto']
}
pipe_svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(random_state=42))
])
gs_svc = GridSearchCV(pipe_svc, param_grid_svc, cv=5, n_jobs=-1)
gs_svc.fit(x_train, y_train)
print("Best SVC test score:", gs_svc.score(x_test, y_test))
print("Best SVC params:", gs_svc.best_params_)

# KNN
from sklearn.neighbors import KNeighborsClassifier
param_grid_knn = {
    'knn__n_neighbors': [5, 10, 20, 30, 40, 50],
    'knn__weights': ['uniform', 'distance'],
    'knn__p': [1, 2]
}
pipe_knn = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier())
])
gs_knn = GridSearchCV(pipe_knn, param_grid_knn, cv=5, n_jobs=-1)
gs_knn.fit(x_train, y_train)
print("Best KNN test score:", gs_knn.score(x_test, y_test))
print("Best KNN params:", gs_knn.best_params_)

Best Logistic Regression test score: 0.7385
Best Logistic Regression params: {'logreg__C': 0.1, 'logreg__max_iter': 500, 'logreg__penalty': 'l2', 'logreg__solver': 'lbfgs'}
Best SVC test score: 0.787
Best SVC params: {'svc__C': 1, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}
Best KNN test score: 0.782
Best KNN params: {'knn__n_neighbors': 30, 'knn__p': 1, 'knn__weights': 'distance'}


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe_svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(random_state=42))
])

param_grid_svc_fine = {
    'svc__C': [0.1, 0.5, 1, 5, 10, 20],
    'svc__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'svc__gamma': ['scale', 'auto', 0.01, 0.1, 1],
    'svc__degree': [2, 3, 4],          # uniquement pour 'poly'
    'svc__coef0': [0, 0.1, 0.5, 1]    # pour 'poly' et 'sigmoid'
}

gs_svc_fine = GridSearchCV(pipe_svc, param_grid_svc_fine, cv=5, n_jobs=-1, verbose=0)
gs_svc_fine.fit(x_train, y_train)

print("Best SVC test score:", gs_svc_fine.score(x_test, y_test))
print("Best SVC params:", gs_svc_fine.best_params_)


In [59]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe_svc_custom = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(random_state=1))
])

pipe_svc_custom.set_params(
    svc__C=5,
    svc__coef0=0,
    svc__degree=2,
    svc__gamma=0.1,
    svc__kernel='rbf'
)

pipe_svc_custom.fit(x_train, y_train)
print("Custom SVC test score:", pipe_svc_custom.score(x_test, y_test))

Custom SVC test score: 0.8
