In [2]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Load and preprocess data
def load_data(train_path='fashion-mnist_train.csv',
              test_path='fashion-mnist_test.csv'):
    """Load the train and test CSV splits and rescale their feature columns.

    Each CSV must contain a ``label`` column; every other column is treated
    as a feature. Features are cast to ``float32`` and divided by 255
    (assumes 8-bit pixel intensities in 0-255 -- confirm against the data).

    Args:
        train_path: Path of the training CSV. Defaults to the filename the
            script has always used, relative to the working directory.
        test_path: Path of the test CSV, same conventions as ``train_path``.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If a CSV has no ``label`` column.
    """

    def _features_and_labels(csv_path):
        # Shared preprocessing so both splits are guaranteed to be scaled
        # the same way.
        frame = pd.read_csv(csv_path)
        features = frame.drop('label', axis=1).values.astype('float32') / 255.0
        labels = frame['label'].values
        return features, labels

    X_train, y_train = _features_and_labels(train_path)
    X_test, y_test = _features_and_labels(test_path)

    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = load_data()

# Keep only the leading rows of the training split; the test split is left
# whole. (Presumably this keeps the SVM grid searches below tractable.)
TRAIN_SUBSET_SIZE = 5000
X_train, y_train = X_train[:TRAIN_SUBSET_SIZE], y_train[:TRAIN_SUBSET_SIZE]

# Linear SVM: 3-fold grid search over the regularisation strength C.
print("\nTraining Linear SVM:")
linear_params = {
    'C': [0.1, 1, 10, 100],
}
linear_svc = SVC(kernel='linear', random_state=42)
# fit() returns the search object itself, so construction and fitting chain.
linear_grid = GridSearchCV(
    estimator=linear_svc,
    param_grid=linear_params,
    cv=3,
    n_jobs=-1,  # use all available cores
    verbose=1,
).fit(X_train, y_train)
print(f"Best parameters: {linear_grid.best_params_}")
print(f"Best CV score: {linear_grid.best_score_:.3f}")

# Polynomial SVM: 3-fold grid search over C and the polynomial degree.
print("\nTraining Polynomial SVM:")
poly_params = {'C': [0.1, 1, 10], 'degree': [2, 3, 4]}
poly_svc = SVC(kernel='poly', random_state=42)
# fit() returns the search object itself, so construction and fitting chain.
poly_grid = GridSearchCV(
    estimator=poly_svc,
    param_grid=poly_params,
    cv=3,
    n_jobs=-1,  # use all available cores
    verbose=1,
).fit(X_train, y_train)
print(f"Best parameters: {poly_grid.best_params_}")
print(f"Best CV score: {poly_grid.best_score_:.3f}")

# RBF SVM: 3-fold grid search over C and the kernel coefficient gamma.
print("\nTraining RBF SVM:")
rbf_params = {'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]}
rbf_svc = SVC(kernel='rbf', random_state=42)
# fit() returns the search object itself, so construction and fitting chain.
rbf_grid = GridSearchCV(
    estimator=rbf_svc,
    param_grid=rbf_params,
    cv=3,
    n_jobs=-1,  # use all available cores
    verbose=1,
).fit(X_train, y_train)
print(f"Best parameters: {rbf_grid.best_params_}")
print(f"Best CV score: {rbf_grid.best_score_:.3f}")

# Evaluate best models on test set
def evaluate_model(model, name):
    """Print a classification report for ``model`` on the test split.

    Reads the module-level ``X_test`` and ``y_test`` rather than taking the
    data as arguments.

    Args:
        model: Fitted estimator exposing ``predict``.
        name: Label printed in the heading above the report.
    """
    predictions = model.predict(X_test)
    print(f"\n{name} Performance:")
    report_text = classification_report(y_test, predictions)
    print(report_text)

# Report each search's refit best estimator, in the order they were trained.
for fitted_search, display_name in (
    (linear_grid, "Linear SVM"),
    (poly_grid, "Polynomial SVM"),
    (rbf_grid, "RBF SVM"),
):
    evaluate_model(fitted_search.best_estimator_, display_name)


Training Linear SVM:
Fitting 3 folds for each of 4 candidates, totalling 12 fits
Best parameters: {'C': 0.1}
Best CV score: 0.840

Training Polynomial SVM:
Fitting 3 folds for each of 9 candidates, totalling 27 fits
Best parameters: {'C': 10, 'degree': 2}
Best CV score: 0.841

Training RBF SVM:
Fitting 3 folds for each of 9 candidates, totalling 27 fits
Best parameters: {'C': 10, 'gamma': 0.01}
Best CV score: 0.860

Linear SVM Performance:
              precision    recall  f1-score   support

           0       0.77      0.80      0.78      1000
           1       0.97      0.97      0.97      1000
           2       0.76      0.74      0.75      1000
           3       0.84      0.88      0.86      1000
           4       0.75      0.79      0.77      1000
           5       0.91      0.90      0.90      1000
           6       0.64      0.58      0.61      1000
           7       0.89      0.90      0.90      1000
           8       0.95      0.94      0.95      1000
           9  