In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Load and preprocess data
data_train = pd.read_csv('train.csv').drop(['ID'], axis=1)
data_test = pd.read_csv('test.csv')

X = data_train.drop(['label'], axis=1)
y = data_train['label']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.4, random_state=42)
X_test = data_test.drop(['ID'], axis=1)


In [3]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter Tuning
param_grid = {
    'C': [0.3, 0.7, 1, 3],
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto', 0.1, 1],
}


In [4]:
grid_search = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train_scaled, y_train)

best_svc = grid_search.best_estimator_
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.4f}")

# Evaluate model on validation set
y_val_pred = best_svc.predict(X_val_scaled)
accuracy = accuracy_score(y_val, y_val_pred)
print(f"SVM Validation Accuracy: {accuracy:.4f}")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best parameters: {'C': 3, 'gamma': 'scale', 'kernel': 'rbf'}
Best cross-validation score: 0.7275
SVM Validation Accuracy: 0.7276


In [5]:
y_test_pred = best_svc.predict(X_test_scaled)

# Create submission file
submission = pd.DataFrame({'ID': data_test['ID'], 'label': y_test_pred})
submission.to_csv('submission_svc.csv', index=False)