In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [3]:
X_train = pd.read_csv("X_train.csv")
X_train = X_train.iloc[:, 1:]
X_test = pd.read_csv("X_test.csv")
X_test = X_test.iloc[:, 1:]
Y_train = pd.read_csv("y_train.csv")
Y_test = pd.read_csv("y_test.csv")
Y_train = Y_train['Rating as Factor'].astype('category') #factorize trainset
Y_test = Y_test['Rating as Factor'].astype('category')   #factorize testset

In [19]:
# Create pipeline object with standard scaler and SVC estimator
pipe = Pipeline([('scaler', StandardScaler()), 
                 ('svm_poly', SVC(kernel='poly', random_state=0,max_iter=100000))])
# Define parameter grid
param_grid = {'svm_poly__C': [100],
              'svm_poly__degree': [3]}
# Run grid search
grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, Y_train)

# Print results
print('Best CV accuracy: {:.2f}'.format(grid.best_score_))
print('Test score:       {:.2f}'.format(grid.score(X_test, Y_test)))
print('Best parameters: {}'.format(grid.best_params_))

Best CV accuracy: 0.83
Test score:       0.85
Best parameters: {'svm_poly__C': 100, 'svm_poly__degree': 3}


In [20]:
# Predict classes
y_pred = grid.predict(X_test)

# Manual confusion matrix as pandas DataFrame
confm = pd.DataFrame({'Predicted': y_pred,
                      'True': Y_test})
print(confm.groupby(['True','Predicted'], sort=True).size().unstack('Predicted')) 

Predicted     0      1      2     3      4      5     6      7      8     9   \
True                                                                           
0          640.0   11.0    NaN   NaN   48.0    NaN   NaN   23.0    8.0   NaN   
1           16.0  348.0   14.0   NaN    1.0    NaN   NaN    1.0    9.0   NaN   
2            1.0   14.0  181.0   NaN    NaN    2.0   NaN    NaN    NaN   NaN   
3            NaN    NaN    NaN  59.0    NaN    NaN   NaN    NaN    NaN   NaN   
4           24.0    2.0    1.0   NaN  963.0   25.0   2.0    4.0   69.0   NaN   
5            9.0    1.0    NaN   NaN   99.0  487.0   NaN    3.0   34.0   NaN   
6            NaN    NaN    5.0   NaN    NaN    NaN  80.0    NaN   12.0   1.0   
7           24.0    2.0    NaN   NaN   47.0    NaN   NaN  634.0   69.0   NaN   
8           36.0    4.0    NaN   NaN   94.0   11.0   NaN   35.0  755.0   NaN   
9            1.0    NaN    NaN   NaN    NaN    NaN   NaN    NaN    NaN  39.0   
10           NaN    NaN    NaN   NaN    