In [56]:
# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.model_selection import cross_val_score


In [57]:
# Load scikit-learn's bundled handwritten digits dataset via load_digits()
# (this is the small built-in digits set, not the full MNIST dataset)
digits = datasets.load_digits()

In [58]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
    random_state=42,
)

### Linear SVM

In [59]:
# Search space for the linear SVM. Each key carries the 'svm__' prefix so the
# value is routed to the pipeline step named 'svm'.
param_grid_linear = dict(
    svm__C=[0.1, 1, 10, 100, 1000],
    svm__max_iter=[1000, 5000, 10000],
)

#### This pipeline is used only for the grid search; it is not the final model.

In [60]:
# Grid-search pipeline for the linear SVM: standardize the features, then fit
# a LinearSVC (dual=False, max_iter=10000; the grid overrides max_iter)
pipeline_linear = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", svm.LinearSVC(max_iter=10000, dual=False)),
    ]
)

In [61]:
# Exhaustive search over param_grid_linear using the linear pipeline.
# refit=True retrains the best configuration once the search finishes,
# n_jobs=-1 runs the fits in parallel, and verbose=3 logs every fit.
grid_linear = GridSearchCV(
    estimator=pipeline_linear,
    param_grid=param_grid_linear,
    refit=True,
    verbose=3,
    n_jobs=-1,
)

In [62]:
# Run the linear-SVM grid search on the training split only; the test split
# is not passed to the search
grid_linear.fit(X_train, y_train)



Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 2/5] END ....svm__C=0.1, svm__max_iter=1000;, score=0.941 total time=   0.2s
[CV 4/5] END ....svm__C=0.1, svm__max_iter=1000;, score=0.969 total time=   0.3s
[CV 2/5] END ....svm__C=0.1, svm__max_iter=5000;, score=0.941 total time=   0.3s
[CV 5/5] END ....svm__C=0.1, svm__max_iter=1000;, score=0.927 total time=   0.4s
[CV 1/5] END ....svm__C=0.1, svm__max_iter=5000;, score=0.969 total time=   0.3s
[CV 2/5] END ...svm__C=0.1, svm__max_iter=10000;, score=0.941 total time=   0.3s
[CV 1/5] END ....svm__C=0.1, svm__max_iter=1000;, score=0.969 total time=   0.4s
[CV 3/5] END ....svm__C=0.1, svm__max_iter=1000;, score=0.955 total time=   0.7s
[CV 4/5] END ...svm__C=0.1, svm__max_iter=10000;, score=0.969 total time=   0.4s
[CV 4/5] END ....svm__C=0.1, svm__max_iter=5000;, score=0.969 total time=   0.4s
[CV 3/5] END ....svm__C=0.1, svm__max_iter=5000;, score=0.955 total time=   0.5s
[CV 5/5] END ....svm__C=0.1, svm__max_iter=5000;



[CV 1/5] END ....svm__C=100, svm__max_iter=1000;, score=0.938 total time=   4.4s




[CV 5/5] END .....svm__C=10, svm__max_iter=1000;, score=0.906 total time=  10.2s




[CV 2/5] END .....svm__C=10, svm__max_iter=1000;, score=0.931 total time=  11.1s
[CV 2/5] END ....svm__C=100, svm__max_iter=1000;, score=0.931 total time=   4.2s




[CV 1/5] END .....svm__C=10, svm__max_iter=1000;, score=0.944 total time=  13.6s
[CV 5/5] END .....svm__C=10, svm__max_iter=5000;, score=0.906 total time=  11.0s
[CV 5/5] END ....svm__C=10, svm__max_iter=10000;, score=0.906 total time=  10.1s




[CV 3/5] END .....svm__C=10, svm__max_iter=1000;, score=0.934 total time=  13.9s
[CV 5/5] END ....svm__C=100, svm__max_iter=1000;, score=0.902 total time=   4.2s




[CV 4/5] END .....svm__C=10, svm__max_iter=1000;, score=0.930 total time=  15.1s
[CV 3/5] END ....svm__C=100, svm__max_iter=1000;, score=0.930 total time=   4.9s
[CV 1/5] END ....svm__C=100, svm__max_iter=5000;, score=0.934 total time=   4.4s




[CV 4/5] END ....svm__C=100, svm__max_iter=1000;, score=0.930 total time=   7.0s
[CV 5/5] END ....svm__C=100, svm__max_iter=5000;, score=0.902 total time=   3.7s
[CV 2/5] END ....svm__C=100, svm__max_iter=5000;, score=0.931 total time=   6.7s
[CV 2/5] END ....svm__C=10, svm__max_iter=10000;, score=0.934 total time=  18.1s
[CV 1/5] END ...svm__C=100, svm__max_iter=10000;, score=0.934 total time=   4.9s
[CV 2/5] END ...svm__C=100, svm__max_iter=10000;, score=0.931 total time=   6.1s
[CV 1/5] END ...svm__C=1000, svm__max_iter=1000;, score=0.934 total time=   3.2s




[CV 3/5] END .....svm__C=10, svm__max_iter=5000;, score=0.934 total time=  22.0s
[CV 2/5] END ...svm__C=1000, svm__max_iter=1000;, score=0.924 total time=   3.4s
[CV 5/5] END ...svm__C=100, svm__max_iter=10000;, score=0.902 total time=   4.2s
[CV 3/5] END ....svm__C=100, svm__max_iter=5000;, score=0.930 total time=  11.0s
[CV 1/5] END .....svm__C=10, svm__max_iter=5000;, score=0.948 total time=  23.2s




[CV 3/5] END ...svm__C=1000, svm__max_iter=1000;, score=0.934 total time=   3.9s
[CV 3/5] END ....svm__C=10, svm__max_iter=10000;, score=0.930 total time=  22.7s
[CV 1/5] END ....svm__C=10, svm__max_iter=10000;, score=0.948 total time=  23.3s
[CV 4/5] END ....svm__C=100, svm__max_iter=5000;, score=0.930 total time=  10.6s
[CV 4/5] END .....svm__C=10, svm__max_iter=5000;, score=0.930 total time=  23.7s
[CV 5/5] END ...svm__C=1000, svm__max_iter=1000;, score=0.899 total time=   3.3s




[CV 4/5] END ...svm__C=1000, svm__max_iter=1000;, score=0.927 total time=   3.9s
[CV 3/5] END ...svm__C=100, svm__max_iter=10000;, score=0.930 total time=   9.4s




[CV 2/5] END .....svm__C=10, svm__max_iter=5000;, score=0.934 total time=  25.3s
[CV 1/5] END ...svm__C=1000, svm__max_iter=5000;, score=0.934 total time=   3.1s
[CV 4/5] END ....svm__C=10, svm__max_iter=10000;, score=0.930 total time=  23.7s
[CV 4/5] END ...svm__C=100, svm__max_iter=10000;, score=0.930 total time=   8.2s
[CV 5/5] END ...svm__C=1000, svm__max_iter=5000;, score=0.899 total time=   2.3s
[CV 2/5] END ..svm__C=1000, svm__max_iter=10000;, score=0.924 total time=   1.8s
[CV 2/5] END ...svm__C=1000, svm__max_iter=5000;, score=0.924 total time=   3.3s
[CV 3/5] END ...svm__C=1000, svm__max_iter=5000;, score=0.934 total time=   3.2s
[CV 5/5] END ..svm__C=1000, svm__max_iter=10000;, score=0.899 total time=   1.4s
[CV 1/5] END ..svm__C=1000, svm__max_iter=10000;, score=0.934 total time=   1.9s
[CV 3/5] END ..svm__C=1000, svm__max_iter=10000;, score=0.934 total time=   1.7s
[CV 4/5] END ...svm__C=1000, svm__max_iter=5000;, score=0.927 total time=   2.9s
[CV 4/5] END ..svm__C=1000, 

In [63]:
# Retrieve the hyperparameter combination the linear grid search ranked best
best_params_linear = grid_linear.best_params_
# Show the selected values (keys keep the 'svm__' step prefix used in the grid)
print("Best parameters for Linear SVM: ", best_params_linear)

Best parameters for Linear SVM:  {'svm__C': 0.1, 'svm__max_iter': 1000}


#### Final pipeline with the best parameters

In [64]:
# Final linear-SVM pipeline. Only C is taken from the grid-search result;
# max_iter is fixed at 10000 here rather than read from best_params_linear.
pipeline_linear_best = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "svm",
            svm.LinearSVC(
                dual=False,
                max_iter=10000,
                C=best_params_linear["svm__C"],
            ),
        ),
    ]
)

In [65]:
# Train the final linear-SVM pipeline (scaler + LinearSVC) on the training split
pipeline_linear_best.fit(X_train, y_train)

In [66]:
# Score the fitted linear-SVM pipeline on the held-out test split and print it
accuracy_linear = pipeline_linear_best.score(X_test, y_test)
print("Accuracy of Linear SVM: ", accuracy_linear)

Accuracy of Linear SVM:  0.9638888888888889


In [67]:
# Score on the same samples the pipeline was fitted on
training_score_linear = pipeline_linear_best.score(X=X_train, y=y_train)
print("Training score of Linear SVM: ", training_score_linear)

# Mean score over 5 cross-validation folds of the training split
cv_score_linear = cross_val_score(
    estimator=pipeline_linear_best,
    X=X_train,
    y=y_train,
    cv=5,
).mean()
print("Cross-validation score of Linear SVM: ", cv_score_linear)

Training score of Linear SVM:  0.9874739039665971
Cross-validation score of Linear SVM:  0.9519792876500194


#### There is no overfitting or underfitting because the training score and the cross-validation score are both high and close to each other.

### Kernel SVM

In [68]:
# Search space for the kernel SVM. Each key carries the 'svm__' prefix so the
# value is routed to the pipeline step named 'svm'.
param_grid_rbf = dict(
    svm__C=[0.1, 1, 10, 100, 1000],
    svm__gamma=['scale', 'auto', 0.0001, 0.001, 0.01, 0.1, 1],
)

#### This pipeline is used only for the grid search; it is not the final model.

In [69]:
# Grid-search pipeline for the kernel SVM: standardize the features, then fit
# an SVC with its default settings (C and gamma are supplied by the grid)
pipeline_rbf = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", svm.SVC()),
    ]
)

In [70]:
# Exhaustive search over param_grid_rbf using the kernel-SVM pipeline.
# refit=True retrains the best configuration once the search finishes,
# n_jobs=-1 runs the fits in parallel, and verbose=3 logs every fit.
grid_rbf = GridSearchCV(
    estimator=pipeline_rbf,
    param_grid=param_grid_rbf,
    refit=True,
    verbose=3,
    n_jobs=-1,
)

In [71]:
# Run the kernel-SVM grid search on the training split only; the test split
# is not passed to the search
grid_rbf.fit(X_train, y_train)

Fitting 5 folds for each of 35 candidates, totalling 175 fits
[CV 1/5] END ......svm__C=0.1, svm__gamma=scale;, score=0.934 total time=   0.2s
[CV 2/5] END ......svm__C=0.1, svm__gamma=scale;, score=0.917 total time=   0.2s[CV 3/5] END ......svm__C=0.1, svm__gamma=scale;, score=0.927 total time=   0.2s

[CV 4/5] END ......svm__C=0.1, svm__gamma=scale;, score=0.955 total time=   0.2s
[CV 5/5] END ......svm__C=0.1, svm__gamma=scale;, score=0.920 total time=   0.2s


[CV 4/5] END .......svm__C=0.1, svm__gamma=auto;, score=0.955 total time=   0.2s
[CV 3/5] END .......svm__C=0.1, svm__gamma=auto;, score=0.927 total time=   0.2s
[CV 2/5] END .......svm__C=0.1, svm__gamma=auto;, score=0.917 total time=   0.2s
[CV 1/5] END .......svm__C=0.1, svm__gamma=auto;, score=0.934 total time=   0.2s
[CV 5/5] END .......svm__C=0.1, svm__gamma=auto;, score=0.916 total time=   0.2s
[CV 4/5] END .....svm__C=0.1, svm__gamma=0.0001;, score=0.108 total time=   0.2s
[CV 3/5] END .....svm__C=0.1, svm__gamma=0.0001;, score=0.108 total time=   0.3s
[CV 1/5] END .....svm__C=0.1, svm__gamma=0.0001;, score=0.108 total time=   0.3s
[CV 1/5] END ......svm__C=0.1, svm__gamma=0.001;, score=0.358 total time=   0.3s
[CV 2/5] END .....svm__C=0.1, svm__gamma=0.0001;, score=0.108 total time=   0.3s
[CV 5/5] END .....svm__C=0.1, svm__gamma=0.0001;, score=0.105 total time=   0.3s
[CV 3/5] END .......svm__C=0.1, svm__gamma=0.01;, score=0.923 total time=   0.2s
[CV 1/5] END .......svm__C=0

In [72]:
# Retrieve the hyperparameter combination the kernel-SVM grid search ranked best
best_params_rbf = grid_rbf.best_params_
# Show the selected values (keys keep the 'svm__' step prefix used in the grid)
print("Best parameters for Kernel SVM: ", best_params_rbf)


Best parameters for Kernel SVM:  {'svm__C': 100, 'svm__gamma': 0.01}


#### Final pipeline with the best parameters

In [73]:
# Final kernel-SVM pipeline: same scaler + SVC layout as the search pipeline,
# with C and gamma taken from the grid-search result
pipeline_rbf_best = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "svm",
            svm.SVC(
                C=best_params_rbf["svm__C"],
                gamma=best_params_rbf["svm__gamma"],
            ),
        ),
    ]
)


In [74]:
# Train the final kernel-SVM pipeline (scaler + SVC) on the training split
pipeline_rbf_best.fit(X_train, y_train)

In [75]:
# Score the fitted kernel-SVM pipeline on the held-out test split and print it
accuracy_rbf = pipeline_rbf_best.score(X_test, y_test)
print("Accuracy of Kernel SVM: ", accuracy_rbf)

Accuracy of Kernel SVM:  0.9805555555555555


In [76]:
# Score on the same samples the pipeline was fitted on
training_score_rbf = pipeline_rbf_best.score(X=X_train, y=y_train)
print("Training score of Kernel SVM: ", training_score_rbf)

# Mean score over 5 cross-validation folds of the training split
cv_score_rbf = cross_val_score(
    estimator=pipeline_rbf_best,
    X=X_train,
    y=y_train,
    cv=5,
).mean()
print("Cross-validation score of Kernel SVM: ", cv_score_rbf)


Training score of Kernel SVM:  1.0


Cross-validation score of Kernel SVM:  0.9812161246612467


#### The model is neither overfitting nor underfitting: although the training score is 1.0 (perfect), the cross-validation score is 0.98, so the gap between the training and cross-validation scores is small. And since both scores are high, the model is certainly not underfitting.