# C-Support Vector Classification

In [3]:
# libraries
import numpy as np
import pandas as pd
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import imageio.v3 as imageio
import cv2

In [4]:
# Dataset locations.
# The base directory can be overridden with the TUMOR_IMAGES_DIR environment
# variable so the notebook runs on other machines; when it is unset the
# original local path is used.
data_dir = os.environ.get(
    "TUMOR_IMAGES_DIR",
    "C:/Users/bodin/School/MATH 5680/Projec/tumor_images",
)
train_dir = f"{data_dir}/Training"
test_dir = f"{data_dir}/Testing"

# Class names; each one is also the name of a sub-directory inside
# train_dir and test_dir.
labels = ["glioma_tumor","meningioma_tumor","no_tumor","pituitary_tumor"]

In [5]:
# Load every image of every class into memory, resized to a common shape.
IMG_SIZE = (200, 200)  # target size handed to cv2.resize


def _load_split(split_dir, class_names, size=IMG_SIZE):
    """Read and resize all images found under ``split_dir/<class_name>/``.

    Parameters
    ----------
    split_dir : str
        Directory that contains one sub-directory per class.
    class_names : list of str
        Sub-directory names; each name is also used as the label of the
        images it contains.
    size : tuple of int
        Target size passed to ``cv2.resize``.

    Returns
    -------
    images : list
        The resized images, in the order they were read.
    image_labels : list of str
        Class name of each image, aligned with ``images``.
    skipped : list of str
        Paths of files that ``cv2.imread`` could not decode.
    """
    images, image_labels, skipped = [], [], []
    for class_name in class_names:
        class_dir = os.path.join(split_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any later CV split) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            path = os.path.join(class_dir, file_name)
            img = cv2.imread(path)
            if img is None:
                # imread yields None for files it cannot decode (e.g. stray
                # non-image files); resizing None would raise, so skip it.
                skipped.append(path)
                continue
            images.append(cv2.resize(img, size))
            image_labels.append(class_name)
    return images, image_labels, skipped


# Training dataset
train_img_list, train_label_list, train_skipped = _load_split(train_dir, labels)

# Testing dataset
test_img_list, test_label_list, test_skipped = _load_split(test_dir, labels)

# Report anything that was dropped so missing samples never go unnoticed.
for skipped_path in train_skipped + test_skipped:
    print(f"Skipped unreadable file: {skipped_path}")

In [6]:
# Encode the string class names as integer labels.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

# Learn the name -> integer mapping from the training labels only, then apply
# that same mapping to the test labels. Re-fitting on the test labels (as
# fit_transform would) could silently assign different integers if the test
# set did not contain exactly the same classes.
train_label_list = label_encoder.fit_transform(train_label_list)
test_label_list = label_encoder.transform(test_label_list)

# Convert the lists of images to int64 arrays.
train_img_list = np.array(train_img_list).astype(np.int64)
test_img_list = np.array(test_img_list).astype(np.int64)

In [7]:
# Number of training images per encoded class.
# Encoded label -> tumor type:
#   0 = Glioma
#   1 = Meningioma
#   2 = None
#   3 = Pituitary
train_label_counts = pd.Series(train_label_list).value_counts()
train_label_counts

3    827
0    826
1    822
2    395
dtype: int64

In [8]:
# Flatten each (200, 200, 3) image into a single row of 120000 values so the
# data forms a 2-D (n_samples, n_features) matrix.
n_train = train_img_list.shape[0]
n_test = test_img_list.shape[0]
X_train = train_img_list.reshape(n_train, -1)
X_test = test_img_list.reshape(n_test, -1)

# The targets are the integer-encoded labels.
Y_train, Y_test = train_label_list, test_label_list

In [9]:
# Scale pixel values into the 0-1 range by dividing by 255.
# The scaled matrices are rebuilt from the image arrays instead of from
# X_train / X_test themselves, so re-running this cell cannot divide data
# that has already been scaled a second time.
X_train = train_img_list.reshape(len(train_img_list), -1) / 255
X_test = test_img_list.reshape(len(test_img_list), -1) / 255

In [10]:
### Not necessary in current state, may delete ###

#from sklearn.decomposition import PCA
#pca = PCA(.5)

#pca_train = pca.fit_transform(X_train)
#pca_test = pca.transform(X_test)

#pca_train = X_train
#pca_test = X_test

In [11]:
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# The estimator below is capped at max_iter=100, so convergence warnings are
# silenced to keep the search output readable.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# SVC parameters to alter:
#
#   C      Regularization parameter. The strength of the regularization is
#          inversely proportional to C. Must be strictly positive. The
#          penalty is a squared l2 penalty. [default = 1.0]
#
#   kernel Kernel type used by the algorithm: 'linear', 'poly', 'rbf',
#          'sigmoid' or 'precomputed'. [default = 'rbf']
#
#   degree Degree of the polynomial kernel function ('poly'). [default = 3]
#
#   gamma  Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
#          [default = 'scale']
#
#   tol    Tolerance for the stopping criterion. [default = 1e-3]

sv = SVC(max_iter=100)

# Every kernel is tried with every value of C: 4 x 3 = 12 candidates.
param_grid = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[0.1, 1, 10],
)

# 5-fold cross-validation scored by accuracy; verbose=2 logs each fit.
grid_search = GridSearchCV(
    estimator=sv,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=2,
)
grid_search.fit(X_train, Y_train)



Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ...............................C=0.1, kernel=linear; total time=  43.6s
[CV] END ...............................C=0.1, kernel=linear; total time=  41.6s
[CV] END ...............................C=0.1, kernel=linear; total time=  41.1s
[CV] END ...............................C=0.1, kernel=linear; total time=  40.8s
[CV] END ...............................C=0.1, kernel=linear; total time=  41.4s
[CV] END .................................C=0.1, kernel=poly; total time=  43.7s
[CV] END .................................C=0.1, kernel=poly; total time=  43.7s
[CV] END .................................C=0.1, kernel=poly; total time=  43.5s
[CV] END .................................C=0.1, kernel=poly; total time=  43.8s
[CV] END .................................C=0.1, kernel=poly; total time=  43.5s
[CV] END ..................................C=0.1, kernel=rbf; total time= 1.4min
[CV] END ..................................C=0.1

In [12]:
# Report the winning parameter combination and its mean cross-validated accuracy.
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters: ", best_params)
print(f"Best Accuracy Score: {best_score:.4f}")

Best Parameters:  {'C': 10, 'kernel': 'rbf'}
Best Accuracy Score: 0.6819


In [14]:
# Train the final classifier on the full training set with the rbf kernel and
# C=10; max_iter is left at its default here.
final_svc_params = {'kernel': 'rbf', 'C': 10}
sv = SVC(**final_svc_params)
sv.fit(X_train, Y_train)

In [None]:
# Evaluation: score the fitted classifier on the training and test sets.
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)

# Predict once and reuse the predictions for every test-set metric.
y_pred = sv.predict(X_test)
accuracy = accuracy_score(Y_test, y_pred)

print("Training Score: ", sv.score(X_train, Y_train))
# sv.score(X_test, Y_test) would run a second full prediction pass only to
# recompute this same accuracy, so the value computed above is reused.
print("testing Score: ", accuracy)
print(f"Current hyperparameter: {sv.get_params()['C']}")
print(f'Accuracy: {accuracy}\n')
# Rows are true classes and columns are predicted classes.
print("Confusion Matrix: \n", confusion_matrix(Y_test, y_pred), "\n")
print(classification_report(Y_test, y_pred))