In [3]:
from PIL import Image
import numpy as np
import os
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.preprocessing import Normalizer
from sklearn.metrics import accuracy_score
from sklearnex import patch_sklearn
patch_sklearn()


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
#getting the data
categories = ['bad', 'good', 'neutral']
data = []
labels = []
for label, category in enumerate(categories):
    for i in tqdm(os.listdir(os.path.join('images', category)), desc=f'Getting data for {category}'):
        data.append(np.asarray(Image.open(os.path.join('images', category, i)).convert('L')).flatten())
        labels.append(label)

data = np.asarray(data)
labels = np.asarray(labels)
print(data.size)
print(labels.size)

Getting data for bad:   0%|          | 0/63611 [00:00<?, ?it/s]

Getting data for good:   0%|          | 0/60066 [00:00<?, ?it/s]

Getting data for neutral:   0%|          | 0/76315 [00:00<?, ?it/s]

1999920000
199992


In [3]:
rng1 = np.random.default_rng(seed=5111997)
permutation = rng1.permutation(len(labels))
data = data[permutation]
labels = labels[permutation]

In [4]:
data_short = data[:20000]
labels_short = labels[:20000]

In [5]:
#train/test split
# x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=5111997)
x_train, x_test, y_train, y_test = train_test_split(data_short, labels_short, test_size=0.2, random_state=5111997)

In [13]:
data_rest = data[20000:]
x_full = Normalizer(copy=False).fit_transform(data_rest)

In [6]:
x_train = Normalizer(copy=False).fit_transform(x_train)
x_test = Normalizer(copy=False).fit_transform(x_test)

In [7]:
len(y_test)

4000

In [8]:
#train SVM
classifier = LinearSVC(verbose=3, C=1000)
# classifier = SVC(verbose=True, C=1000, gamma=0.01)
# parameters = [{'gamma': [0.01, 0.001, 0.0001], 'C': [1, 10, 100, 1000]}]
# parameters = [{'C': [1, 10, 100, 1000]}]
# grid_search = GridSearchCV(classifier, parameters, verbose=3)
# grid_search.fit(x_train, y_train)
classifier.fit(x_train, y_train)


[LibLinear]

In [14]:
# best_estimator = grid_search.best_estimator_
# y_test_prediction = best_estimator.predict(x_test)
# print(accuracy_score(y_test_prediction, y_test))
classifier.score(x_full, labels[20000:])

0.681352504555758

In [12]:
print(grid_search.best_params_)

{'C': 1000, 'gamma': 0.01}


In [16]:
#saving model
import pickle
with open('models/SVM/LinearSVC_samples20000_C1000.pkl','wb') as f:
    pickle.dump(classifier,f)

In [17]:
import pickle
with open('models/SVM/LinearSVC_samples20000_C1000.pkl', 'rb') as f:
    clf2 = pickle.load(f)
clf2.predict(Normalizer(copy=False).fit_transform([np.asarray(Image.open(f'images\\good\\1br3k1_pp3pp1_2nqp1p1_3p4_3Pn3_2P1QN1P_PP3PP1_2NRR1K1 w - - 12 21.png').convert('L')).flatten()]))

array([0])

In [7]:
Normalizer(copy=False).fit_transform([np.asarray(Image.open(f'images\\bad\\1B1k3r_3b1ppp_2qb4_p6n_PpBp4_1P1P3P_2P2PP1_RN2R1K1 w - - 1 21.png').convert('L')).flatten()])

array([[0.01437995, 0.01437995, 0.01437995, ..., 0.01437995, 0.01437995,
        0.01437995]])

In [1]:
import torchvision
import torch
torchvision.__version__
torch.cuda.is_available()

True