# Training of Classifier for Suits Recognition

This notebook trains a simple machine-learning classifier with scikit-learn (a Linear SVM, chosen after several tests) that is then used to predict the suits of the cards.

### Import useful classes and modules.

In [None]:
import os
import pickle
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.model_selection import RepeatedKFold

from scripts.data_loader import DataLoader
from scripts.extract import Extractor
from scripts.extract import get_color_pixels
from scripts.figures_suits_classifier import FiguresSuitsClassifier

### Extract and Preprocess suits from each image as well as labels.

In [2]:
extractor = Extractor()
data_loader = DataLoader()

# One record per labelled (game, round, player) suit crop; converted to a DataFrame at the end.
records = []

for game in data_loader.get_available_games():
    for round_ in data_loader.get_available_rounds(game):
        print("Game: {}, Round: {}".format(game, round_))
        image, label = data_loader[game, round_]

        # The extractor returns three values; only the per-player figure/suit crops are used here.
        _dealer, _cards, figures_suits = extractor(image)

        for player, figure_suit in figures_suits.items():
            player_key = 'P' + str(player)

            # Avoid unlabelled samples: the round needs a label, and this player's entry must be a string.
            is_labelled = label is not None and isinstance(label[player_key], str)
            if not is_labelled:
                continue

            # The label string unpacks into exactly two characters; only the second (the suit) is kept.
            _, suit_label = label[player_key]

            mask = FiguresSuitsClassifier.preprocess_suit(
                figure_suit['suit'],
                figure_suit['color'],
            )

            records.append({
                'game': game,
                'round': round_,
                'player': player,
                'image': mask,
                'label': suit_label,
            })

images = pd.DataFrame(records)

Game: 1, Round: 1
Game: 1, Round: 2
Game: 1, Round: 3
Game: 1, Round: 4
Game: 1, Round: 5
Game: 1, Round: 6
Game: 1, Round: 7
Game: 1, Round: 8
Game: 1, Round: 9
Game: 1, Round: 10
Game: 1, Round: 11
Game: 1, Round: 12
Game: 1, Round: 13
Game: 2, Round: 1
Game: 2, Round: 2
Game: 2, Round: 3
Game: 2, Round: 4
Game: 2, Round: 5
Game: 2, Round: 6
Game: 2, Round: 7
Game: 2, Round: 8
Game: 2, Round: 9
Game: 2, Round: 10
Game: 2, Round: 11
Game: 2, Round: 12
Game: 2, Round: 13
Game: 3, Round: 1
Game: 3, Round: 2
Game: 3, Round: 3
Game: 3, Round: 4
Game: 3, Round: 5
Game: 3, Round: 6
Game: 3, Round: 7
Game: 3, Round: 8
Game: 3, Round: 9
Game: 3, Round: 10
Game: 3, Round: 11
Game: 3, Round: 12
Game: 3, Round: 13
Game: 4, Round: 1
Game: 4, Round: 2
Game: 4, Round: 3
Game: 4, Round: 4
Game: 4, Round: 5
Game: 4, Round: 6
Game: 4, Round: 7
Game: 4, Round: 8
Game: 4, Round: 9
Game: 4, Round: 10
Game: 4, Round: 11
Game: 4, Round: 12
Game: 4, Round: 13
Game: 5, Round: 1
Game: 5, Round: 2
Game: 5, Rou

### Cross-Validation to see which parameters work best for this task.

In [3]:
# Integer class id for each suit letter found in the labels.
label_translate = {'S': 0, 'C': 1, 'H': 2, 'D': 3}

# Seed for the cross-validation splitter. With an integer seed every call to
# `kf.split` yields the same folds, so all hyper-parameter configurations are
# scored on identical train/validation partitions and the ranking is
# reproducible from one run to the next.
CV_SEED = 0

# The targets do not depend on the number of Fourier coefficients: build them once.
y = np.array([label_translate[suit] for suit in images["label"]])

results = []
for n in np.arange(5, 50, 5):
    # Feature matrix: one Fourier descriptor (keeping `n` coefficients) per suit mask.
    X = np.array([
        FiguresSuitsClassifier.get_fourier_descriptor(mask, n_coefficients_to_keep = n)
        for mask in images["image"]
    ])

    # 5 folds x 5 repetitions = 25 train/validation splits per configuration.
    kf = RepeatedKFold(n_splits = 5, n_repeats = 5, random_state = CV_SEED)

    for C in np.linspace(0.1, 1, 10):
        for penalty in ['l1', 'l2']:
            # dual = False is required by LinearSVC for the l1 penalty with squared hinge loss.
            model = LinearSVC(C = C, tol = 1e-5, penalty = penalty, loss = 'squared_hinge', dual = False, max_iter = 500000)

            train_acc = []
            val_acc = []

            for train_idx, test_idx in kf.split(X):
                X_train, X_test = X[train_idx], X[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                model.fit(X_train, y_train)

                train_acc.append(model.score(X_train, y_train))
                val_acc.append(model.score(X_test, y_test))

            # 'test_acc' holds the mean accuracy on the held-out folds (i.e. validation accuracy).
            results.append({'n': n, 'C': C, 'penalty': penalty, 'train_acc': np.mean(train_acc), 'test_acc': np.mean(val_acc)})



In [4]:
# Turn the per-configuration records into a table.
results = pd.DataFrame(results)

# Lift the row limit so the whole table is displayed.
pd.set_option('display.max_rows', None)

# Ranking: highest held-out accuracy first, then highest train accuracy,
# then fewer coefficients (smaller n), then larger C.
sort_columns = ['test_acc', 'train_acc', 'n', 'C']
sort_ascending = [False, False, True, False]

results.sort_values(by = sort_columns, ascending = sort_ascending).reset_index(drop = True)

Unnamed: 0,n,C,penalty,train_acc,test_acc
0,35,0.6,l1,0.996796,0.9936
1,45,0.5,l2,1.0,0.993589
2,10,0.4,l1,0.996795,0.992975
3,45,0.6,l1,1.0,0.992975
4,45,0.1,l2,1.0,0.992965
5,25,0.1,l1,0.994712,0.992965
6,35,0.7,l1,0.996795,0.992944
7,35,1.0,l1,0.996794,0.992944
8,40,0.5,l1,0.996794,0.992934
9,15,0.2,l1,0.995032,0.992924


### Choose a set of parameters that works well, retrain on the whole training set, and save the model

In [5]:
# Hyper-parameters selected for the final model.
n, C, penalty = 45, 0.9, 'l1'

# Rebuild the full dataset with the chosen number of Fourier coefficients.
X = np.array([
    FiguresSuitsClassifier.get_fourier_descriptor(mask, n_coefficients_to_keep = n)
    for mask in images["image"]
])
y = np.array([label_translate[suit] for suit in images["label"]])

model = LinearSVC(C = C, tol = 1e-5, penalty = penalty, loss = 'squared_hinge', dual = False, max_iter = 500000)

# Fit on every available sample: no hold-out here, so the score below is a training accuracy only.
model.fit(X, y)

print('Train Accuracy:', model.score(X, y))

model_path = './Models/suits_classifier.pkl'

# Create the output directory if needed so the save cannot fail after training has finished.
os.makedirs(os.path.dirname(model_path), exist_ok = True)

# The file holds two consecutive pickle records: first the fitted model, then `n`.
# A reader has to call `pickle.load` twice on the same handle, in that order.
with open(model_path, 'wb') as file:
    pickle.dump(model, file)
    pickle.dump(n, file)
    pickle.dump(n, file)

Train Accuracy: 1.0
