# Models for license plate character recognition

In [27]:
from sklearn import svm
import numpy as np
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn import neighbors
from sklearn import tree
#from xgboost import XGBClassifier

## General utils

In [9]:
class GeneralModelUtils:
    """Stateless helpers shared by the model handlers in this file.

    Every helper is a ``staticmethod``: none of them reads instance state,
    and declaring them static lets them be called both through the class
    (``GeneralModelUtils.do_scaling(X)``) and through an instance or subclass
    instance.
    """

    @staticmethod
    def func_overX(X):
        """Build synthetic boolean labels for an iterable of numpy arrays.

        A sample is labelled ``True`` when the sum of all its elements is
        strictly greater than half its element count (integer division).

        Args:
            X: iterable of numpy arrays (any shape per sample).

        Returns:
            1-D numpy array with one boolean per sample.
        """
        # ndarray.sum() accumulates in a wide dtype; the builtin sum() over
        # numpy scalars would wrap around for small integer dtypes (uint8).
        return np.asarray([element.sum() > element.size // 2 for element in X])

    @staticmethod
    def testing():
        """Smoke-test SVMCHandler on random data with a full grid search."""
        ##### SVM Testing #####
        X = np.random.rand(200, 60, 30)
        Y = GeneralModelUtils.func_overX(X)

        ### GridSearch ###
        model = SVMCHandler(X, Y)
        model.fit(with_score=True, with_grid=True)

    @staticmethod
    def _ensure_dimensionalit(arr):
        """Return ``arr`` with every sample flattened to one dimension.

        Args:
            arr: non-empty indexable collection of numpy arrays.

        Returns:
            ``arr`` itself when its first sample is already 1-D, otherwise a
            new list holding the flattened copy of each sample.
        """
        # Only the first sample is inspected; all samples are assumed to
        # share its rank.
        if len(arr[0].shape) == 1:
            return arr
        return [x.flatten() for x in arr]

    @staticmethod
    def _acc(y_pred, y_target):
        """Return the fraction of positions where prediction equals target.

        Args:
            y_pred: predicted labels (numpy array or any sized iterable).
            y_target: reference labels, same length/shape as ``y_pred``.

        Returns:
            Accuracy in ``[0, 1]``.

        Raises:
            AssertionError: if the two inputs differ in shape (arrays) or
                length (other sequences).
        """
        # np.array is a factory function, not a type; np.ndarray is the class
        # to test against.
        if isinstance(y_pred, np.ndarray) and isinstance(y_target, np.ndarray):
            assert y_pred.shape == y_target.shape
            return float(np.mean(y_pred == y_target))

        assert len(y_pred) == len(y_target)
        mask = [x == y for x, y in zip(y_pred, y_target)]
        return sum(mask) / len(mask)

    @staticmethod
    def do_scaling(X):
        """Standardize ``X`` with a freshly fitted ``StandardScaler``.

        NOTE(review): the fitted scaler is discarded, so the same
        transformation cannot be re-applied to data seen later.

        Args:
            X: 2-D array-like of samples.

        Returns:
            The transformed data returned by ``StandardScaler.fit_transform``.
        """
        return StandardScaler().fit_transform(X)

    @staticmethod
    def _gen_gridSearch(model, hyperparams, scoring='roc_auc', n_jobs=10, verbose=3):
        """Create an (unfitted) ``GridSearchCV`` around ``model``.

        Cross-validation uses ``StratifiedShuffleSplit`` with 5 splits, a 20%
        test fraction and a fixed ``random_state`` of 42.

        Args:
            model: estimator to tune.
            hyperparams: parameter grid passed as ``param_grid``.
            scoring: metric name forwarded to ``GridSearchCV``.
            n_jobs: number of parallel jobs forwarded to ``GridSearchCV``.
            verbose: verbosity level forwarded to ``GridSearchCV``.

        Returns:
            The configured ``GridSearchCV`` instance.
        """
        cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
        return GridSearchCV(
            model,
            param_grid=hyperparams,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
            scoring=scoring,
        )


# Models

## SVM

In [10]:
class SVMCHandler(GeneralModelUtils):
    """Wrap ``svm.SVC`` with data preparation and an optional grid search.

    The helper functions live in the ``GeneralModelUtils`` class namespace,
    not at module level, so they are called through that class explicitly.
    """

    def __init__(self, X, Y, **kwargs):
        """Prepare the training data, the base model and the grid search.

        Args:
            X: training samples; each sample is flattened to 1-D and the
                whole set is standardized.
            Y: 1-D array of targets.
            **kwargs: forwarded to ``svm.SVC``.

        Raises:
            AssertionError: if ``Y`` is not one-dimensional.
        """
        # Check the targets first so bad input fails before any scaling work.
        assert len(Y.shape) == 1
        hyperparams = {
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'C': np.logspace(-2, 10, 5),
            'gamma': np.logspace(-9, 3, 5),
        }
        self.model = svm.SVC(**kwargs)
        self.X = GeneralModelUtils.do_scaling(
            GeneralModelUtils._ensure_dimensionalit(X)
        )
        self.Y = Y
        self.grid = GeneralModelUtils._gen_gridSearch(self.model, hyperparams)

    def fit(self, with_score=True, with_grid=True):
        """Fit the model on the stored training data.

        Args:
            with_score: when true, print the accuracy on the training data.
            with_grid: when true, run the grid search and keep its best
                estimator; otherwise fit the base model as configured.
        """
        if with_grid:
            self.grid.fit(self.X, self.Y)
            print(f"The best parameters are {self.grid.best_params_} and the best score is {self.grid.best_score_}")

            # GridSearchCV (default refit=True) has already refitted the best
            # configuration on all the data. Reusing it keeps the constructor
            # kwargs, which rebuilding from best_params_ alone would drop,
            # and avoids a second fit.
            self.model = self.grid.best_estimator_
        else:
            self.model.fit(self.X, self.Y)
        if with_score:
            pred = self.predict(self.X)
            print(f"Train acc  is : {GeneralModelUtils._acc(pred, self.Y)}")

    def predict(self, X):
        """Predict labels for ``X``.

        Samples are flattened to 1-D when needed. No scaling is applied
        here, so ``X`` is used exactly as given.

        Args:
            X: non-empty collection of samples.

        Returns:
            The result of the underlying model's ``predict``.
        """
        X = GeneralModelUtils._ensure_dimensionalit(X)
        return self.model.predict(X)


## DL model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class BinaryCharacters_Classifier(nn.Module):
    """Two-block convolutional network producing 37 scores per image.

    Each block is conv -> batch norm -> 2x2 max pool -> ReLU -> 2-D dropout.
    The first fully connected layer expects 7680 features, i.e. 128 channels
    of 12 x 5, which is what a single-channel 60 x 30 input yields after the
    two blocks.
    """

    def __init__(self):
        # Zero-argument super() cannot go stale if the class is renamed.
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)
        self.conv1_bn = nn.BatchNorm2d(64)
        self.max_p_1 = F.max_pool2d
        self.activation_1 = F.relu
        self.conv1_drop = nn.Dropout2d()

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.conv2_bn = nn.BatchNorm2d(128)
        self.max_p_2 = F.max_pool2d
        self.activation_2 = F.relu
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(7680, 1024)
        self.fc2 = nn.Linear(1024, 37)

    def forward(self, x):
        """Run the network.

        Args:
            x: tensor whose first dimension is the batch; the convolution
                stack requires one input channel and must flatten to 7680
                features per sample.

        Returns:
            Tensor of shape ``(batch, 37)``.
        """
        batch = x.shape[0]
        x = self.conv1_drop(self.activation_1(self.max_p_1(self.conv1_bn(self.conv1(x)), 2)))
        x = self.conv2_drop(self.activation_2(self.max_p_2(self.conv2_bn(self.conv2(x)), 2)))
        # Flatten everything but the batch dimension for the dense layers.
        x = x.reshape(batch, -1)
        x = F.relu(self.fc1(x))
        # NOTE(review): ReLU on the final layer clamps negative scores to 0;
        # kept as-is so existing checkpoints behave the same - confirm intent.
        x = F.relu(self.fc2(x))
        return x


class CNNHandler:
    """Inference-only wrapper around a saved ``BinaryCharacters_Classifier``.

    It mirrors the constructor/fit/predict surface of the other handlers in
    this file, but training is not supported.
    """

    def __init__(self, X, Y, **kwargs):
        """Load the network weights from disk.

        Args:
            X: unused; accepted for signature parity with the other handlers.
            Y: unused; accepted for signature parity with the other handlers.
            **kwargs: must contain ``model_path``, the path of a saved
                state dict.

        Raises:
            AssertionError: if ``model_path`` is missing.
        """
        assert 'model_path' in kwargs

        model = BinaryCharacters_Classifier()
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        state = torch.load(kwargs['model_path'], map_location='cpu')
        # load_state_dict returns a key report, not the module, so keep the
        # module in its own variable instead of chaining the calls.
        model.load_state_dict(state)
        # Inference mode: disables dropout and uses running batch-norm stats.
        model.eval()
        self.model = model

    def fit(self, with_score=True, with_grid=True):
        """Do nothing except report that training is unsupported."""
        print("Not implemented for deep learning model")

    def predict(self, x):
        """Return the highest-scoring class index for each sample.

        Args:
            x: either a list of 3-D samples whose last two dimensions are
                60 and 30, or a single 4-D batch whose last two dimensions
                are 60 and 30. Samples may be tensors or numpy arrays.

        Returns:
            1-D numpy array of class indices, one per sample (a numpy array
            to match what the scikit-learn based handlers return).

        Raises:
            AssertionError: if the input does not have the expected shape.
        """
        if isinstance(x, list):
            assert all(
                len(y.shape) == 3 and y.shape[1] == 60 and y.shape[2] == 30
                for y in x
            ), "Shape must be [B, 1, 60, 30 ]"
            batch = torch.stack([torch.as_tensor(y) for y in x])
        else:
            assert (
                len(x.shape) == 4 and x.shape[2] == 60 and x.shape[3] == 30
            ), "Shape must be [B, 1, 60, 30 ]"
            batch = torch.as_tensor(x)

        # No gradients are needed for inference.
        with torch.no_grad():
            scores = self.model(batch.float())
        return torch.argmax(scores, 1).cpu().numpy()


## KNN model

In [14]:
class KNNhandler:
    """Wrap ``KNeighborsClassifier`` with data preparation and grid search.

    The helper functions live in the ``GeneralModelUtils`` class namespace,
    not at module level, so they are called through that class explicitly.
    """

    def __init__(self, X, Y, **kwargs):
        """Prepare the training data, the base model and the grid search.

        Args:
            X: training samples; each sample is flattened to 1-D and the
                whole set is standardized.
            Y: 1-D array of targets.
            **kwargs: forwarded to ``neighbors.KNeighborsClassifier``.

        Raises:
            AssertionError: if ``Y`` is not one-dimensional.
        """
        # Check the targets first so bad input fails before any scaling work.
        assert len(Y.shape) == 1
        hyperparams = {
            'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            'n_neighbors': np.arange(3, 10, 2),
            'p': np.arange(1, 3),
        }
        self.model = neighbors.KNeighborsClassifier(**kwargs)
        self.X = GeneralModelUtils.do_scaling(
            GeneralModelUtils._ensure_dimensionalit(X)
        )
        self.Y = Y
        self.grid = GeneralModelUtils._gen_gridSearch(self.model, hyperparams)

    def fit(self, with_score=True, with_grid=True):
        """Fit the model on the stored training data.

        Args:
            with_score: when true, print the accuracy on the training data.
            with_grid: when true, run the grid search and keep its best
                estimator; otherwise fit the base model as configured.
        """
        if with_grid:
            self.grid.fit(self.X, self.Y)
            print(f"The best parameters are {self.grid.best_params_} and the best score is {self.grid.best_score_}")
            # GridSearchCV (default refit=True) has already refitted the best
            # configuration on all the data. Reusing it keeps the constructor
            # kwargs, which rebuilding from best_params_ alone would drop,
            # and avoids a second fit.
            self.model = self.grid.best_estimator_
        else:
            self.model.fit(self.X, self.Y)
        if with_score:
            pred = self.predict(self.X)
            print(f"Train acc  is : {GeneralModelUtils._acc(pred, self.Y)}")

    def predict(self, X):
        """Predict labels for ``X``.

        Samples are flattened to 1-D when needed. No scaling is applied
        here, so ``X`` is used exactly as given.

        Args:
            X: non-empty collection of samples.

        Returns:
            The result of the underlying model's ``predict``.
        """
        X = GeneralModelUtils._ensure_dimensionalit(X)
        return self.model.predict(X)


## Decision tree model

In [29]:
class DTCHandler:
    """Wrap ``DecisionTreeClassifier`` with data preparation and grid search.

    The helper functions live in the ``GeneralModelUtils`` class namespace,
    not at module level, so they are called through that class explicitly.
    """

    def __init__(self, X, Y, **kwargs):
        """Prepare the training data, the base model and the grid search.

        Args:
            X: training samples; each sample is flattened to 1-D and the
                whole set is standardized.
            Y: 1-D array of targets.
            **kwargs: forwarded to ``tree.DecisionTreeClassifier``.

        Raises:
            AssertionError: if ``Y`` is not one-dimensional.
        """
        # Check the targets first so bad input fails before any scaling work.
        assert len(Y.shape) == 1
        hyperparams = {
            'criterion': ['gini', 'entropy'],
            # 'auto' was an alias of 'sqrt' for classifiers and is rejected by
            # current scikit-learn releases; None (use all features) takes its
            # place so the grid has three distinct, valid options.
            'max_features': [None, 'sqrt', 'log2'],
            'splitter': ['best', 'random'],
        }
        self.model = tree.DecisionTreeClassifier(**kwargs)
        self.X = GeneralModelUtils.do_scaling(
            GeneralModelUtils._ensure_dimensionalit(X)
        )
        self.Y = Y
        self.grid = GeneralModelUtils._gen_gridSearch(self.model, hyperparams)

    def fit(self, with_score=True, with_grid=True):
        """Fit the model on the stored training data.

        Args:
            with_score: when true, print the accuracy on the training data.
            with_grid: when true, run the grid search and keep its best
                estimator; otherwise fit the base model as configured.
        """
        if with_grid:
            self.grid.fit(self.X, self.Y)
            print(f"The best parameters are {self.grid.best_params_} and the best score is {self.grid.best_score_}")
            # GridSearchCV (default refit=True) has already refitted the best
            # configuration on all the data. Reusing it keeps the constructor
            # kwargs, which rebuilding from best_params_ alone would drop,
            # and avoids a second fit.
            self.model = self.grid.best_estimator_
        else:
            self.model.fit(self.X, self.Y)
        if with_score:
            pred = self.predict(self.X)
            print(f"Train acc  is : {GeneralModelUtils._acc(pred, self.Y)}")

    def predict(self, X):
        """Predict labels for ``X``.

        Samples are flattened to 1-D when needed. No scaling is applied
        here, so ``X`` is used exactly as given.

        Args:
            X: non-empty collection of samples.

        Returns:
            The result of the underlying model's ``predict``.
        """
        X = GeneralModelUtils._ensure_dimensionalit(X)
        return self.model.predict(X)
