# **Traduction de gestes de la main en emoji**



In [7]:
# import

# TORCH
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function

# SKLEARN
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# OTHER
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd 
import seaborn as sns
import pylab

In [None]:
# Colab-only: mount Google Drive under /content/drive/ so that the files
# referenced below via '/content/drive/My Drive/DATA2020/...' are reachable.
from google.colab import drive
drive.mount('/content/drive/')

## **Chargement des données**

In [4]:
# Load the Sign-MNIST training CSV. The first row is discarded (CSV header
# line); column 0 holds the class label and the remaining columns the pixels.
data = np.genfromtxt('/content/drive/My Drive/DATA2020/sign_mnist_train.csv', delimiter=',', skip_header=1)
labels = data[:, 0]
# Only the label column is removed here. Dropping a further row (as the
# previous `data[1:, 1:]` did) would shift every sample by one relative to
# `labels` and leave the two arrays with different lengths.
data = data[:, 1:]

# Keep only the two classes of interest (labels 5 and 21); all class-5 rows
# come first, followed by all class-21 rows.
ind = np.flatnonzero(labels == 5)
ind2 = np.flatnonzero(labels == 21)
indices = np.concatenate((ind, ind2))
data = data[indices]
labels = labels[indices]

In [3]:
# Load the Sign-MNIST test CSV. The first row is discarded (CSV header line);
# column 0 holds the class label and the remaining columns the pixels.
data2 = np.genfromtxt('/content/drive/My Drive/DATA2020/sign_mnist_test.csv', delimiter=',', skip_header=1)
labels2 = data2[:, 0]
# Only the label column is removed here. Dropping a further row (as the
# previous `data2[1:, 1:]` did) would shift every sample by one relative to
# `labels2` and leave the two arrays with different lengths.
data2 = data2[:, 1:]

# Keep only the two classes of interest (labels 5 and 21); all class-5 rows
# come first, followed by all class-21 rows.
ind = np.flatnonzero(labels2 == 5)
ind2 = np.flatnonzero(labels2 == 21)
indices = np.concatenate((ind, ind2))
data2 = data2[indices]
labels2 = labels2[indices]


In [5]:
# Pool the filtered rows of both CSV files: the rows of `data2` / `labels2`
# are appended after those of `data` / `labels`.
data = np.concatenate([data, data2], axis=0)
labels = np.concatenate([labels, labels2], axis=0)

In [6]:
# Positional split of the pooled samples: the first 2500 rows are used for
# training, everything after that is kept aside.
x_train = data[0:2500]
y_train = labels[0:2500]
x_test = data[2500:]
# Targets must come from `labels`; slicing `data` here would hand back pixels.
y_test = labels[2500:]
print("Data Train Shape :: ",x_train.shape)
print("Data Test Shape :: ",x_test.shape)

Data Train Shape ::  (2500, 784)
Data Test Shape ::  (379, 784)


## **CNN MODEL**

In [9]:
class CNNGest(nn.Module):
    """Small two-stage convolutional classifier with two output logits.

    Each stage is a 3x3 convolution (stride 1, padding 1, so the spatial size
    is preserved) followed by ReLU and a shared 2x2 max-pool that halves the
    spatial size. The linear head expects 16 * 7 * 7 features, i.e. an input
    of shape (batch, 1, 28, 28), and returns raw scores of shape (batch, 2).
    """

    def __init__(self):
        super().__init__()
        # Both convolutions share the same geometry; only the channels differ.
        conv_geometry = dict(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, **conv_geometry)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, **conv_geometry)
        self.fc1 = nn.Linear(16 * 7 * 7, 2)

    def forward(self, x):
        """Run the network on `x` and return the unnormalised class scores."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(features.shape[0], -1)
        return self.fc1(flat)

def accuracy(yhat, y):
    """Score predictions `yhat` against reference labels `y`.

    Thin wrapper around scikit-learn's `accuracy_score` that takes the
    prediction first and the reference second.
    """
    score = accuracy_score(y_true=y, y_pred=yhat)
    return score

## **Entraînement du modèle**

In [None]:

# Reload the split from the pooled arrays.
#
# The label slices are copied: a basic NumPy slice is a view, so the in-place
# remapping below would otherwise write through to `labels`.
x_train = data[0:2500]
y_train = labels[0:2500].copy()
x_test = data[2500:]
# Targets must come from `labels`. Slicing `data` here would make the
# remapping below overwrite pixel values 5 and 21 inside the images.
y_test = labels[2500:].copy()

# Remap the two sign classes to contiguous class ids: 5 -> 0, 21 -> 1.
y_train[y_train==5] = 0
y_train[y_train==21] = 1
y_test[y_test==5] = 0
y_test[y_test==21] = 1
print(x_train.shape)
print(x_test.shape)

print(y_train)

if __name__ == '__main__':
    # Train CNNGest on the two-class problem with plain mini-batch SGD and
    # report the loss / accuracy of the last mini-batch after every epoch.

    NB_EPOCHS = 1000  # number of passes over the training split
    BATCH_SIZE = 64
    eps = 0.05  # SGD learning rate
    NORMA = True  # standardise the features before training

    # --- DATA PREPARATION --- #

    ## TRAIN / HELD-OUT SPLIT ##

    # 20% of the rows are held out; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(x_train, y_train , test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = torch.tensor(X_train), torch.tensor(X_test), torch.tensor(y_train), torch.tensor(y_test)

    # --- Feature standardisation --- #

    if NORMA :
        scaler = StandardScaler()
        X_train = torch.tensor(scaler.fit_transform(X_train))
        # Re-use the statistics learned on the training split. Fitting a
        # second time on the held-out rows would scale them differently from
        # what the model is trained on.
        X_test = torch.tensor(scaler.transform(X_test))

    input_d = X_train.shape[1]

    # --- MINI-BATCH CREATION --- #

    # torch.split yields consecutive chunks of BATCH_SIZE rows, plus a final
    # shorter chunk when the row count is not a multiple of BATCH_SIZE, so
    # every sample is used exactly once per epoch.
    batch_x = list(torch.split(X_train, BATCH_SIZE))
    batch_y = list(torch.split(y_train, BATCH_SIZE))
    NB_BATCH = len(batch_x)

    ### --- MODEL --- ###

    model = CNNGest()

    # --- OPTIMISER --- #

    optim = torch.optim.SGD(params=model.parameters(),lr=eps)

    # --- LOSS --- #

    criterion = torch.nn.CrossEntropyLoss()

    # Metric histories; declared for later use, not populated in this cell.
    tabErr = []
    tabInd = []
    tabErr_test = []
    acc_train = []
    acc_test = []
    for epoch in range(NB_EPOCHS):
        for x, y in zip(batch_x, batch_y):
            # -1 lets the (possibly shorter) last batch go through as well.
            x = x.float().view(-1, 1, 28, 28)

            optim.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y.long())
            loss.backward()
            optim.step()

        print("--- Epoch --- "+str(epoch))
        print("Loss train (Dernier batch) :: ", loss.item())
        print("Accuracy train (Dernier batch) :: ", accuracy(torch.argmax(yhat, dim=1), y.long()))


## **Sauvegarde du modèle**

In [None]:
# Persist the trained network on Drive. The whole module object is handed to
# torch.save (not only its state_dict).
PATH = "/content/drive/My Drive/DATA2020/modelGeste.pt"
torch.save(obj=model, f=PATH)