In [46]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import OneHotEncoder, scale

mnist_train_csv = "../data/mnist_train.csv"
mnist_test_csv = "../data/mnist_test.csv"

df_train = pd.read_csv(mnist_train_csv, header=None) # data_frame

In [47]:
labels = df_train.iloc[:, 0] # iloc "indice" ; loc "identifiant string" ; iloc + rapide que loc
labels = labels.values.astype(np.uint8) # transforme en array numpy

labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [48]:
## 'identique' à la ligne du dessous, déduit la premiere dimension
# images = df.iloc[:, 1:].values.astype(np.uint8).reshape(60000, 28, 28)

images = df_train.iloc[:, 1:].values.astype(np.uint8).reshape(-1, 28, 28) # desapplati ...
# images

In [49]:
n = labels.shape[0]
idx = np.arange(n)
np.random.shuffle(idx)
idx1 = idx[::2]
idx2 = idx[1::2]
idx1

array([53124, 16673,  1846, ...,  1130, 55745, 21709])

In [50]:
images[idx1, ...].shape

(30000, 28, 28)

In [51]:
fig = px.imshow(images[0])
fig.show()

In [52]:
train_y_ = labels[idx1] + labels[idx2]
train_y_

array([ 5, 15, 11, ...,  7,  2,  3], dtype=uint8)

In [53]:
enc = OneHotEncoder(handle_unknown='ignore')
train_y = enc.fit_transform((labels[idx1] + labels[idx2]).reshape(-1, 1)).todense()
train_y.shape

(30000, 19)

In [54]:
train_y[0]

matrix([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])

In [55]:
train_x = np.concatenate([images[idx1, ...], images[idx2, ...]], axis=2) # concatene images sur le 2e axe

In [56]:
train_x = train_x.reshape(-1, 28*56) # re applatir pour le réseau

In [57]:
train_x[0, :].reshape(28, 56)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [58]:
train_x = scale(train_x)

In [59]:
fig = px.imshow(train_x[0, :].reshape(28, 56))
fig.show()

In [60]:
df_test = pd.read_csv(mnist_test_csv, header=None) # data_frame

In [61]:
labels = df_train.iloc[:, 0] # iloc "indice" ; loc "identifiant string" ; iloc + rapide que loc
labels = labels.values.astype(np.uint8) # transforme en array numpy
images = df_train.iloc[:, 1:].values.astype(np.uint8).reshape(-1, 28, 28) # desapplati ...
n = labels.shape[0]
idx = np.arange(n)
np.random.shuffle(idx)
idx1 = idx[::2]
idx2 = idx[1::2]
val_y_ = labels[idx1] + labels[idx2]
enc = OneHotEncoder(handle_unknown='ignore')
val_y = enc.fit_transform((labels[idx1] + labels[idx2]).reshape(-1, 1)).todense()
val_y.shape

val_x = np.concatenate([images[idx1, ...], images[idx2, ...]], axis=2) # concatene images sur le 2e axe
val_x.reshape(-1, 28*56).shape # re applatir pour le réseau

(30000, 1568)

In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [63]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.l1 = nn.Linear(1568, 392)
        self.relu1 = nn.ReLU() # casse la linearite
        self.l2 = nn.Linear(392, 98)
        self.relu2 = nn.ReLU() # casse la linearite
        self.l3 = nn.Linear(98, 19)
        
    def forward(self, x):
        x = self.l1(x)
        x = self.relu1(x)
        x = self.l2(x)
        x = self.relu2(x)
        x = self.l3(x)
        return F.softmax(x, dim=1)

In [64]:
net = Network()

In [65]:
optimizer = optim.SGD(net.parameters(), lr=0.00005, momentum=0.9) # quelle fonction de descente de gradient ? rapidité de descente sur l'erreur
loss_func = nn.CrossEntropyLoss()

In [66]:
train_x.shape

(30000, 1568)

In [67]:
x = torch.FloatTensor(train_x)
y = torch.LongTensor(train_y_)

loss_log = []

batch_size = 10
n = train_x.shape[0]

for e in range(20):
    for i in range(0, n, batch_size):
        x_mini = x[i:i + batch_size] 
        y_mini = y[i:i + batch_size] 
        
        x_var = Variable(x_mini)
        y_var = Variable(y_mini)
        
        optimizer.zero_grad()
        net_out = net(x_var)
        
        loss = loss_func(net_out, y_var)
        loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            loss_log.append(loss.item())
        
    print(f'Epoch: {e} - Loss: {loss.item():.6f}')

Epoch: 0 - Loss: 2.938962
Epoch: 1 - Loss: 2.938627
Epoch: 2 - Loss: 2.938294
Epoch: 3 - Loss: 2.937934
Epoch: 4 - Loss: 2.937555
Epoch: 5 - Loss: 2.937150
Epoch: 6 - Loss: 2.936738
Epoch: 7 - Loss: 2.936316
Epoch: 8 - Loss: 2.935873
Epoch: 9 - Loss: 2.935407
Epoch: 10 - Loss: 2.934916
Epoch: 11 - Loss: 2.934392
Epoch: 12 - Loss: 2.933841
Epoch: 13 - Loss: 2.933262
Epoch: 14 - Loss: 2.932646
Epoch: 15 - Loss: 2.932005
Epoch: 16 - Loss: 2.931315
Epoch: 17 - Loss: 2.930585
Epoch: 18 - Loss: 2.929814
Epoch: 19 - Loss: 2.928994
