# Début d'implémentation du modèle

## Traitement des données
À réfléchir par la suite.

### Étape 0 : on importe ce qui est nécessaire

In [53]:
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import math

### Étape 1 : Créer le vocabulaire à partir du corpus de phrases

In [3]:
# Toy training corpus: one lowercase, punctuation-free French sentence per
# entry, so that splitting on whitespace is enough to tokenise it.
corpus = [
    'il est un roi',
    'elle est une reine',
    'il est un homme',
    'elle est une femme',
    'londres est la capitale du royaume uni',
    "berlin est la capitale de l allemagne",
    'paris est la capitale de la france',
]

In [4]:
def tokenize(corpus):
    """Split every sentence of ``corpus`` into whitespace-separated tokens.

    Args:
        corpus: Iterable of sentence strings.

    Returns:
        A list holding one list of tokens per sentence, in input order.
    """
    return [sentence.split() for sentence in corpus]

# Tokenised corpus: one list of words per sentence.
t_corpus = tokenize(corpus)
# Bare expression so the notebook displays the result.
t_corpus

[['il', 'est', 'un', 'roi'],
 ['elle', 'est', 'une', 'reine'],
 ['il', 'est', 'un', 'homme'],
 ['elle', 'est', 'une', 'femme'],
 ['londres', 'est', 'la', 'capitale', 'du', 'royaume', 'uni'],
 ['berlin', 'est', 'la', 'capitale', 'de', 'l', 'allemagne'],
 ['paris', 'est', 'la', 'capitale', 'de', 'la', 'france']]

In [5]:
# Build the vocabulary: every distinct word of the tokenised corpus, kept in
# order of first appearance. dict.fromkeys de-duplicates while preserving
# insertion order, which avoids the quadratic `mot not in voc` list scan.
voc = list(dict.fromkeys(mot for phrase in t_corpus for mot in phrase))
voc_size = len(voc)
# Bare expression so the notebook displays the vocabulary.
voc

['il',
 'est',
 'un',
 'roi',
 'elle',
 'une',
 'reine',
 'homme',
 'femme',
 'londres',
 'la',
 'capitale',
 'du',
 'royaume',
 'uni',
 'berlin',
 'de',
 'l',
 'allemagne',
 'paris',
 'france']

### Étape 2 : on s'intéresse aux mots centraux et à leurs contextes selon la taille de la fenêtre

In [6]:
# Lookup tables between each word and its integer id (its position in `voc`).
mot_index = dict(zip(voc, range(len(voc))))
index_mot = dict(enumerate(voc))

# Maximum distance (in tokens) between a center word and a context word.
taille_fenetre = 3
index_pairs = []
# Sentences are handled one by one so that a window never spans two sentences.
for sentence in t_corpus:
    ids = [mot_index[token] for token in sentence]
    n_tokens = len(ids)
    # Every position of the sentence is used in turn as the center word.
    for center_pos, center_id in enumerate(ids):
        # Clamp the window to the sentence boundaries.
        first = max(0, center_pos - taille_fenetre)
        last = min(n_tokens, center_pos + taille_fenetre + 1)
        for context_pos in range(first, last):
            # A word is never its own context.
            if context_pos != center_pos:
                index_pairs.append((center_id, ids[context_pos]))


In [7]:
# Array view of the (center id, context id) pairs, one pair per row.
index_pairs_np = np.asarray(index_pairs)
# Bare expression so the notebook displays the array.
index_pairs_np

array([[ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 1,  0],
       [ 1,  2],
       [ 1,  3],
       [ 2,  0],
       [ 2,  1],
       [ 2,  3],
       [ 3,  0],
       [ 3,  1],
       [ 3,  2],
       [ 4,  1],
       [ 4,  5],
       [ 4,  6],
       [ 1,  4],
       [ 1,  5],
       [ 1,  6],
       [ 5,  4],
       [ 5,  1],
       [ 5,  6],
       [ 6,  4],
       [ 6,  1],
       [ 6,  5],
       [ 0,  1],
       [ 0,  2],
       [ 0,  7],
       [ 1,  0],
       [ 1,  2],
       [ 1,  7],
       [ 2,  0],
       [ 2,  1],
       [ 2,  7],
       [ 7,  0],
       [ 7,  1],
       [ 7,  2],
       [ 4,  1],
       [ 4,  5],
       [ 4,  8],
       [ 1,  4],
       [ 1,  5],
       [ 1,  8],
       [ 5,  4],
       [ 5,  1],
       [ 5,  8],
       [ 8,  4],
       [ 8,  1],
       [ 8,  5],
       [ 9,  1],
       [ 9, 10],
       [ 9, 11],
       [ 1,  9],
       [ 1, 10],
       [ 1, 11],
       [ 1, 12],
       [10,  9],
       [10,  1],
       [10, 11],
       [10, 12

### Étape 3 : création du modèle

In [66]:
def get_input_layer(word_idx):
    """Build the one-hot input vector for a word id.

    Args:
        word_idx: Index of the entry to set to 1.

    Returns:
        A float32 tensor of length ``voc_size`` (module-level global) that is
        zero everywhere except at ``word_idx``.
    """
    one_hot = torch.zeros(voc_size, dtype=torch.float32)
    one_hot[word_idx] = 1.0
    return one_hot

# Two linear layers without non-linearity:
#   W1 (embedding_dims x voc_size) maps a one-hot center word to its embedding,
#   W2 (voc_size x embedding_dims) turns that embedding into one score per
#   vocabulary word, trained to predict the paired context word.
embedding_dims = 10
# Plain tensors with requires_grad=True replace the deprecated Variable wrapper.
W1 = torch.randn(embedding_dims, voc_size, dtype=torch.float32, requires_grad=True)
W2 = torch.randn(voc_size, embedding_dims, dtype=torch.float32, requires_grad=True)
num_epochs = 100
learning_rate = 0.001

for epo in range(num_epochs):
    # Sum of the per-pair losses over the current epoch (reset every epoch;
    # it is accumulated here but not reported by this cell).
    loss_val = 0
    for data, target in index_pairs:
        x = get_input_layer(data)
        y_true = torch.tensor([target], dtype=torch.long)

        # Forward pass: embedding of the center word, then vocabulary scores.
        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)

        log_softmax = F.log_softmax(z2, dim=0)

        # nll_loss expects a (batch, classes) input, hence the view to 1 x voc_size.
        loss = F.nll_loss(log_softmax.view(1, -1), y_true)
        # detach() keeps the running total out of the autograd graph.
        loss_val += loss.detach()

        loss.backward()
        # Plain SGD step, one update per (center, context) pair. The update is
        # done under no_grad so autograd does not track it.
        with torch.no_grad():
            W1.sub_(learning_rate * W1.grad)
            W2.sub_(learning_rate * W2.grad)

            # Gradients accumulate across backward() calls; clear them.
            W1.grad.zero_()
            W2.grad.zero_()

In [67]:
# Display the trained W1 matrix (embedding_dims x voc_size).
W1

tensor([[-0.4947,  0.3315, -1.2877, -1.5805,  0.9756, -1.2312,  0.3168, -0.7427,
          0.3270,  0.4944, -0.0674,  0.2432,  0.5014,  0.4188,  0.2039, -1.8357,
         -0.9471,  0.1982,  0.9237, -1.1309,  1.3950],
        [-0.3704,  0.5868,  1.1734,  0.9662,  0.2670,  1.5990,  0.7265,  0.6291,
         -0.5157,  0.1331,  1.1935, -0.2927, -0.6029,  0.5952,  1.1235,  0.5484,
         -1.7887,  1.0046,  1.1069, -0.7145,  0.3478],
        [ 0.5176, -0.2069,  1.7235, -0.2625,  1.0030,  0.8355,  0.3853, -0.6875,
         -0.1186,  0.5999,  0.5016,  0.6227, -0.1554,  0.1733,  0.8219, -0.2846,
         -0.5204, -0.1643, -0.1752,  0.0122,  0.0879],
        [ 0.2715, -0.9451,  0.0382,  0.8749, -0.7978, -0.0213, -0.3202, -1.4086,
         -0.5324, -1.3127, -0.0517, -0.2834,  0.4801, -1.6827, -0.8192, -0.2363,
         -0.2977, -0.7848, -1.2861, -0.1196, -1.0171],
        [-0.6902, -1.3185,  2.3124,  0.1529,  0.3183, -0.4200, -0.4375,  0.0459,
          0.6620, -1.1570, -1.2753, -0.0809, -0.014

In [68]:
# Display the trained W2 matrix (voc_size x embedding_dims).
W2

tensor([[ 0.0563, -0.6103, -0.3536,  0.7024, -0.6107,  0.9776,  0.1286, -2.1149,
          1.1871,  1.6894],
        [-1.5432, -0.0383,  0.5483, -0.4739, -1.2686,  0.5057, -0.2272,  0.1025,
          1.8997,  0.6612],
        [ 0.4759, -0.6195, -0.3552, -0.2747, -1.7599,  1.4184, -0.4935,  0.6567,
          1.8008,  1.1913],
        [-1.3825, -0.5304,  1.8013,  0.4008,  1.4296, -1.0825, -0.2343,  0.8421,
         -1.0614,  0.0057],
        [-0.1359,  0.0525,  0.0738, -0.3734,  0.3258, -0.4658, -0.4858, -1.1292,
         -1.1494,  0.1789],
        [ 0.5469, -0.3780, -0.8927,  0.0305,  0.3106,  0.2116, -0.3787, -0.6493,
         -0.4919, -0.3775],
        [ 1.6229, -0.0247, -0.0207,  1.6323, -0.2777,  1.3059, -0.6560,  0.6080,
         -0.8605, -2.1835],
        [ 0.0979, -1.8978,  1.1006, -2.3466,  1.8410,  0.7941, -0.6344,  0.2943,
         -1.0345,  0.4165],
        [ 0.9177, -0.2124,  0.7497,  0.9389, -0.7027,  1.1617,  0.1121, -0.3598,
          0.5035, -0.1728],
        [ 0.4171,  

In [64]:
def cos_distance(u, v):
    """Return the cosine of the angle between vectors ``u`` and ``v``.

    Despite the name, the value is the cosine *similarity*
    ``u.v / (||u|| * ||v||)``: 1 for vectors pointing the same way, 0 for
    orthogonal vectors and -1 for opposite ones.

    Args:
        u: 1-D sequence or array of numbers.
        v: 1-D sequence or array of numbers, same length as ``u``.

    Returns:
        The dot product of ``u`` and ``v`` divided by the product of their
        Euclidean norms. Zero-norm inputs are not special-cased.
    """
    # Both norms belong in the denominator; without the grouping the result
    # was (u.v / ||u||) * ||v||, which is only correct when ||v|| == 1.
    norm_product = math.sqrt(np.dot(u, u)) * math.sqrt(np.dot(v, v))
    return np.dot(u, v) / norm_product