## Imports

In [1]:
import numpy as np
import spacy
from spacy import util
import pandas as pd
import os

In [14]:
import tensorflow as tf
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import Precision

## Data preprocessing

In [3]:
os.system('python -m spacy download fr_core_news_md')
nlp = spacy.load('fr_core_news_md')

In [4]:
# Load training and test sets
df_train = pd.read_json('datas/training_set.json')
df_test = pd.read_json('datas/testing_set.json')

print(f"Train shape : {df_train.shape}")
print(f"Test shape : {df_test.shape}")

Train shape : (6035, 2)
Test shape : (1065, 2)


In [5]:
embedding_size = 300 #according to the loaded model

# List of all intents in the same order as the model's output
intents = ["find-train", "irrelevant", "find-flight", "find-restaurant", "purchase", "find-around-me", "provide-showtimes", "find-hotel"]

# Transform sentence into words vectors
def sentence2vect(sentence):
    return nlp(sentence).vector

# One hot encode labels (take string representation of the label)
def label2vec(label):
    assert label in intents
    
    idx = intents.index(label)
    vec = np.zeros(len(intents))
    vec[idx] = 1
    return vec

In [10]:
# Create doc objects for sentences

df_train_spacy = pd.DataFrame(index = range(len(df_train)), columns = ['intent', 'sentence'])

for i in range(len(df_train)):
    df_train_spacy['intent'][i] = label2vec(df_train['intent'][i])
    df_train_spacy['sentence'][i] = sentence2vect(df_train['sentence'][i])


(300,)

In [11]:
df_train_spacy['sentence'][0].shape

(300,)

## Model creation and training

In [12]:
model = Sequential()
model.add(Dense(32, input_shape=(300,)))
model.add(Dense(64))
model.add(Dense(8, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                9632      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 520       
Total params: 12,264
Trainable params: 12,264
Non-trainable params: 0
_________________________________________________________________


In [36]:
model.compile(
    optimizer='adam', loss='categorical_crossentropy', metrics=[Precision()]
)

print(df_train_spacy.values)


history = model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=1)

[[array([0., 1., 0., 0., 0., 0., 0., 0.])
  array([ 1.6132153 , -1.5018038 , -0.10499381,  0.13790691, -0.99539244,
        1.4843131 , -1.7379278 ,  0.94319767,  1.7968175 , -0.3496427 ,
       -1.3796368 , -0.75916183, -0.5608298 , -1.9510568 ,  0.67749846,
       -0.2014001 , -0.33778536,  0.44528228,  0.68899155, -1.5050448 ,
       -1.3804402 ,  0.20137385, -0.0949133 ,  1.30512   , -1.4686908 ,
       -0.57022107, -0.11693113,  2.3768325 , -1.6059865 ,  0.9402294 ,
       -0.935737  , -0.20676425, -1.1419356 , -1.6836809 , -0.893097  ,
       -0.6775553 ,  0.90549076, -0.3979554 ,  0.32890844, -0.10179539,
        2.227459  , -0.96478426, -1.9570587 , -2.6309931 , -0.56262267,
       -0.5963999 ,  0.7028303 ,  1.3043224 , -1.2226439 , -1.5548475 ,
       -1.8313584 ,  0.2511692 ,  1.4863008 ,  1.7328238 , -0.04723309,
        0.43872553,  2.1159143 , -0.14840434, -1.8074883 ,  0.43343613,
        0.4706046 ,  1.0731    , -0.62098825,  1.0906816 ,  1.5219544 ,
        0.24258892, 

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).