In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Chemins vers les fichiers de données
train_path = 'C:/Users/dell latitude 7400/Downloads/Tal/data/train.csv'
test_path = 'C:/Users/dell latitude 7400/Downloads/Tal/data/test.csv'

# Charger les données
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

In [5]:
# Prétraitement des données
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_data['titre'] + " " + train_data['recette'])
train_sequences = tokenizer.texts_to_sequences(train_data['titre'] + " " + train_data['recette'])
test_sequences = tokenizer.texts_to_sequences(test_data['titre'] + " " + test_data['recette'])

max_length = max(len(x) for x in train_sequences) 

In [6]:
# Padding des séquences
train_padded_sequences = pad_sequences(train_sequences, maxlen=max_length, padding='post')
test_padded_sequences = pad_sequences(test_sequences, maxlen=max_length, padding='post')

# Encodage des labels
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_data['type'])
train_labels_categorical = to_categorical(train_labels)

# Séparation des données en ensembles d'entraînement et de validation
X_train, X_valid, y_train, y_valid = train_test_split(train_padded_sequences, train_labels_categorical, test_size=0.2, random_state=42)


In [8]:
# Construction du modèle RNN
model = Sequential([
    Embedding(input_dim=10000, output_dim=64),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(train_labels_categorical.shape[1], activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Entraînement du modèle
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 907ms/step - accuracy: 0.6706 - loss: 0.7057 - val_accuracy: 0.8341 - val_loss: 0.3626
Epoch 2/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 926ms/step - accuracy: 0.8716 - loss: 0.3203 - val_accuracy: 0.8581 - val_loss: 0.3313
Epoch 3/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 939ms/step - accuracy: 0.9090 - loss: 0.2298 - val_accuracy: 0.8517 - val_loss: 0.3410
Epoch 4/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 947ms/step - accuracy: 0.9175 - loss: 0.2052 - val_accuracy: 0.8525 - val_loss: 0.3663
Epoch 5/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 929ms/step - accuracy: 0.9374 - loss: 0.1622 - val_accuracy: 0.8425 - val_loss: 0.4481
Epoch 6/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 897ms/step - accuracy: 0.9522 - loss: 0.1298 - val_accuracy: 0.8305 - val_loss: 0.4582
Epoc

<keras.src.callbacks.history.History at 0x1c03e2bb990>

In [9]:
# Préparation des labels pour l'ensemble de test 
test_labels = label_encoder.transform(test_data['type'])
test_labels_categorical = to_categorical(test_labels)

# Prédiction sur l'ensemble de test
y_pred = model.predict(test_padded_sequences)
y_pred_classes = np.argmax(y_pred, axis=1)

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 368ms/step
