In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Switch the working directory to the project folder on Drive; relative paths
# used later in the notebook (e.g. 'Intents.json') are resolved against it.
%cd /content/drive/MyDrive/NLP

/content/drive/MyDrive/NLP


In [None]:
import json
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Dropout

In [None]:
# Download the NLTK resources needed by the tokenizer, the lemmatizer and the
# stopword list used below.
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Load the intents definition from the JSON file.
# The encoding is spelled out so that accented characters are decoded the same
# way on every platform instead of depending on the locale's default encoding.
with open('Intents.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [None]:
# Create the WordNet lemmatizer used to normalise tokens during preprocessing.
lemmatizer = WordNetLemmatizer()

In [None]:
# Load NLTK's English stopword list; tokens found in it are dropped during preprocessing.
# NOTE(review): the sample conversation at the end of this notebook is in French —
# confirm that 'english' is the intended stopword language for the patterns.
stopwords = nltk.corpus.stopwords.words('english')

In [None]:
# Build one (tokens, tag) pair per training pattern: the pattern is lower-cased
# and tokenised, stopwords are dropped and the remaining tokens are lemmatised.
tokenized_patterns = [
    (
        [
            lemmatizer.lemmatize(token)
            for token in word_tokenize(pattern.lower())
            if token not in stopwords
        ],
        intent['tag'],
    )
    for intent in data['intents']
    for pattern in intent['patterns']
]

In [None]:
# Derive the three working collections from the tokenised patterns:
#   xy        - the (tokens, tag) pairs, in their original order
#   all_words - every token of every pattern, flattened into one list
#   tags      - the distinct tags, in order of first appearance
xy = [(tokens, label) for tokens, label in tokenized_patterns]
all_words = [token for tokens, _ in xy for token in tokens]
tags = list(dict.fromkeys(label for _, label in xy))

In [None]:
# Build the vocabulary: fit a Keras Tokenizer capped by vocab_size, with
# '<OOV>' as the placeholder token for words it has not seen.
vocab_size = 1000
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
# all_words holds individual preprocessed tokens, so each token is passed to
# the tokenizer as its own text.
tokenizer.fit_on_texts(all_words)

In [None]:
# Encode the dataset: every tokenised pattern becomes a list of word indices
# and every tag becomes its position in `tags`.
encoded_patterns = tokenizer.texts_to_sequences([tokens for tokens, _ in xy])
label_indices = [tags.index(label) for _, label in xy]

# Pad at the end of each row so that all rows share the same length.
X_train = pad_sequences(encoded_patterns, padding='post')
y_train = np.array(label_indices)

In [None]:
# Split the data into training (80%) and test (20%) sets with a fixed seed.
# NOTE(review): the inputs (X_train, y_train) are overwritten by the outputs,
# so re-running this cell on its own splits the already-reduced training set
# again; re-run the encoding cell first.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# Model hyper-parameters
output_dim = len(tags)          # one output unit per intent tag
max_length = X_train.shape[1]   # padded length of every input row
embedding_dim = 16              # size of each word-embedding vector

In [None]:
# LSTM classifier: embedding -> LSTM -> dense hidden layer -> dropout ->
# softmax over the intent tags.
model = Sequential([
    # mask_zero=True: the inputs are post-padded with 0, so masking index 0
    # stops the LSTM from consuming the trailing padding of short patterns
    # before it emits its final state.
    Embedding(vocab_size, embedding_dim, input_length=max_length, mask_zero=True),
    LSTM(128),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(output_dim, activation='softmax')
])

In [None]:
# Compile the model: sparse categorical cross-entropy loss (the labels are
# integer class indices), Adam optimizer, accuracy as the reported metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the model. The returned History object is kept in `history` so the
# per-epoch loss/accuracy can be inspected afterwards instead of being
# discarded (and echoed as a bare object repr).
# NOTE(review): no validation data or early stopping is used over 500 epochs —
# confirm this is intended given the low test accuracy reported below.
history = model.fit(X_train, y_train, epochs=500, batch_size=8, verbose=1)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x79804012b7f0>

In [None]:
# Evaluate the trained model on the held-out test set and report its accuracy.
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.07999999821186066


In [None]:
# Interactive conversation with the chatbot
def _preprocess_sentence(sentence):
    """Tokenise a user sentence the same way the training patterns were prepared.

    The text is lower-cased and tokenised, stopwords are dropped and the
    remaining tokens are lemmatised, so the model receives inputs built like
    the data it was trained on.
    """
    return [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(sentence.lower())
        if word not in stopwords
    ]


def chat():
    """Run a console chat loop until the user types "quit" or input ends.

    Each sentence is preprocessed, encoded with the fitted tokenizer, padded
    to max_length and classified by the model; one response of the predicted
    intent is then picked at random and printed.
    """
    print("Start talking with the bot (type quit to stop)!")
    while True:
        try:
            sentence = input("You: ").strip()
        except EOFError:
            # No more input available (e.g. stdin closed): end the session.
            break
        if sentence.lower() == "quit":
            break
        if not sentence:
            # Nothing to classify; ask again instead of predicting on pure padding.
            continue

        sequence = tokenizer.texts_to_sequences([_preprocess_sentence(sentence)])[0]
        sequence = pad_sequences([sequence], maxlen=max_length, padding='post')
        # verbose=0 keeps the Keras progress bar out of the conversation.
        prediction = model.predict(sequence, verbose=0)
        predicted_tag = tags[np.argmax(prediction)]

        for intent in data['intents']:
            if intent['tag'] == predicted_tag:
                print("Bot:", np.random.choice(intent['responses']))
                break

In [None]:
# Start the interactive session; it runs until the user types "quit".
chat()

Start talking with the bot (type quit to stop)!
You: comment appliquer la pédagogie avec les enfants
Bot: Utilisez des logiciels comme NVivo, ATLAS.ti ou MAXQDA pour faciliter l'analyse de contenu.
You: quelle est la relation entre la sociologie et l'éducation
Bot: La sociologie de l'éducation examine les façons dont l'éducation affecte et est affectée par la société et les interactions sociales.
