In [1]:
import joblib
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [2]:
import json
with open('intent.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
    

In [3]:
X = joblib.load('X_data.pkl')              
y = joblib.load('y_data.pkl')             
label_encoder = joblib.load('label_encoder.pkl')

In [4]:
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)

In [5]:
max_len = max(len(seq) for seq in X_seq)
X_pad = pad_sequences(X_seq, maxlen=max_len, padding='post')

In [6]:
num_classes = len(set(y))
y_cat = to_categorical(y, num_classes=num_classes)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_cat, test_size=0.2, random_state=42)


In [8]:
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_len),
    LSTM(128),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [9]:
model.fit(X_train, y_train, epochs=50, batch_size=8, validation_split=0.1, verbose=1)


Epoch 1/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - accuracy: 0.0565 - loss: 2.3979 - val_accuracy: 0.0000e+00 - val_loss: 2.4119
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1268 - loss: 2.3897 - val_accuracy: 0.0000e+00 - val_loss: 2.4223
Epoch 3/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1763 - loss: 2.3742 - val_accuracy: 0.0000e+00 - val_loss: 2.4357
Epoch 4/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2710 - loss: 2.3477 - val_accuracy: 0.0000e+00 - val_loss: 2.4606
Epoch 5/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2022 - loss: 2.2737 - val_accuracy: 0.0769 - val_loss: 2.4664
Epoch 6/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2116 - loss: 2.0428 - val_accuracy: 0.0769 - val_loss: 2.3974
Epoch 7/50
[1m14/14[

<keras.src.callbacks.history.History at 0x2520a01be90>

In [10]:
loss, acc = model.evaluate(X_test, y_test)
print(f"\n LSTM Accuracy: {acc:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.6129 - loss: 2.7622

 LSTM Accuracy: 0.61


In [11]:

y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step


In [12]:
from sklearn.metrics import classification_report

labels = list(range(len(label_encoder.classes_)))

print(classification_report(
    y_true,
    y_pred_labels,
    labels=labels,
    target_names=label_encoder.classes_,
    zero_division=0  # avoids division error if a class is missing
))


                      precision    recall  f1-score   support

   admission_process       0.17      0.33      0.22         3
        appreciation       0.50      0.33      0.40         3
 company_information       1.00      0.80      0.89         5
     contact_details       1.00      0.75      0.86         4
      course_catalog       0.25      0.50      0.33         2
            farewell       0.00      0.00      0.00         2
 feedback_submission       0.67      1.00      0.80         2
            greeting       0.60      1.00      0.75         3
  internship_inquiry       1.00      0.67      0.80         3
     operating_hours       1.00      0.50      0.67         4
placement_assistance       0.00      0.00      0.00         0

            accuracy                           0.61        31
           macro avg       0.56      0.53      0.52        31
        weighted avg       0.70      0.61      0.62        31



In [13]:
model.save("lstm_chatbot_model.h5")
joblib.dump(tokenizer, 'tokenizer.pkl')



['tokenizer.pkl']