# 🤖 IT Helpdesk Chatbot (NLP Project)

Improved version with:
- Train/Test split
- Lightweight spaCy model
- Model saving
- Cleaner preprocessing


In [None]:

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import json
import re
import random
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import spacy

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Shared spaCy English pipeline; preprocess() runs text through it to
# obtain token lemmas.
nlp = spacy.load("en_core_web_sm")


In [None]:

# Load the intent definitions (each entry carries a "tag", example
# "patterns" and candidate "responses").
# The encoding is pinned explicitly: without it open() decodes with the
# platform's locale encoding, so non-ASCII text in the JSON would be read
# differently (or fail) depending on the machine.
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)


In [None]:

def preprocess(text):
    """Normalize a raw utterance into a space-joined string of lemmas.

    The text is lower-cased, every character that is not an ASCII letter
    or a space is deleted, and the result is run through the module-level
    spaCy pipeline ``nlp``. Each token that is not punctuation contributes
    its lemma to the output.
    """
    cleaned = re.sub(r"[^a-zA-Z ]", "", text.lower())
    lemmas = []
    for tok in nlp(cleaned):
        if tok.is_punct:
            continue
        lemmas.append(tok.lemma_)
    return " ".join(lemmas)

# Flatten the intents into parallel training lists:
#   patterns[i] is a preprocessed example utterance, labels[i] its tag.
# responses / classes hold one entry per intent, in file order.
patterns, labels = [], []
responses, classes = [], []

for intent in data["intents"]:
    cleaned = [preprocess(raw) for raw in intent["patterns"]]
    patterns.extend(cleaned)
    # Every example of this intent shares the same tag.
    labels.extend(intent["tag"] for _ in cleaned)
    responses.append(intent["responses"])
    classes.append(intent["tag"])


In [None]:

# Turn each cleaned pattern into a sequence of integer token IDs and pad
# the sequences to a common length so they form one 2-D array.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(patterns)
sequences = tokenizer.texts_to_sequences(patterns)
padded = pad_sequences(sequences)

# Assign class indices in first-appearance order. Enumerating a set() of
# strings yields an order that can change between interpreter runs (string
# hashing is randomized), which would make the output indices of a saved
# model impossible to map back to tags reliably. dict.fromkeys() removes
# duplicates while keeping a deterministic order; when every intent has at
# least one pattern and tags are unique, this is the same order as `classes`.
label_index = {label: idx for idx, label in enumerate(dict.fromkeys(labels))}
y = [label_index[label] for label in labels]
# One-hot encode; the width is stated explicitly rather than inferred.
y = to_categorical(y, num_classes=len(label_index))

# Hold out 20% of the examples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    padded, y, test_size=0.2, random_state=42
)


In [None]:

# The inputs are padded sequences of integer token IDs. Feeding those IDs
# straight into a Dense layer treats arbitrary vocabulary indices as numeric
# magnitudes (word 40 would be "twice" word 20), so the IDs are first mapped
# to learned vectors and averaged over the sequence. The model still accepts
# the same (sequence_length,) integer input as before.

# Tokenizer numbers words from 1 and padding uses 0, hence the +1.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],), dtype="int32"),
    tf.keras.layers.Embedding(vocab_size, 32),
    # Collapse the per-token vectors into one fixed-size vector per example.
    tf.keras.layers.GlobalAveragePooling1D(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    # One output unit per class, matching the one-hot width of y.
    Dense(y.shape[1], activation="softmax"),
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


In [None]:

# Train the classifier. The held-out split is passed as validation data,
# so `history` records loss/accuracy for both splits after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=8,
    epochs=35,
    verbose=1,
)


In [None]:

# Persist the trained network to disk, then confirm on stdout.
model.save(filepath="chatbot_model.h5")
print("Model saved successfully")


In [None]:

# Draw the training curves side by side: loss on the left, accuracy on the
# right, each with its training and validation series.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))
curves = history.history

loss_ax.plot(curves["loss"], label="Training Loss")
loss_ax.plot(curves["val_loss"], label="Validation Loss")
loss_ax.legend()

acc_ax.plot(curves["accuracy"], label="Training Accuracy")
acc_ax.plot(curves["val_accuracy"], label="Validation Accuracy")
acc_ax.legend()

plt.show()
