In [55]:
#Step 1: Setting Up the Environment and Uploading Files

In [None]:
# Install necessary packages
!pip install nltk tensorflow keras numpy textblob spacy
!pip install SpeechRecognition PyAudio


# Download NLTK data
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Download SpaCy model
!python -m spacy download en_core_web_sm

# Upload intents.json file (manually upload via Google Colab UI)
from google.colab import files
uploaded = files.upload()

import json

# Load intents data
with open('intents.json', 'r') as file:
    intents = json.load(file)




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
#Step 2: Import Libraries and Initialize Variables

In [None]:
!pip install SpeechRecognition pydub
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0
!pip install pyaudio

import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
import random
import warnings
from keras.optimizers import Adam
import tensorflow as tf
import speech_recognition as sr

warnings.filterwarnings('ignore')

lemmatizer = WordNetLemmatizer()

In [None]:
import json
import pickle

# Read the intent definitions from disk
with open('intents.json', 'r') as file:
    intents = json.load(file)

lemmatizer = WordNetLemmatizer()

# Walk every pattern once, gathering its raw tokens and the tag it belongs to
raw_tokens = []
pattern_tags = []
for intent in intents['intents']:
    for pattern in intent['patterns']:
        raw_tokens.extend(nltk.word_tokenize(pattern))
        pattern_tags.append(intent['tag'])

# Vocabulary: alphanumeric tokens only, lower-cased, lemmatized, unique, sorted
words = sorted({lemmatizer.lemmatize(token.lower()) for token in raw_tokens if token.isalnum()})

# Class labels: unique tags, sorted
classes = sorted(set(pattern_tags))

# Persist both lists as pickle files
for pickle_path, payload in (('words.pkl', words), ('classes.pkl', classes)):
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)


In [None]:
#Step 3: Data Augmentation and Preprocessing

In [None]:
from textblob import TextBlob
from nltk.corpus import wordnet
from itertools import product

def augment_data(patterns, max_patterns=500):
    """Generate synonym-substituted variants of the given patterns.

    Each pattern is split into sentences with TextBlob. Every word tagged
    'NN', 'VB' or 'JJ' is replaced by each entry returned by get_synonyms()
    (which includes the word itself); all other words are kept unchanged.
    Every combination of those choices becomes one augmented pattern.

    Args:
        patterns: iterable of pattern strings.
        max_patterns: hard upper bound on the number of variants returned.
            A value <= 0 disables augmentation.

    Returns:
        A list of at most ``max_patterns`` augmented pattern strings.
    """
    augmented_patterns = []
    if max_patterns <= 0:
        return augmented_patterns

    for pattern in patterns:
        blob = TextBlob(pattern)
        for sentence in blob.sentences:
            choices = [
                get_synonyms(word) if pos in ('NN', 'VB', 'JJ') else [word]
                for word, pos in sentence.tags
            ]
            if not choices:
                # product() of nothing yields one empty tuple, which would
                # add an empty-string pattern to the training data.
                continue
            for combination in product(*choices):
                augmented_patterns.append(' '.join(combination))
                # Return instead of break: a break only leaves the innermost
                # loop, so later sentences/patterns kept growing the list.
                if len(augmented_patterns) >= max_patterns:
                    return augmented_patterns
    return augmented_patterns

def get_synonyms(word):
    """Return `word` followed by its distinct WordNet lemma names.

    Underscores in lemma names are replaced by spaces. Order is first-seen,
    with the original word always first.
    """
    # A dict keeps insertion order and drops repeats in one step.
    seen = {word: None}
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            seen.setdefault(lemma.name().replace('_', ' '), None)
    return list(seen)

# Augment data
for intent in intents['intents']:
    # Keep a pristine copy of the hand-written patterns the first time this
    # cell runs. Re-running the cell then rebuilds the augmented list from that
    # copy instead of augmenting already-augmented patterns again.
    base_patterns = intent.setdefault('_original_patterns', list(intent['patterns']))
    intent['patterns'] = list(base_patterns) + augment_data(base_patterns)

# Data Preprocessing
import pickle

words = []
classes = []
documents = []
ignore_words = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize each word
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # Add documents with a combination of patterns and intents
        documents.append((w, intent['tag']))
        # Add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lemmatize, lower each word and remove duplicates
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})

# Sort classes
classes = sorted(set(classes))

# The model is trained on THIS vocabulary, so overwrite the pickles written
# earlier in the notebook. Otherwise the exported words.pkl / classes.pkl
# would not match the input width of the saved model.
with open('words.pkl', 'wb') as f:
    pickle.dump(words, f)

with open('classes.pkl', 'wb') as f:
    pickle.dump(classes, f)

# Print statements to check the process
print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)


In [None]:
#Step 4: Create Training Data

In [None]:
import numpy as np
import random

# Build one (bag-of-words, one-hot label) pair per document
training = []
output_empty = np.zeros(len(classes))

for doc_tokens, doc_tag in documents:
    # Normalise the document's tokens the same way the vocabulary was built
    doc_lemmas = {lemmatizer.lemmatize(token.lower()) for token in doc_tokens}

    # 1 where the vocabulary word occurs in the document, 0 elsewhere
    bag = [1 if vocab_word in doc_lemmas else 0 for vocab_word in words]

    # One-hot row with a 1 at the position of this document's class
    output_row = list(output_empty)
    output_row[classes.index(doc_tag)] = 1

    training.append((bag, output_row))

random.shuffle(training)

train_x = np.array([features for features, _ in training])
train_y = np.array([label for _, label in training])

print("Training data created")

In [None]:
#Step 5: Build and Train the Model

In [None]:
from keras.callbacks import EarlyStopping


# Training hyper-parameters, gathered in one place so they are easy to tune
EPOCHS = 10
BATCH_SIZE = 8
LEARNING_RATE = 0.0001
# Patience must be smaller than EPOCHS; with patience == EPOCHS the callback
# could never stop training early.
EARLY_STOPPING_PATIENCE = 3

# Build the model: two hidden layers with dropout, softmax over the classes
model = Sequential()
model.add(Dense(64, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile the model
optimizer = Adam(learning_rate=LEARNING_RATE)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Prepare the training dataset
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Stop once the training loss has not improved for EARLY_STOPPING_PATIENCE epochs
early_stopping = EarlyStopping(monitor='loss', patience=EARLY_STOPPING_PATIENCE)

# Train the model
hist = model.fit(
    train_dataset,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    verbose=1
)

# Save the model
model.save('chatbot_model.h5')
print("Model created and saved")

In [None]:
#Step 6: Load Model and Dependencies

In [None]:
from keras.models import load_model
import json

# Restore the trained network from disk
model = load_model('chatbot_model.h5')

# Re-read the intent definitions from the JSON file
with open('intents.json', 'r') as file:
    intents = json.load(file)

# words / classes are NOT read from disk here: the in-memory lists from the
# earlier steps are de-duplicated and sorted again.
words = sorted(set(words))
classes = sorted(set(classes))


In [None]:
#Step 7: Define Utility Functions for Prediction and Conversation

In [None]:
import numpy as np
import spacy
!pip install --upgrade pyaudio

nlp = spacy.load('en_core_web_sm')

def advanced_clean_up_sentence(sentence):
    """Return lower-cased spaCy lemmas of `sentence`, skipping stop words and punctuation."""
    lemmas = []
    for token in nlp(sentence):
        if token.is_stop or token.is_punct:
            continue
        lemmas.append(token.lemma_.lower())
    return lemmas

def clean_up_sentence(sentence):
    """Tokenize and lemmatize `sentence` the same way the training data was built.

    The vocabulary and training bags were produced with nltk.word_tokenize and
    the WordNet lemmatizer, and stop words were kept. Using the spaCy pipeline
    here instead drops stop words and produces different lemmas, so many
    vocabulary entries could never match at prediction time.

    Returns:
        A list of lower-cased, lemmatized tokens.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=True):
    """Encode `sentence` as a 0/1 bag-of-words vector over `words`.

    Args:
        sentence: raw input text; it is normalised with clean_up_sentence().
        words: the vocabulary; position i of the result corresponds to words[i].
        show_details: when true, print a line for every vocabulary hit.

    Returns:
        A numpy array with one entry per vocabulary word.
    """
    # Map each vocabulary word to the position(s) where it occurs
    positions = {}
    for index, vocab_word in enumerate(words):
        positions.setdefault(vocab_word, []).append(index)

    bag = [0] * len(words)
    for token in clean_up_sentence(sentence):
        for index in positions.get(token, ()):
            bag[index] = 1
            if show_details:
                print(f"Found in bag: {words[index]}")
    return np.array(bag)

def predict_class(sentence, model):
    """Predict the intent(s) of `sentence`.

    Returns a list of {"intent", "probability"} dicts for every class whose
    score exceeds the threshold, best first. If no class passes, a single
    "unknown" entry is returned instead.
    """
    ERROR_THRESHOLD = 0.25

    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]

    # Keep only confident classes, ordered by descending score
    ranked = sorted(
        ([index, score] for index, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not ranked:
        return [{"intent": "unknown", "probability": "1.0"}]

    return [{"intent": classes[index], "probability": str(score)} for index, score in ranked]

fallback_responses = ["I'm sorry, I don't understand. Could you please rephrase?",
                      "I'm not sure how to help with that. Could you provide more details?"]

def get_response(ints, intents_json):
    """Pick a reply for the top predicted intent.

    Args:
        ints: list of prediction dicts as returned by predict_class(); only
            the first entry's 'intent' is used.
        intents_json: dict with an 'intents' list whose items carry a 'tag'
            and a list of 'responses'.

    Returns:
        A random response of the first intent whose tag matches, or a random
        fallback response when the prediction list is empty, the tag is
        "unknown", the tag is not present in `intents_json`, or the matching
        intent has no responses.
    """
    if not ints:
        return random.choice(fallback_responses)

    tag = ints[0]['intent']
    if tag != "unknown":
        for entry in intents_json['intents']:
            if entry['tag'] == tag:
                responses = entry.get('responses')
                if responses:
                    return random.choice(responses)
                # First match wins, even if it has nothing to say
                break

    # Previously an unmatched tag fell through to an unbound local variable
    return random.choice(fallback_responses)

# Log of every exchange, written to disk after the conversation ends
user_feedback = []

def chatbot_response(msg):
    """Answer `msg` and record the exchange in `user_feedback`.

    Returns the response text chosen for the top predicted intent.
    """
    predictions = predict_class(msg, model)
    reply = get_response(predictions, intents)
    record = {
        "input": msg,
        "predicted_intent": predictions[0]["intent"],
        "response": reply,
    }
    user_feedback.append(record)
    return reply

def start_conversation():
    """Print the opening greeting of the chat session."""
    greeting = "Hi there! How can I help you today?"
    print(greeting)

def continue_conversation():
    """Run one turn of the chat loop.

    Reads a line from the user, answers it and prints the reply.

    Returns:
        False when the user types "quit" (any case, surrounding whitespace
        ignored) or the input stream ends / is interrupted; True otherwise.
    """
    try:
        message = input("").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input (or the cell was interrupted): end the conversation
        # cleanly instead of surfacing a traceback.
        return False

    if message.lower() == "quit":
        return False
    if not message:
        # Nothing to classify; an empty message would only yield an
        # all-zero feature vector. Ask for the next line instead.
        return True

    response = chatbot_response(message)
    print("Bot:", response)
    return True

# def continue_conversation():
#     print("Speak or type your message (type 'quit' to exit):")
#     user_input = get_audio()  # Call function to get speech input
#     if user_input.lower() == "quit":
#         return False
#     response = chatbot_response(user_input)
#     print("Bot:", response)
#     return True

def main_conversation():
    """Greet the user, then keep taking turns until one of them ends the chat."""
    start_conversation()
    # continue_conversation() returns False once the session should stop
    while continue_conversation():
        pass
    print("Goodbye! Take care.")

# Run the chat loop. The feedback log is written in a finally block so that
# the exchanges collected so far are kept even if the conversation is
# interrupted or raises.
try:
    main_conversation()
finally:
    with open('user_feedback.json', 'w') as f:
        json.dump(user_feedback, f, indent=4)

In [None]:
import os

from google.colab import files

# Files produced or used by this notebook. "intents (1).json" only exists when
# intents.json was uploaded a second time, so missing files are skipped
# rather than aborting the remaining downloads.
artifact_paths = [
    '/content/chatbot_model.h5',
    '/content/intents (1).json',
    '/content/intents.json',
    '/content/words.pkl',
    '/content/classes.pkl',
]

for artifact_path in artifact_paths:
    if os.path.exists(artifact_path):
        files.download(artifact_path)
    else:
        print(f"Skipping missing file: {artifact_path}")