In [4]:
import json
import numpy as np
import random
import string
import nltk
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.stem import WordNetLemmatizer

import warnings
warnings.filterwarnings('ignore')

# Load dataset
with open('Dataset2.json') as file:
    data = json.load(file)

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Preprocess function
def preprocess(sentence):
    sentence = sentence.lower()
    sentence = ''.join([char for char in sentence if char not in string.punctuation])
    words = nltk.word_tokenize(sentence)
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

# Extract patterns and tags
patterns = []
tags = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(preprocess(pattern))
        tags.append(intent['tag'])

# Encode the tags
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(tags)

# Vectorize the patterns using CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(patterns)

# Define the Naive Bayes model
def create_model():
    return MultinomialNB()

# K-Fold Cross-Validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold = 1
training_times = []
val_accuracies = []
val_precisions = []
val_recalls = []
val_f1s = []

for train_index, val_index in kf.split(X):
    print(f"Fold {fold}")
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    
    model = create_model()
    
    # Train the model and measure training time
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    training_times.append(training_time)

    # Predict on the validation set
    y_val_pred = model.predict(X_val)

    # Calculate metrics
    val_accuracy = accuracy_score(y_val, y_val_pred)
    val_precision = precision_score(y_val, y_val_pred, average='weighted')
    val_recall = recall_score(y_val, y_val_pred, average='weighted')
    val_f1 = f1_score(y_val, y_val_pred, average='weighted')
    
    val_accuracies.append(val_accuracy)
    val_precisions.append(val_precision)
    val_recalls.append(val_recall)
    val_f1s.append(val_f1)

    # Print metrics for the current fold
    print(f'Training Time: {training_time:.2f} seconds')
    print(f'Validation Accuracy: {val_accuracy:.4f}')
    print(f'Precision: {val_precision:.4f}')
    print(f'Recall: {val_recall:.4f}')
    print(f'F1 Score: {val_f1:.4f}')
    print("-" * 30)
    fold += 1

# Print average metrics
print("\nAverage Metrics Over All Folds:")
print(f'Average Training Time: {np.mean(training_times):.2f} seconds')
print(f'Average Validation Accuracy: {np.mean(val_accuracies):.4f}')
print(f'Average Precision: {np.mean(val_precisions):.4f}')
print(f'Average Recall: {np.mean(val_recalls):.4f}')
print(f'Average F1 Score: {np.mean(val_f1s):.4f}')

Fold 1
Training Time: 0.00 seconds
Validation Accuracy: 0.8182
Precision: 0.8636
Recall: 0.8182
F1 Score: 0.8182
------------------------------
Fold 2
Training Time: 0.00 seconds
Validation Accuracy: 0.7273
Precision: 0.6818
Recall: 0.7273
F1 Score: 0.6970
------------------------------
Fold 3
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9091
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 4
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9545
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 5
Training Time: 0.00 seconds
Validation Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
------------------------------
Fold 6
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
------------------------------
Fold 7
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
-----------------------

In [5]:
# Chatbot response function
def get_response(user_input):
    user_input = preprocess(user_input)
    sequence = vectorizer.transform([user_input])
    prediction = model.predict(sequence)
    tag = label_encoder.inverse_transform(prediction)[0]
    
    for intent in data['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])

print("Chatbot is ready! Type 'quit' to exit.")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = get_response(user_input)
    print("Bot:", response)

Chatbot is ready! Type 'quit' to exit.


You:  what is stance on ai


Bot: The University's academic integrity policy (UPR AS14 Appendix III) [https://www.herts.ac.uk/__data/assets/pdf_file/0007/237625/AS14-Apx3-Academic-Misconduct-v17.0.pdf]sets out our stance on plagiarism including fake referencing which can often be the case with AI tools. Therefore, it is crucial thatyou do not use AI toolsto generate an assessment and submit it as your own work; to do so will constitute academic misconduct.


You:  quuit


Bot: Hello! How can I assist you today?


You:  quit


In [12]:
import json
import numpy as np
import random
import string
import nltk
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.stem import WordNetLemmatizer
import warnings

warnings.filterwarnings('ignore')

# Load dataset
with open('Combined_training.json') as file:
    data = json.load(file)

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Preprocess function
def preprocess(sentence):
    sentence = sentence.lower()
    sentence = ''.join([char for char in sentence if char not in string.punctuation])
    words = nltk.word_tokenize(sentence)
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

# Extract patterns and tags
patterns = []
tags = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(preprocess(pattern))
        tags.append(intent['tag'])

# Encode the tags
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(tags)

# Vectorize the patterns using CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(patterns)


In [13]:
# Define the Naive Bayes model
def create_model(alpha=1.0, fit_prior=True):
    return MultinomialNB(alpha=alpha, fit_prior=fit_prior)

# Hyperparameter tuning with Grid Search
param_grid = {
    'alpha': [0.0, 0.5, 1.0, 2.0],
    'fit_prior': [True, False]
}

grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=10, scoring='accuracy')
grid_search.fit(X, y)

# Best parameters from Grid Search
best_params = grid_search.best_params_
print(f"Best parameters from Grid Search: {best_params}")


Best parameters from Grid Search: {'alpha': 2.0, 'fit_prior': True}


In [14]:
# Train and evaluate the best model
model = create_model(alpha=best_params['alpha'], fit_prior=best_params['fit_prior'])
kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold = 1
training_times = []
val_accuracies = []
val_precisions = []
val_recalls = []
val_f1s = []

for train_index, val_index in kf.split(X):
    print(f"Fold {fold}")
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    
    # Train the model and measure training time
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    training_times.append(training_time)

    # Predict on the validation set
    y_val_pred = model.predict(X_val)

    # Calculate metrics
    val_accuracy = accuracy_score(y_val, y_val_pred)
    val_precision = precision_score(y_val, y_val_pred, average='weighted')
    val_recall = recall_score(y_val, y_val_pred, average='weighted')
    val_f1 = f1_score(y_val, y_val_pred, average='weighted')
    
    val_accuracies.append(val_accuracy)
    val_precisions.append(val_precision)
    val_recalls.append(val_recall)
    val_f1s.append(val_f1)

    # Print metrics for the current fold
    print(f'Training Time: {training_time:.2f} seconds')
    print(f'Validation Accuracy: {val_accuracy:.4f}')
    print(f'Precision: {val_precision:.4f}')
    print(f'Recall: {val_recall:.4f}')
    print(f'F1 Score: {val_f1:.4f}')
    print("-" * 30)
    fold += 1

# Print average metrics
print("\nAverage Metrics Over All Folds:")
print(f'Average Training Time: {np.mean(training_times):.2f} seconds')
print(f'Average Validation Accuracy: {np.mean(val_accuracies):.4f}')
print(f'Average Precision: {np.mean(val_precisions):.4f}')
print(f'Average Recall: {np.mean(val_recalls):.4f}')
print(f'Average F1 Score: {np.mean(val_f1s):.4f}')


Fold 1
Training Time: 0.00 seconds
Validation Accuracy: 0.6364
Precision: 0.7273
Recall: 0.6364
F1 Score: 0.6667
------------------------------
Fold 2
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9091
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 3
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9091
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 4
Training Time: 0.00 seconds
Validation Accuracy: 0.8182
Precision: 0.8182
Recall: 0.8182
F1 Score: 0.8182
------------------------------
Fold 5
Training Time: 0.00 seconds
Validation Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
------------------------------
Fold 6
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
------------------------------
Fold 7
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
-----------------------

In [15]:
# Chatbot response function
def get_response(user_input):
    user_input = preprocess(user_input)
    sequence = vectorizer.transform([user_input])
    prediction = model.predict(sequence)
    tag = label_encoder.inverse_transform(prediction)[0]
    
    for intent in data['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])

print("Chatbot is ready! Type 'quit' to exit.")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = get_response(user_input)
    print("Bot:", response)


Chatbot is ready! Type 'quit' to exit.


You:  hi


Bot: Hey! How can I help?


You:  how are you


Bot: Hello! How can I assist you today?


You:  quit


In [1]:
import json
import numpy as np
import random
import string
import nltk
import time
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.stem import WordNetLemmatizer
import warnings
warnings.filterwarnings('ignore')


In [2]:
with open('Combined_training.json') as file:
    data = json.load(file)


In [3]:
# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Preprocess function
def preprocess(sentence):
    sentence = sentence.lower()
    sentence = ''.join([char for char in sentence if char not in string.punctuation])
    words = nltk.word_tokenize(sentence)
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)


In [4]:
patterns = []
tags = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(preprocess(pattern))
        tags.append(intent['tag'])


In [5]:
# Encode the tags
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(tags)


In [6]:
# Vectorize the patterns using CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(patterns)


In [7]:
# Define the Naive Bayes model
def create_model(alpha=1.0, fit_prior=True):
    return MultinomialNB(alpha=alpha, fit_prior=fit_prior)


In [8]:
# Hyperparameter tuning with Grid Search
param_grid = {
    'alpha': [0.0, 0.5, 1.0, 2.0],
    'fit_prior': [True, False]
}

grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=10, scoring='accuracy')
grid_search.fit(X, y)

# Best parameters from Grid Search
best_params = grid_search.best_params_
print(f"Best parameters from Grid Search: {best_params}")


Best parameters from Grid Search: {'alpha': 2.0, 'fit_prior': True}


In [9]:
# Train and evaluate the best model
model = create_model(alpha=best_params['alpha'], fit_prior=best_params['fit_prior'])
kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold = 1
training_times = []
val_accuracies = []
val_precisions = []
val_recalls = []
val_f1s = []

for train_index, val_index in kf.split(X):
    print(f"Fold {fold}")
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    
    # Train the model and measure training time
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    training_times.append(training_time)

    # Predict on the validation set
    y_val_pred = model.predict(X_val)

    # Calculate metrics
    val_accuracy = accuracy_score(y_val, y_val_pred)
    val_precision = precision_score(y_val, y_val_pred, average='weighted')
    val_recall = recall_score(y_val, y_val_pred, average='weighted')
    val_f1 = f1_score(y_val, y_val_pred, average='weighted')
    
    val_accuracies.append(val_accuracy)
    val_precisions.append(val_precision)
    val_recalls.append(val_recall)
    val_f1s.append(val_f1)

    # Print metrics for the current fold
    print(f'Training Time: {training_time:.2f} seconds')
    print(f'Validation Accuracy: {val_accuracy:.4f}')
    print(f'Precision: {val_precision:.4f}')
    print(f'Recall: {val_recall:.4f}')
    print(f'F1 Score: {val_f1:.4f}')
    print("-" * 30)
    fold += 1


Fold 1
Training Time: 0.00 seconds
Validation Accuracy: 0.6364
Precision: 0.7273
Recall: 0.6364
F1 Score: 0.6667
------------------------------
Fold 2
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9091
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 3
Training Time: 0.00 seconds
Validation Accuracy: 0.9091
Precision: 0.9091
Recall: 0.9091
F1 Score: 0.9091
------------------------------
Fold 4
Training Time: 0.00 seconds
Validation Accuracy: 0.8182
Precision: 0.8182
Recall: 0.8182
F1 Score: 0.8182
------------------------------
Fold 5
Training Time: 0.00 seconds
Validation Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
------------------------------
Fold 6
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
------------------------------
Fold 7
Training Time: 0.00 seconds
Validation Accuracy: 0.9000
Precision: 1.0000
Recall: 0.9000
F1 Score: 0.9333
-----------------------

In [10]:
# Print average metrics
print("\nAverage Metrics Over All Folds:")
print(f'Average Training Time: {np.mean(training_times):.2f} seconds')
print(f'Average Validation Accuracy: {np.mean(val_accuracies):.4f}')
print(f'Average Precision: {np.mean(val_precisions):.4f}')
print(f'Average Recall: {np.mean(val_recalls):.4f}')
print(f'Average F1 Score: {np.mean(val_f1s):.4f}')



Average Metrics Over All Folds:
Average Training Time: 0.00 seconds
Average Validation Accuracy: 0.8673
Average Precision: 0.8964
Average Recall: 0.8673
Average F1 Score: 0.8770


In [11]:
# Chatbot response function
def get_response(user_input):
    user_input = preprocess(user_input)
    sequence = vectorizer.transform([user_input])
    prediction = model.predict(sequence)
    tag = label_encoder.inverse_transform(prediction)[0]
    
    for intent in data['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])


In [None]:
print("Chatbot is ready! Type 'quit' to exit.")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = get_response(user_input)
    print("Bot:", response)


Chatbot is ready! Type 'quit' to exit.


You:  hi


Bot: Hello! How can I assist you today?


You:  im disable


Bot: Hi there! What can I help you with?


You:  im suffering from mental health


Bot: If your studies are affecting your mental health and wellbeing then pleasecontact our Student Wellbeing team [https://ask.herts.ac.uk/student-wellbeing]for support. You can alsocall our free 24/7 wellbeing helpline [https://ask.herts.ac.uk/student-wellbeing-24-hour-helpline]ran by Health Assured whenever you need to. You can also talk to staff in your School if you’re struggling.


You:  im struggling in assignments 


Bot: If you’re struggling with assignment deadlines and need more time, you could apply for an extension. If you think you need an additional seven days to hand in your assessment, you couldapply for a coursework extension [https://ask.herts.ac.uk/coursework-extensions]. You must apply for an extension before you submit your work. If you think you can’t submit your work by the deadline due to unexpected circumstances outside of your control, you couldsubmit an exceptional circumstances request. [https://ask.herts.ac.uk/exceptional-circumstances]In the first instance, please talk to your personal tutor, programme leader or cohort leader as they will be able to help and advise you. You must submit your exceptional circumstances request before you submit your work. If you are unsure or need more information, please talk to your personal tutor, programme leader or cohort leader.


You:  im need support from herts


Bot: Herts SU’s Advice and Support team [https://hertssu.com/your-support/]provides free, confidential, and impartial advice and support on a range of student issues, including those of an academic nature, such as information on academic appeals and academic misconduct through to study skills support. If you want to get in touch with the team for advice regarding your academic experience, you canbook an appointment. [https://hertssu.com/your-support/] Herts SU also runsStudy Smart [https://hertssu.com/your-support/academic-advice/study-smart/], a scheme which is designed to accommodate your unique learning preferences to make you feel more confident with your assignments, revision techniques and exams. Sessions are one-to-one tutorials where you'll be exploring new study strategies that are designed to lead to efficient and interesting ways that you can use to approach your assignments and deadlines.


You:  what are the places to study in uni


Bot: At Herts, we have a variety of places that you can study from. We haveLearning Resources Centres (LRCs) [https://ask.herts.ac.uk/learning-resources-centres]on both campuses which are open 24/7, 365 days a year and have silent study areas, breakout rooms, group areas, computers and much more for you to use. If you’d like to study in one of our food outlets, visit the Art and Design Gallery Café and Café Rore on College Lane campus, and Café Ambition and Café Sport on de Havilland campus so you can work and get your caffeine fix. There are also other places across the University such as the Chapman Lounge and Hutton Hub on College Lane campus and the mezzanine and Grace Ononiwu Law Court building on de Havilland campus which have plenty of seating for you to use. View our TikTok of study spaces around campus. [https://www.tiktok.com/@uniofherts/video/7171372715234823430?_r=1&_t=8XlCqlMWi3R&dm_i=3CZ%2C84G9B%2C7FAFLN%2CX9JEK%2C1&is_from_webapp=v1&item_id=7171372715234823430]


You:  what is lrc


Bot: The LRCs offer both online and offline support to help you with your studies at Herts. Their services include: Library SkillUP [https://ask.herts.ac.uk/library-skillup]– available on Canvas, Library SkillUP [https://ask.herts.ac.uk/library-skillup] consists of online modules to help guide you through your time at Herts. You can learn more about referencing, academic reading techniques, how to think and write critically and much more. Study Success Hubs [https://ask.herts.ac.uk/study-success-hub]– located on the ground floor of each LRC, the Study Success Hubs [https://ask.herts.ac.uk/study-success-hub] are here to help you succeed by helping you find the best resources for your assignments, tips on how to manage your workload and more! Information Managers [https://ask.herts.ac.uk/your-information-manager]– each subject area has a dedicated Information Manager who can help you find and use the most appropriate resources for your subject.
