In [1]:
%pip list

Package           VersionNote: you may need to restart the kernel to use updated packages.

----------------- -----------
asttokens         2.4.1
blinker           1.8.2
click             8.1.7
colorama          0.4.6
comm              0.2.2
debugpy           1.8.1
decorator         5.1.1
executing         2.0.1
Flask             3.0.3
ipykernel         6.29.4
ipython           8.24.0
itsdangerous      2.2.0
jedi              0.19.1
Jinja2            3.1.4
joblib            1.4.2
jupyter_client    8.6.1
jupyter_core      5.7.2
MarkupSafe        2.1.5
matplotlib-inline 0.1.7
nest-asyncio      1.6.0
nltk              3.8.1
numpy             1.26.4
packaging         24.0
parso             0.8.4
pip               24.0
platformdirs      4.2.1
prompt-toolkit    3.0.43
psutil            5.9.8
pure-eval         0.2.2
Pygments          2.18.0
python-dateutil   2.9.0.post0
pywin32           306
pyzmq             26.0.3
regex             2024.5.10
scikit-learn      1.4.2
scipy             1.13.0


In [2]:
import nltk
import random
import json
from nltk.corpus import wordnet
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import WordNetLemmatizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score

In [5]:
# Load intents from JSON file
with open("./data/intents.json", 'r', encoding='utf-8') as file:
    intents = json.load(file)

In [6]:
# Function to perform synonym replacement
def synonym_replacement(tokens, limit):
    augmented_sentences = []
    for i in range(len(tokens)):
        synonyms = []
        for syn in wordnet.synsets(tokens[i]):
            for lemma in syn.lemmas():
                synonyms.append(lemma.name())
        if len(synonyms) > 0:
            num_augmentations = min(limit, len(synonyms))
            sampled_synonyms = random.sample(synonyms, num_augmentations)
            for synonym in sampled_synonyms:
                augmented_tokens = tokens[:i] + [synonym] + tokens[i+1:]
                augmented_sentences.append(' '.join(augmented_tokens))
    return augmented_sentences

In [7]:
text_data = []
labels = []
stopwords = set(nltk.corpus.stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


limit_per_tag = 100

for intent in intents['intents']:
    augmented_sentences_per_tag = 0
    for example in intent['patterns']:
        tokens = nltk.word_tokenize(example.lower())
        filtered_tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stopwords and token.isalpha()]
        if filtered_tokens:
            text_data.append(' '.join(filtered_tokens))
            labels.append(intent['tag'])
            
            augmented_sentences = synonym_replacement(filtered_tokens, limit_per_tag - augmented_sentences_per_tag)
            for augmented_sentence in augmented_sentences:
                text_data.append(augmented_sentence)
                labels.append(intent['tag'])
                augmented_sentences_per_tag += 1
                if augmented_sentences_per_tag >= limit_per_tag:
                    break

print(len(text_data))
print(len(labels))

15151
15151


In [8]:
text_data[:5]

['goodbye', 'sayonara', 'goodbye', 'so_long', 'adios']

In [9]:
labels[:5]

['goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye']

In [10]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(text_data)
y = labels

In [9]:
def find_best_model(X, y, test_size=0.2):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=100)


    models = [
        # ('Logistic Regression', LogisticRegression(), {
        #     'penalty': ['l2'],
        #     'C': [0.1, 1.0, 10.0],
        #     'solver': ['liblinear'],
        #     'max_iter': [100, 1000, 10000]
        # }),
        ('Multinomial Naive Bayes', MultinomialNB(), {'alpha': [0.1, 0.5, 1.0]})
        # ('Linear SVC', LinearSVC(), {
        #     'penalty': ['l2'],
        #     'loss': ['hinge', 'squared_hinge'],
        #     'C': [0.1, 1, 10],
        #     'max_iter': [100, 1000, 10000]
        # }),
        # ('Decision Tree', DecisionTreeClassifier(), {
        #     'max_depth': [5, 10, 20, None],
        #     'min_samples_split': [2, 5, 10],
        #     'min_samples_leaf': [1, 2, 4],
        #     'criterion': ['gini', 'entropy']
        # }),
        # ('Random Forest', RandomForestClassifier(), {
        #     'n_estimators': [100, 200, 300],
        #     'max_depth': [10, 20, None],
        #     'min_samples_split': [2, 5, 10],
        #     'min_samples_leaf': [1, 2, 4]
        # })
    ]

    for name, model, param_grid in models:
        grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
        grid.fit(X_train, y_train)
        y_pred = grid.predict(X_test)
        score = accuracy_score(y_test, y_pred)
        print(f'{name}: {score:.4f} (best parameters: {grid.best_params_})')

    best_model = max(models, key=lambda x: GridSearchCV(x[1], x[2], cv=3, n_jobs=-1).fit(X_train, y_train).score(X_test, y_test))
    print(f'\nBest model: {best_model[0]}')

    # Fit the best model to the full training data
    best_model[1].fit(X, y)

    return best_model[1]

In [14]:
best_model = find_best_model(X, y)

Logistic Regression: 0.9197 (best parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'})
Multinomial Naive Bayes: 0.8654 (best parameters: {'alpha': 0.5})
Linear SVC: 0.9219 (best parameters: {'C': 1, 'loss': 'squared_hinge', 'max_iter': 100, 'penalty': 'l2'})
Decision Tree: 0.9110 (best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2})
Random Forest: 0.9334 (best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 300})

Best model: Random Forest


## Multinomial Naive Bayes

In [11]:
def naive_bayes_model(X, y, test_size=0.2):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=100)

    name, model, param_grid = ('Multinomial Naive Bayes', MultinomialNB(), {'alpha': [0.1, 0.5, 1.0]})

    grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train, y_train)
    y_pred = grid.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f'{name}: {score:.4f} (best parameters: {grid.best_params_})')

    return grid

In [12]:
naive_model = naive_bayes_model(X, y)

Multinomial Naive Bayes: 0.8984 (best parameters: {'alpha': 0.5})


In [13]:
def preprocess_input(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [token for token in tokens if len(token) > 1]
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(lemmatized_tokens)

In [14]:
def chatbot_response(user_input):
    user_input = preprocess_input(user_input)
    print(user_input)
    input_text = vectorizer.transform([user_input])
    # print('Input Text: ', input_text)
    
    predicted_intent = naive_model.predict(input_text)[0]
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
    
    return response

In [162]:
print('Hello! I am a chatbot. How can I help you today? Type "quit" to exit.')
while True:
    user_input = input('> ')
    if user_input.lower() == 'quit':
        break
    response = chatbot_response(user_input)
    print(user_input)
    print(response)
    print('----------------------------------------------------------------')

Hello! I am a chatbot. How can I help you today? Type "quit" to exit.


be who you
be who you
Hi there! How can I help you today?
----------------------------------------------------------------
who are you
who are you?
Good to see you! How can I assist you?
----------------------------------------------------------------
what is your name
what is your name?
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
----------------------------------------------------------------
hello
hello
Hello! How can I assist you?
----------------------------------------------------------------
what should call you
what should i call you?
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
----------------------------------------------------------------
is there scholarship
is there a scholarship?
Sure! Here's the link to apply for a scholarship at TUP Manila: [Apply for Scholarship](https://tup.edu.ph/pages/students/apply-for-scholarship)

**SCHOLARSHIP

## Random Forest

In [145]:
def random_forest_model(X, y, test_size=0.2):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=100)

    name, model, param_grid = ('Random Forest', RandomForestClassifier(), {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        })

    grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train, y_train)
    y_pred = grid.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f'{name}: {score:.4f} (best parameters: {grid.best_params_})')

    return grid

In [146]:
randomForest_model = random_forest_model(X, y)

Random Forest: 0.9225 (best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300})


In [148]:
def chatbotRF_response(user_input):
    user_input = preprocess_input(user_input)
    print(user_input)
    input_text = vectorizer.transform([user_input])
    # print('Input Text: ', input_text)
    
    predicted_intent = randomForest_model.predict(input_text)[0]
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
    
    return response

In [149]:
print('Hello! I am a chatbot. How can I help you today? Type "quit" to exit.')
while True:
    user_input = input('> ')
    if user_input.lower() == 'quit':
        break
    if len(user_input) > 0: response = chatbotRF_response(user_input)
    else: response = "I'm sorry, but I'm not sure how to respond to that."
    print(user_input)
    print(response)
    print('----------------------------------------------------------------')

Hello! I am a chatbot. How can I help you today? Type "quit" to exit.
hello
Hello
Hello! How can I assist you today? If you have any questions or need help with something, feel free to ask!
----------------------------------------------------------------
tell me your name
tell me your name
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
----------------------------------------------------------------
who are you
who are you?
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
----------------------------------------------------------------
thanks
thanks
You're welcome! Is there anything else I can assist you with?
----------------------------------------------------------------
what should call you
what should i call you?
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
-----------------------------

## Others

In [103]:
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Initialize NLTK components
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def preprocess_input(text):
    tokens = word_tokenize(text.lower())
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    stemmed_tokens = [stemmer.stem(token) for token in lemmatized_tokens]
    return ' '.join(stemmed_tokens)

def chatbot_response(user_input):
    input_text = preprocess_input(user_input)
    input_text = vectorizer.transform([user_input])
    print('Input Text: ', input_text)
    
    predicted_intent = best_model.predict(input_text)[0]
    print(predicted_intent)
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
    
    if not response:
        response = "I'm sorry, but I'm not sure how to respond to that."
    
    return response

# Example usage
response = chatbot_response("who is the principal of tup?")
print(response)

Input Text:    (0, 1463)	0.43461341829810424
  (0, 1135)	0.9006171087844362
principal
The current principal of TUP is Engr. Reynaldo P. Ramos, PhD., EnP.


In [124]:
import numpy as np

def chatbot_response(user_input, best_model, intents):
    input_text = vectorizer.transform([user_input])
    predicted_intent = best_model.predict(input_text)[0]
    print(predicted_intent)
    
    # Get predictions and confidence scores
    predictions = best_model.predict_proba(input_text)[0]
    predicted_intent_index = predictions.argmax()  # Index of highest prediction
    print(predictions)
    print(max(predictions))
    print(predicted_intent_index)
    print(intents['intents'][predicted_intent_index]['responses'][0])
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            print(response)
            break
    # Choose the appropriate response based on confidence
    # if confidence_scores[predicted_intent_index] >= 0.7:  # High confidence threshold (adjustable)
    #     predicted_intent = intents['intents'][predicted_intent_index]['tag']
    #     response = random.choice(intents['intents'][predicted_intent_index]['responses'])
    #     return response, confidence_scores[predicted_intent_index]
    # else:
    #     # Handle low confidence scenarios (e.g., suggest clarification, ask for rephrasing)
    #     return "I'm not quite sure what you mean. Could you rephrase your question?", 0.0

# Example usage
chatbot_response("what can you do?", best_model, intents)
# print(f"Chatbot Response: {response} (Confidence: {accuracy:.2f})")

name
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.01063095 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.03908929 0.
 0.003      0.20843651 0.22605952 0.00533333 0.00608333 0.00467857
 0.26502381 0.         0.         0.005    

In [56]:
from sklearn.metrics import accuracy_score

def chatbot_response(user_input):
    input_text = vectorizer.transform([user_input])
    print('Input Text: ', input_text)
    
    predicted_intent = best_model.predict(input_text)
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
            
    # Get the true labels
    true_labels = [intent['tag'] for intent in intents['intents']]
    
    # Get the predicted labels
    predicted_labels = [predicted_intent for _ in range(len(true_labels))]
    
    # Calculate the accuracy score
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    print(f"Accuracy: {accuracy:.4f}")
    
    return response

# Example usage
response = chatbot_response("What time is the office of university open?")
print(response)

Input Text:    (0, 1487)	0.37807204421300233
  (0, 1435)	0.6382897929758469
  (0, 1017)	0.6142652653545149
  (0, 1002)	0.26893466371295366
Accuracy: 0.0079
The operating hours of Technological University of the Philippines are from 8:00 AM to 5:00 PM, Monday to Friday


In [106]:
user_input = input('what are the facilities in tup?')
response = chatbot_response(user_input)
print(user_input)
print(response)
print('----------------------------------------------------------------')

TypeError: chatbot_response() missing 2 required positional arguments: 'best_model' and 'intents'

In [108]:
print('Hello! I am a chatbot. How can I help you today? Type "quit" to exit.')
while True:
    user_input = input('> ')
    if user_input.lower() == 'quit':
        break
    response = chatbot_response(user_input)
    print(user_input)
    print(response)
    print('----------------------------------------------------------------')

Hello! I am a chatbot. How can I help you today? Type "quit" to exit.
Input Text:    (0, 711)	1.0
Hello
Good to see you! How can I assist you?
----------------------------------------------------------------
Input Text:    (0, 1544)	1.0
who are you?
My name's TekBot, and I'm here to answer your curiosity about Technological University of the Philippines (TUP)!
----------------------------------------------------------------
Input Text:    (0, 1463)	0.47967983087059607
  (0, 1125)	0.8774435935465917
who is the president of tup?
[{'Office/Unit': 'Office of the President', 'Designation': 'University President', 'Name': 'Engr. Reynaldo P. Ramos, PhD., EnP', 'Email': 'reynaldo_ramos@tup.edu.ph'}, {'Office/Unit': 'Office of the Vice President for Academic Affairs', 'Designation': 'VP for Academic Affairs', 'Name': 'Dr. Ryan C. Reyes', 'Email': 'ryan_reyes@tup.edu.ph, vpaa@tup.edu.ph'}, {'Office/Unit': 'Office of the Vice President for Research & Extension', 'Designation': 'VP for Research & 

In [24]:
print('Hello! I am a chatbot. How can I help you today? Type "quit" to exit.')
while True:
    user_input = input('> ')
    if user_input.lower() == 'quit':
        break
    response = chatbot_response(user_input)
    print('> ', user_input)
    print(response)
    print('----------------------------------------------------------------')

Hello! I am a chatbot. How can I help you today? Type "quit" to exit.


Hello
Hello! How can I assist you today? If you have any questions or need help with something, feel free to ask!
----------------------------------------------------------------
who is the president of tup?
Sure! Here's the link to 'And kwento ni Prez: President Ramos with US Ambassador MaryKay L. Carlson at a dinner in U.S. Ambassador’s Residence': [President Ramos with US Ambassador MaryKay L. Carlson at a dinner in U.S. Ambassador’s Residence](https://tup.edu.ph/president/corner/2023/08/president-ramos-with-us-ambassador-marykay-l-carlson-at-a-dinner-in-us-ambassadors-residence)
----------------------------------------------------------------
who is the principal?
The current principal of TUP is Engr. Reynaldo P. Ramos, PhD., EnP.
----------------------------------------------------------------
who are you?
[{'Office/Unit': 'College of Industrial Technology', 'Designation': 'Dean', 'Name': 'Asst. Prof. Elpidio S. Virrey', 'Email': 'elpidio_virrey@tup.edu.ph, cit@tup.edu.ph'}, {'Off

## Test Cases

In [None]:
test_cases_unknown = [
    "are they offering scholarships?",
    "what are the requirements?",
    "is there an installment for tuition fees?",
    "what are the program offer",
    "Are there supplementary admission requirements such as an essay, interview, etc.?",
    "When are the class starts and when it usually ends?",
    "What is the career outlook for graduates of this program?",
    "What is the passing rate of courses that need to take a board exam?",
    "What is the vision, mission and core values of computer science?",
    "Does the college accept transferees or late enrollee?",
    "What is the mode of classes? is it face-to-face or online?",
    "is there an admission fee and how much?",
    "What time is the office of university open?"
]

test_cases = [
    
]

## Export Model

In [14]:
import os
import pickle


if not os.path.exists('../with Model/model'):
    os.makedirs('model')

# if not os.path.exists('../with Model/dataset'):
#     os.makedirs('dataset')

# Save the trained model
with open('../with Model/model/chatbot_model.pkl', 'wb') as f:
    pickle.dump(naive_model, f)

# Save the vectorizer
with open('../with Model/model/vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

# Save the intents to the "dataset" folder
# with open('../with Model/dataset/intents1.json', 'w') as f:
#     json.dump(intents, f)

## Testing

In [17]:
import pickle
# Save the vectorizer
with open('./model/vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)
    
# Save the trained model
with open('./model/naive_bayes_model.pkl', 'wb') as f:
    pickle.dump(naive_model, f)

In [38]:
import joblib, json, nltk

def load_model(model_path, vectorizer_path, data_path):
    # Load the model from file
    model = joblib.load(model_path)
    vectorizer = joblib.load(vectorizer_path)
    with open(data_path) as json_data:
        data = json.load(json_data)

    return model, vectorizer, data

def preprocess_input(text):
    lemmatizer = WordNetLemmatizer()
    tokens = nltk.word_tokenize(text.lower())
    tokens = [token for token in tokens if len(token) > 1]
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(lemmatized_tokens)

def get_Chat_response(text):
    naive_bayes_model, vectorizer, data = load_model("./model/naive_bayes_model.pkl", "./model/vectorizer.pkl", "../intents.json")
    
    input_text = preprocess_input(text)
    input_text = vectorizer.transform([input_text])
    
    predicted_intent = naive_bayes_model.predict(input_text)[0]
    
    for intent in data['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
    print('Response: ', response)
    return response

In [15]:
response = chatbot_response('hello')
print(response)

hello
Hello! How can I assist you today? If you have any questions or need help with something, feel free to ask!
