## 1. Import Dependencies

In [1]:
%load_ext dotenv
%dotenv

import keras
import os
import nltk
from nltk.stem import WordNetLemmatizer
import glob
import pickle
import json
import numpy as np
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping
from keras.layers import Dense,Dropout,Activation
import random
import datetime
from googlesearch import *
import webbrowser
import requests
# from pycricbuzz import Cricbuzz
import pygame.mixer
import billboard
import time

2024-01-16 17:07:20.130816: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-16 17:07:20.148260: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-16 17:07:20.373742: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-16 17:07:20.374148: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-16 17:07:20.414710: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

pygame 2.5.2 (SDL 2.28.2, Python 3.10.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


## 2. Constants and Paths

In [2]:
# Project folders live one level above the current working directory
DATA_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'data'))
MODEL_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'models'))

# Files read and written by the preprocessing and prediction cells
INTENTS_PATH, WORDS_PATH, CLASSES_PATH = (
    os.path.join(DATA_PATH, file_name)
    for file_name in ('intents.json', 'words.pkl', 'classes.pkl')
)

# Shared WordNet lemmatizer instance; it is reused by the preprocessing cells
# and by clean_up() at prediction time
lemmatizer = WordNetLemmatizer()

## 3. Tokenize and Lemmatize the Intents

In [4]:
# Accumulators filled while walking the intents:
#   words     - every token seen in any pattern
#   classes   - distinct intent tags
#   documents - (tokens, tag) pairs, one per pattern
words, classes, documents = [], [], []

# Tokens dropped from the vocabulary
ignore = ['?', '!', ',', "'s"]

In [5]:
# Reading data from intents.json file; the context manager closes the file
# handle as soon as the content has been read
with open(INTENTS_PATH) as intents_file:
    data_file = intents_file.read()

# Parse the raw JSON text into a dictionary
intents = json.loads(data_file)

In [6]:
# Walk every pattern of every intent and collect tokens, documents and tags
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern into individual tokens
        w = nltk.word_tokenize(pattern)

        # Grow the global token list and remember which tag the tokens belong to
        words += w
        documents.append((w, intent['tag']))

        # Register the tag the first time it is encountered
        if intent['tag'] in classes:
            continue
        classes.append(intent['tag'])

In [7]:
# Lemmatize and lowercase each word in the words list, excluding ignored characters
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore]

# Remove duplicate words and sort (sorted() already returns a list)
words = sorted(set(words))

# Remove duplicate classes and sort
classes = sorted(set(classes))

# Save the preprocessed words and classes to pickle files for later use.
# The context managers make sure the files are flushed and closed before
# any later cell reads them back.
with open(WORDS_PATH, 'wb') as words_file:
    pickle.dump(words, words_file)
with open(CLASSES_PATH, 'wb') as classes_file:
    pickle.dump(classes, classes_file)

## 4. Preparing Training Data

In [8]:
# Container for the (bag-of-words, one-hot label) training pairs
training = list()

# All-zero label template with one slot per class; copied for every document
output_empty = [0 for _ in range(len(classes))]

In [9]:
# Turn every (tokens, tag) document into a (bag-of-words, one-hot label) pair
for doc in documents:
    # Lemmatize and lowercase the tokens of the current pattern
    pattern = [lemmatizer.lemmatize(token.lower()) for token in doc[0]]

    # One entry per vocabulary word: 1 if it occurs in the pattern, else 0
    bag = [1 if word in pattern else 0 for word in words]

    # Copy the all-zero template and flag the position of this document's class
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    # Store features and label together so they stay aligned when shuffled
    training.append((bag, output_row))

In [10]:
# Randomise the order of the training pairs in place
random.shuffle(training)

# Split the shuffled pairs back into a feature matrix and a label matrix
X_train = np.array([bag for bag, _ in training])
y_train = np.array([label for _, label in training])

## 5. Modelling

In [12]:
def initialize_model():
    """Build and compile the intent classifier.

    The input width is taken from the first row of the global ``X_train`` and
    the number of output units from the first row of the global ``y_train``.

    Returns:
        tuple: ``(model, early_stopping)`` - the compiled ``Sequential`` model
        and an ``EarlyStopping`` callback that monitors ``'accuracy'``.
    """
    n_features = len(X_train[0])
    n_classes = len(y_train[0])

    # Two dropout-regularised hidden stages followed by a softmax over the classes
    model = Sequential([
        Dense(128, activation='relu', input_shape=(n_features,)),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(n_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Stop after 10 epochs without improvement in accuracy and restore the best weights
    early_stopping = EarlyStopping(
        monitor='accuracy',
        patience=10,
        restore_best_weights=True,
    )

    return model, early_stopping

In [35]:
# Build a freshly compiled model together with its early-stopping callback
model, early_stopping = initialize_model()


In [36]:
def train_model(model, X_train, y_train, early_stopping):
    """Fit ``model`` on the training data and return the fit result.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Feature matrix handed to ``fit``.
        y_train: Label matrix handed to ``fit``.
        early_stopping: Callback passed to ``fit`` as the only callback.

    Returns:
        Whatever ``model.fit`` returns (the training history for Keras models).
    """
    # NOTE(review): verbose=-1 is not one of the documented Keras values
    # ("auto", 0, 1, 2); it is kept unchanged to preserve the current output.
    fit_options = {
        'epochs': 200,
        'batch_size': 10,
        'callbacks': [early_stopping],
        'verbose': -1,
    }
    return model.fit(X_train, y_train, **fit_options)

In [45]:
# Fit the model and keep the returned training history
history = train_model(model, X_train, y_train, early_stopping)
# Show which metrics were recorded during training
print(history.history.keys())
# Last expression of the cell: highest training accuracy recorded in any epoch
max(history.history['accuracy'])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
dict_keys(['loss', 'accuracy'])


0.9642857313156128

In [46]:
def save_model(history):
    """Save the global ``model`` under the first unused version number.

    Files are named ``kaybot_model_<version>.h5`` inside ``MODEL_PATH``, with
    versions running from 1 to 98.

    Args:
        history: Training history. Accepted for backward compatibility only;
            it is not persisted. (It used to be passed positionally to
            ``model.save``, where it landed in the ``overwrite`` argument.)

    Returns:
        None

    Raises:
        RuntimeError: If every version from 1 to 98 already exists, instead of
            silently skipping the save.
    """
    # Make sure the target directory exists before probing for free file names
    os.makedirs(MODEL_PATH, exist_ok=True)

    # Save as a new model with continuous versioning
    for version in range(1, 99):
        file = f'kaybot_model_{version}.h5'
        file_path = os.path.join(MODEL_PATH, file)
        if os.path.exists(file_path):
            continue

        model.save(file_path)
        print(f'✅ Model saved as "{file}"')
        return None

    raise RuntimeError(
        f'No free model version left in "{MODEL_PATH}" (versions 1-98 are all in use)'
    )


In [47]:
# Persist the trained model under the first free version number in MODEL_PATH
save_model(history)

✅ Model saved as "kaybot_model_1.h5"


  saving_api.save_model(


## 6. Load local Model and Files
#### After the model has been trained once, the notebook can be used by running only the first two cells (1. Import Dependencies and 2. Constants and Paths) followed by everything below this point:

In [3]:
def load_local_model():
    """Load the most recent model stored in ``MODEL_PATH``.

    "Most recent" means the highest version number found directly in front of
    the file extension (``kaybot_model_10.h5`` beats ``kaybot_model_9.h5``).
    A plain lexicographic sort would rank version 9 above version 10. Files
    without a version number rank below all versioned files.

    Returns:
        The model returned by ``keras.models.load_model``.

    Raises:
        FileNotFoundError: If ``MODEL_PATH`` contains no files at all.
    """
    import re

    # Loading a locally saved model
    local_model_paths = glob.glob(f"{MODEL_PATH}/*")
    if not local_model_paths:
        raise FileNotFoundError(f'No saved model found in "{MODEL_PATH}"')

    def _version_key(path):
        # Numeric version first; the path itself breaks ties deterministically
        match = re.search(r'(\d+)(?=\.[^.]+$)', os.path.basename(path))
        return (int(match.group(1)) if match else -1, path)

    # Return latest model
    model_path_on_disk = max(local_model_paths, key=_version_key)
    model = keras.models.load_model(model_path_on_disk)
    print(f"✅ Model loaded from local disk: {model_path_on_disk}")

    return model

In [4]:
def load_data_files():
    """Load the intents, the vocabulary and the class list from disk.

    Reads ``INTENTS_PATH`` (JSON), ``WORDS_PATH`` and ``CLASSES_PATH`` (pickle).
    Every file is opened in a context manager so its handle is closed right
    after reading.

    Returns:
        tuple: ``(intents, words, classes)``.
    """
    with open(INTENTS_PATH) as intents_file:
        intents = json.loads(intents_file.read())

    # NOTE: unpickling can execute arbitrary code - only load pickle files that
    # were produced by this notebook, never files from an untrusted source.
    with open(WORDS_PATH, 'rb') as words_file:
        words = pickle.load(words_file)
    with open(CLASSES_PATH, 'rb') as classes_file:
        classes = pickle.load(classes_file)

    print(f"✅ Intents loaded from local disk: {INTENTS_PATH}")
    print(f"✅ Words loaded from local disk: {WORDS_PATH}")
    print(f"✅ Classes loaded from local disk: {CLASSES_PATH}")

    return intents, words, classes

In [5]:
# Restore the saved model plus the intents, vocabulary and class list it needs
model = load_local_model()
intents, words, classes = load_data_files()

✅ Model loaded from local disk: /home/jarisfenner/code/Kaaykun/KayBot/models/kaybot_model_1.h5
✅ Intents loaded from local disk: /home/jarisfenner/code/Kaaykun/KayBot/data/intents.json
✅ Words loaded from local disk: /home/jarisfenner/code/Kaaykun/KayBot/data/words.pkl
✅ Classes loaded from local disk: /home/jarisfenner/code/Kaaykun/KayBot/data/classes.pkl


## 7. Predicting

In [6]:
def clean_up(sentence):
    """Tokenize ``sentence`` with NLTK and return its lower-cased, lemmatized tokens."""
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

In [7]:
def create_bow(sentence, words): # bow = bag of words
    """Encode ``sentence`` as a bag-of-words vector over ``words``.

    Args:
        sentence: Raw text; it is tokenized and lemmatized by ``clean_up``.
        words: Vocabulary; position ``i`` of the result refers to ``words[i]``.

    Returns:
        numpy.ndarray: Float array of length ``len(words)`` holding 1 where the
        vocabulary word occurs in the sentence and 0 elsewhere.
    """
    # A set gives constant-time membership tests, so the vocabulary is scanned
    # once instead of once per sentence token
    sentence_words = set(clean_up(sentence))
    bag = np.zeros(len(words))

    for i, w in enumerate(words):
        if w in sentence_words:
            bag[i] = 1

    return bag

In [8]:
def predict_class(sentence, model, threshold=0.8):
    """Predict the intents of ``sentence`` that clear a probability threshold.

    Uses the global ``words`` vocabulary to encode the sentence and the global
    ``classes`` list to translate output indices into intent tags.

    Args:
        sentence: Raw user text.
        model: Object with a Keras-style ``predict`` method.
        threshold: Only classes whose predicted value is strictly greater than
            this are returned. Defaults to 0.8, the previously hard-coded value.

    Returns:
        list[dict]: ``{'intent': <tag>, 'prob': <value as str>}`` entries sorted
        by descending probability; empty if no class exceeds the threshold.
    """
    p = create_bow(sentence, words)
    # predict() works on batches: wrap the vector in a batch of one, then unwrap
    res = model.predict(np.array([p]))[0]

    results = sorted(
        ([i, r] for i, r in enumerate(res) if r > threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )

    return [{'intent': classes[i], 'prob': str(r)} for i, r in results]

In [12]:
def _pick_response(intents_json, tag):
    """Return a random response for ``tag``, or None if the tag has no responses."""
    result = None
    for intent in intents_json['intents']:
        if intent['tag'] == tag and intent['responses']:
            result = random.choice(intent['responses'])
    return result


def get_response(return_list, intents_json):
    """Run the action tied to the predicted intent and return a canned reply.

    Side effects depend on the tag: printing the date/time, opening Google
    results in the browser, printing the weather, news headlines or the
    Billboard Hot 100, or running a blocking timer that plays a sound.

    Args:
        return_list: Predictions as produced by ``predict_class`` - a list of
            ``{'intent': ..., 'prob': ...}`` dicts, best match first. An empty
            list selects the ``'noanswer'`` tag.
        intents_json: Parsed intents data; ``intents_json['intents']`` is a list
            of dicts that each carry a ``'tag'`` and a list of ``'responses'``.

    Returns:
        str: A random response for the tag. If the tag has no responses, the
        ``'noanswer'`` responses are used; if those are missing too, a generic
        apology is returned (previously this case raised UnboundLocalError).
    """
    fallback_tag = 'noanswer'
    tag = return_list[0]['intent'] if return_list else fallback_tag

    if tag == 'datetime':
        print(time.strftime("%A"))
        print(time.strftime("%d %B %Y"))
        print(time.strftime("%H:%M:%S"))

    elif tag == 'google':
        query = input('Enter query: ')
        search_results = list(search(query, num_results=3))

        for url in search_results:
            webbrowser.open(url)

    elif tag == 'weather':
        weather_api_key = os.environ.get('WEATHER_API_KEY')
        city_name = input("Enter city name: ")

        # Passing the values via params lets requests URL-encode them
        # (city names may contain spaces or '&')
        weather = requests.get(
            'https://api.openweathermap.org/data/2.5/weather',
            params={'appid': weather_api_key, 'q': city_name},
            timeout=10,
        ).json()

        if 'main' in weather:
            # The API answers in Kelvin by default; 0 degrees Celsius is 273.15 K
            kelvin_offset = 273.15
            print('Present temp.: ', round(weather['main']['temp'] - kelvin_offset, 2), 'celcius ')
            print('Feels Like:: ', round(weather['main']['feels_like'] - kelvin_offset, 2), 'celcius ')
            print(weather['weather'][0]['main'])
        else:
            # Error payloads (unknown city, bad key, ...) carry no 'main' section
            print('Could not fetch the weather:', weather.get('message', 'unknown error'))

    elif tag == 'news':
        news_api_key = os.environ.get('NEWS_API_KEY')

        # https keeps the API key out of plain-text traffic
        open_news_page = requests.get(
            'https://newsapi.org/v2/top-headlines',
            params={'country': 'us', 'apiKey': news_api_key},
            timeout=10,
        ).json()
        articles = open_news_page.get("articles", [])

        if not articles:
            print('Could not fetch the news:', open_news_page.get('message', 'no articles returned'))

        # Show at most ten headlines; fewer may be available
        for i, article in enumerate(articles[:10]):
            print(f'{i + 1})', article["title"])
            print(article["url"], '\n')

    elif tag == 'song':
        chart = billboard.ChartData('hot-100')

        print('The top 10 songs at the moment are:')
        # Guard against a chart that holds fewer than ten entries
        for i in range(min(10, len(chart))):
            song = chart[i]
            print(f"{i + 1}) {song.title.ljust(30)} by: {song.artist.ljust(50)}")

    elif tag == 'timer':
        pygame.mixer.init()
        minutes = float(input('Minutes to timer: '))

        # Blocks the notebook until the timer has elapsed
        time.sleep(minutes * 60)

        pygame.mixer.music.load('../backend/Handbell-ringing-sound-effect.mp3')
        pygame.mixer.music.play()

    result = _pick_response(intents_json, tag)
    if result is None and tag != fallback_tag:
        result = _pick_response(intents_json, fallback_tag)
    if result is None:
        result = "Sorry, I don't have an answer for that."

    return result

In [13]:
def response(text):
    """Classify ``text`` with the global ``model`` and return the bot's reply.

    The reply is looked up in the global ``intents`` data by ``get_response``.
    """
    predictions = predict_class(text, model)
    return get_response(predictions, intents)

In [14]:
# Chat loop: read a line, print the bot's reply, stop after a farewell
while True:
    x = input()
    print(response(x))

    # The reply is printed first, so the bot still answers the goodbye message
    if x.lower() in ('bye', 'goodbye', 'get lost', 'see you'):
        break

Present temp.:  4.99 celcius 
Feels Like::  -1.05 celcius 
Clouds
...
1) Ryan Gosling Stunned Over Barbie’s 'I'm Just Ken' Win at Critics Choice Awards - Entertainment Tonight
https://www.youtube.com/watch?v=V3CCtZhh8Tg 

2) Yemen Houthi rebels fire missile at US warship in Red Sea in first attack after American-led strike - New York Post 
https://nypost.com/2024/01/15/news/yemen-houthi-rebels-fire-missile-at-us-warship-in-red-sea-in-first-attack-after-american-led-strike/ 

3) Nauru cuts diplomatic ties with Taiwan in favor of China - CNN
https://www.cnn.com/2024/01/15/asia/nauru-cuts-diplomatic-ties-taiwan-china-intl-hnk/index.html 

4) Asia stocks lag Nikkei's ascent, China skips rate cut - Reuters
https://www.reuters.com/markets/global-markets-wrapup-1-2024-01-15/ 

5) Jared Goff leads Lions to first playoff win in 32 years, 24-23 over Matthew Stafford and the Rams - The Associated Press
https://apnews.com/article/lions-rams-score-playoffs-stafford-goff-f5fd70546e6e7aa2f1c1da20687b