In [1]:
from gettext import install
import numpy as np
import random
import json


These lines of code import three libraries: numpy, random, and json.

Numpy is a library for scientific computing in Python. It provides functions for working with arrays and matrices, among other things.

The random library provides functions for generating random numbers and for randomly shuffling data.

The json library provides functions for reading and writing JSON (JavaScript Object Notation) data. JSON is a lightweight data format that is commonly used for exchanging data between a client and a server. The json library provides a convenient way to encode and decode JSON data in Python.

In [2]:
import nltk
# nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()


These lines of code are importing the Natural Language Toolkit (nltk) library and two modules from it: PorterStemmer and WordNetLemmatizer.

The PorterStemmer is a rule-based algorithm that transforms words into their base form, or stem, by removing prefixes and suffixes. For example, the stem of the word "running" would be "run".

The WordNetLemmatizer is a more sophisticated method that uses a database of words and their relationships to transform words into their base form. Unlike the stemmer, the lemmatizer takes into account the context of the word and maps it to a valid word in the language. For example, the lemma of the word "running" would be "run".

After importing these modules, two objects are instantiated: one for the PorterStemmer and one for the WordNetLemmatizer. These objects can be used to stem or lemmatize words in your text data.

In [3]:
def tokenize(sentence):
    """
    split sentence into array of words/tokens
    a token can be a word or punctuation character, or number
    """
    return nltk.word_tokenize(sentence)


In [4]:
def lemmatize(word):
    """
    text = I was running in the park yesterday. She runs in the park every day. He is a good runner.
    I was running in the park yesterday.
    She runs in the park every day.
    He is a good runner.
    words = text.split()
    lemmatized_words = [lemmatizer.lemmatize(word, pos='v') for word in words]
    Output: ['I', 'be', 'run', 'in', 'the', 'park', 'yesterday.', 'She', 'run', 'in', 'the', 'park', 'every', 'day.', 'He', 'be', 'a', 'good', 'runner.']

    """
    return lemmatizer.lemmatize(word.lower())

In [5]:
def stem(word):
    """
    stemming = find the root form of the word
    examples:
    words = ["organize", "organizes", "organizing"]
    words = [stem(w) for w in words]
    -> ["organ", "organ", "organ"]
    """
    return stemmer.stem(word.lower())

In [6]:
def bag_of_words(tokenized_sentence, words):
    """
    return bag of words array:
    1 for each known word that exists in the sentence, 0 otherwise
    example:
    sentence = ["hello", "how", "are", "you"]
    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
    bog   = [  0 ,    1 ,    0 ,   1 ,    0 ,    0 ,      0]
    """
    # stem each word
    sentence_words = [stem(word) for word in tokenized_sentence]
    # initialize bag with 0 for each word
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1

    return bag

This function creates a bag of words representation of a sentence. A bag of words representation is a numerical representation of a text document, where the presence (and often the frequency) of words is taken into account, but the order is ignored. In the code, tokenized_sentence is a list of words that make up a sentence, and words is a list of all words that the model knows. The function returns an array of zeros with length equal to words, and sets the value at an index to 1 if the word at that index appears in tokenized_sentence. For example, if the sentence is ["hello", "how", "are", "you"] and the words are ["hi", "hello", "I", "you", "bye", "thank", "cool"], then the returned array would be [0, 1, 0, 1, 0, 0, 0].





In [7]:
with open('intents.json', 'r') as f:
    intents = json.load(f)


In [8]:
intents

{'intents': [{'tag': 'greetings',
   'patterns': ['Hello', 'Hey', 'Hi', 'Good Day', 'Greetings'],
   'responses': ['Hello!', 'Hey', 'What Can I do for you?']},
  {'tag': 'Acne',
   'patterns': ['blackheads',
    'pimples',
    'face pimples',
    'neck pimples',
    'shoulder pimples'],
   'responses': ['According to your symptoms you might have Acne',
    'You have Acne',
    'I suppose you have Acne']},
  {'tag': 'Attention Deficit Disorder (ADHD)',
   'patterns': ['impulsivity',
    'hyperactivity',
    'lack of focus',
    'messy works',
    'restlessness'],
   'responses': ['According to your symptoms you might have Attention Deficit Disorder',
    'You have Attention Deficit Disorder',
    'I suppose you have Attention Deficit Disorder']},
  {'tag': 'Allergies',
   'patterns': ['sneezing',
    'itchy eyes',
    'coughing',
    'watery eyes',
    'itchy nose'],
   'responses': ['According to your symptoms you might have Allergies',
    'You have Allergies',
    'I suppose you have

In [9]:
all_words = []
tags = []
xy = []

In [10]:
for intent in intents['intents']:
    tag = intent['tag']
    # add to tag list
    tags.append(tag)
    for pattern in intent['patterns']:
        # tokenize each word in the sentence
        w = tokenize(pattern)
        # add to our words list
        all_words.extend(w)
        # add to xy pair
        xy.append((w, tag))

This code defines a function lbag_of_words that takes two inputs: tokenized_sentence and words.

tokenized_sentence is a list of words, where each word is a token in a sentence.

words is a list of all unique words in the entire dataset.

The function first lemmatizes each word in the tokenized_sentence using the lemmatize function. It then initializes an array lbog of zeros with the length equal to the number of unique words in the dataset.

The function then checks if each word in words is present in sentence_words, if it is present it sets the corresponding index in lbog to 1.

The resulting lbog array represents a bag of words representation of the tokenized_sentence with each index in the array representing the presence or absence of a word in the words list

In [11]:
xy

[(['Hello'], 'greetings'),
 (['Hey'], 'greetings'),
 (['Hi'], 'greetings'),
 (['Good', 'Day'], 'greetings'),
 (['Greetings'], 'greetings'),
 (['blackheads'], 'Acne'),
 (['pimples'], 'Acne'),
 (['face', 'pimples'], 'Acne'),
 (['neck', 'pimples'], 'Acne'),
 (['shoulder', 'pimples'], 'Acne'),
 (['impulsivity'], 'Attention Deficit Disorder (ADHD)'),
 (['hyperactivity'], 'Attention Deficit Disorder (ADHD)'),
 (['lack', 'of', 'focus'], 'Attention Deficit Disorder (ADHD)'),
 (['messy', 'works'], 'Attention Deficit Disorder (ADHD)'),
 (['restlessness'], 'Attention Deficit Disorder (ADHD)'),
 (['sneezing'], 'Allergies'),
 (['itchy', 'eyes'], 'Allergies'),
 (['coughing'], 'Allergies'),
 (['watery', 'eyes'], 'Allergies'),
 (['itchy', 'nose'], 'Allergies'),
 (['forgetful'], 'Alzheimers'),
 (['confusion', 'with', 'time'], 'Alzheimers'),
 (['poor', 'judgement'], 'Alzheimers'),
 (['loss', 'of', 'appetite'], 'Ankylosing Spondylitis'),
 (['weight', 'loss'], 'Ankylosing Spondylitis'),
 (['difficulty', '

In [12]:
# stem and lower each word
ignore_words = ['?', '.', '!']
all_words = [stem(w) for w in all_words if w not in ignore_words]
# remove duplicates and sort
all_words = sorted(set(all_words))
tags = sorted(set(tags))


This code performs text preprocessing on a list of words. The preprocessing steps are:

Remove words that are in the ignore_words list (these words are '?', '.', and '!').
Stem each word using the stem() function (which is likely a stemming function, such as the Porter stemmer, that reduces words to their root form).
Lowercase each word.
Remove duplicates and sort the resulting list of words.
The same steps are repeated for the list of tags (which likely represent the categories or labels associated with each sentence).

In [13]:
len(all_words)

512

In [14]:
print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)

621 patterns
111 tags: ['Abdominal Migraine', 'Abscessed Tooth', 'Absence Seizure', 'Achalasia', 'Acne', 'Acute Respiratory Distress Syndrome (ARDS)', 'Allergies', 'Alzheimers', 'Amenorrhea', 'Amyloidosis', 'Anemia', 'Ankylosing Spondylitis', 'Anorexia', 'Antitrypsin Deficiency', 'Anxiety or Panic Disorder', 'Aparaxia of Speech', 'Appendicitis', 'Arachnoiditis', 'Arrhythmia', 'Arthritis', 'Asthma', 'Asthma symptoms', 'Attention Deficit Disorder (ADHD)', 'Autism', 'Bacterial Arthritis', 'Bacterial Meningitis', 'Benign Prostate Enlargement(BPE)', 'Bipolar Disorder', 'Blepharitis', 'Bronchiectasis', 'Bronchitis', 'Bulimia', 'Catarrh', 'Cellulitis', 'Chest Infection', 'Chest Pain', 'Chronic Fatigue Syndrome', 'Common Cold', 'Conjunctivitis', 'Consultation', 'Dehydration', 'Dental Abscess', 'Depression', 'Depression symptoms', 'Diabetes', 'Diabetes symptoms', 'Diarrhoea', 'Dystonia', 'Epilepsy', 'Escherichia Coli', 'Fetal alcohol spectrum disorder', 'Flu', 'Food poisoning', 'Fungal nail inf

In [15]:
# stem and lower each word
ignore_words = ['?', '.', '!']
all_words = [lemmatize(w) for w in all_words if w not in ignore_words]
# remove duplicates and sort
all_words = sorted(set(all_words))
tags = sorted(set(tags))


This code performs two tasks:

Stemming and lowercasing words: It iterates over the list of words all_words and lemmatizes each word using the lemmatize function. The resulting words are then lowercased. Additionally, any words in the ignore_words list are ignored and not included in the final list of words.

Removing duplicates and sorting: The final list of words is then converted into a set, which removes any duplicates. The set is then sorted and stored in the all_words variable. The same process is applied to the tags list, resulting in a sorted set of tags.

In [16]:
len(all_words)

511

In [17]:
print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique emmatized words:", all_words)

621 patterns
111 tags: ['Abdominal Migraine', 'Abscessed Tooth', 'Absence Seizure', 'Achalasia', 'Acne', 'Acute Respiratory Distress Syndrome (ARDS)', 'Allergies', 'Alzheimers', 'Amenorrhea', 'Amyloidosis', 'Anemia', 'Ankylosing Spondylitis', 'Anorexia', 'Antitrypsin Deficiency', 'Anxiety or Panic Disorder', 'Aparaxia of Speech', 'Appendicitis', 'Arachnoiditis', 'Arrhythmia', 'Arthritis', 'Asthma', 'Asthma symptoms', 'Attention Deficit Disorder (ADHD)', 'Autism', 'Bacterial Arthritis', 'Bacterial Meningitis', 'Benign Prostate Enlargement(BPE)', 'Bipolar Disorder', 'Blepharitis', 'Bronchiectasis', 'Bronchitis', 'Bulimia', 'Catarrh', 'Cellulitis', 'Chest Infection', 'Chest Pain', 'Chronic Fatigue Syndrome', 'Common Cold', 'Conjunctivitis', 'Consultation', 'Dehydration', 'Dental Abscess', 'Depression', 'Depression symptoms', 'Diabetes', 'Diabetes symptoms', 'Diarrhoea', 'Dystonia', 'Epilepsy', 'Escherichia Coli', 'Fetal alcohol spectrum disorder', 'Flu', 'Food poisoning', 'Fungal nail inf

In [18]:
# create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tags.index(tag)
    y_train.append(label)

This code is creating training data for a machine learning model. The xy variable is assumed to be a list of tuples, where each tuple contains a sentence (pattern_sentence) and a corresponding tag (tag).

For each sentence in the list, the code first creates a bag of words representation of the sentence, which is a numerical representation of the sentence that can be used as input to a machine learning model. The bag of words representation is created using the bag_of_words function. The input to this function is the sentence and a list of all unique words in the data (all_words).

Next, the code converts the tag of the sentence into a numerical label by finding the index of the tag in the list of tags (tags). The resulting bag of words representation and label are then added to the lists X_train and y_train, respectively. These lists will contain all of the input/output pairs used to train the model.

In [19]:
bag_of_words(pattern_sentence, all_words)


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [20]:
X_train

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 

In [21]:
y_train

[109,
 109,
 109,
 109,
 109,
 4,
 4,
 4,
 4,
 4,
 22,
 22,
 22,
 22,
 22,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 14,
 14,
 14,
 14,
 14,
 14,
 14,
 14,
 19,
 19,
 19,
 13,
 13,
 13,
 13,
 0,
 0,
 0,
 0,
 0,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 1,
 1,
 1,
 1,
 1,
 1,
 8,
 8,
 8,
 8,
 2,
 2,
 2,
 2,
 99,
 99,
 99,
 99,
 3,
 3,
 3,
 3,
 16,
 16,
 16,
 16,
 16,
 16,
 16,
 15,
 15,
 15,
 15,
 17,
 17,
 17,
 17,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 18,
 18,
 18,
 18,
 18,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 24,
 24,
 24,
 25,
 25,
 25,
 25,
 25,
 25,
 26,
 26,
 26,
 26,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 29,
 29,
 29,
 30,
 30,
 30,
 30,
 30,
 31,
 31,
 31,
 31,
 32,
 32,
 32,
 32,
 32,
 32,
 33,
 33,
 33,
 33,
 34,
 34,
 34,
 35,
 35,
 35,
 35,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,


In [22]:
len(X_train)

621

In [23]:
len(X_train[0])

511

In [24]:
X_train

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 

In [25]:
X_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [26]:
len(y_train)

621

In [27]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from model import NeuralNet

This code imports necessary modules and classes from the PyTorch library.

The torch module is the main module of PyTorch, which provides tensor computation with strong GPU acceleration.
The torch.nn module contains classes for defining and training neural networks.
The torch.utils.data module provides tools for creating and pre-processing datasets.
The line from model import NeuralNet is importing the NeuralNet class from the model module, which was likely defined in a separate file in your project. The NeuralNet class is a custom implementation of a neural network, which will be used in the main training script.

In [28]:
# Hyper-parameters 
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
print(input_size, output_size)

511 111


In [32]:
# create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tags.index(tag)
    y_train.append(label)
    
    

In [90]:
# create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tags.index(tag)
    y_train.append(label)
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        batch_size = input.size(0)
        hidden = self.initHidden(batch_size).to(input.device)
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)



RNN= RNN(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(RNN.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = RNN.initHidden(batch_size).to(device)
        outputs, hidden = RNN(words, hidden.view(1, -1))
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')

data = {
    "model_state": RNN.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "RNN.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')



Epoch [100/1800], Loss: 0.3990
Epoch [200/1800], Loss: 0.3320
Epoch [300/1800], Loss: 0.4745
Epoch [400/1800], Loss: 0.6151
Epoch [500/1800], Loss: 0.6496
Epoch [600/1800], Loss: 0.5118
Epoch [700/1800], Loss: 0.4205
Epoch [800/1800], Loss: 0.3090
Epoch [900/1800], Loss: 0.0881
Epoch [1000/1800], Loss: 0.5526
Epoch [1100/1800], Loss: 0.3478
Epoch [1200/1800], Loss: 0.1667
Epoch [1300/1800], Loss: 0.0000
Epoch [1400/1800], Loss: 0.2367
Epoch [1500/1800], Loss: 0.3971
Epoch [1600/1800], Loss: 0.2962
Epoch [1700/1800], Loss: 0.6336
Epoch [1800/1800], Loss: 0.3814
final loss: 0.3814
training complete. file saved to RNN.pth


In [36]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size).to(device),
                torch.zeros(1, batch_size, self.hidden_size).to(device))


LSTM= LSTM(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(LSTM.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = LSTM.initHidden(words.size(0))
        outputs, hidden = LSTM(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": LSTM.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "LSTM.pth"
torch.save(data, FILE)
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]
model= LSTM(input_size, hidden_size, output_size)
model.load_state_dict(model_state)
model.eval()
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
            
    return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

msg= "What medicines can I buy to help me with my common cold?"
response(msg)


Epoch [100/1800], Loss: 0.6642
Epoch [200/1800], Loss: 0.9873
Epoch [300/1800], Loss: 0.2408
Epoch [400/1800], Loss: 0.4050
Epoch [500/1800], Loss: 0.3195
Epoch [600/1800], Loss: 0.5730
Epoch [700/1800], Loss: 0.4790
Epoch [800/1800], Loss: 0.8392
Epoch [900/1800], Loss: 0.6225
Epoch [1000/1800], Loss: 0.2468
Epoch [1100/1800], Loss: 0.4242
Epoch [1200/1800], Loss: 0.1975
Epoch [1300/1800], Loss: 0.1968
Epoch [1400/1800], Loss: 0.2536
Epoch [1500/1800], Loss: 0.3226
Epoch [1600/1800], Loss: 0.6711
Epoch [1700/1800], Loss: 0.8285
Epoch [1800/1800], Loss: 0.5485
final loss: 0.5485
training complete. file saved to LSTM.pth


In [105]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class BRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = 1
        self.num_directions = 2
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=self.num_layers, 
                          bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.rnn(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size)


BRNN = BRNN(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(BRNN.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = BRNN.initHidden(words.size(0)).to(device)
        outputs, hidden = BRNN(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": BRNN.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "BRNN.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')



Epoch [100/1800], Loss: 0.4236
Epoch [200/1800], Loss: 0.9006
Epoch [300/1800], Loss: 0.4280
Epoch [400/1800], Loss: 0.7579
Epoch [500/1800], Loss: 0.6104
Epoch [600/1800], Loss: 0.4396
Epoch [700/1800], Loss: 0.6443
Epoch [800/1800], Loss: 0.5394
Epoch [900/1800], Loss: 0.7975
Epoch [1000/1800], Loss: 0.3297
Epoch [1100/1800], Loss: 0.3679
Epoch [1200/1800], Loss: 1.0092
Epoch [1300/1800], Loss: 0.3215
Epoch [1400/1800], Loss: 0.2808
Epoch [1500/1800], Loss: 0.2017
Epoch [1600/1800], Loss: 0.3630
Epoch [1700/1800], Loss: 0.3166
Epoch [1800/1800], Loss: 0.3866
final loss: 0.3866
training complete. file saved to BRNN.pth


In [106]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class BLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = 1
        self.num_directions = 2
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=self.num_layers, 
                            bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return (torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size),
                torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))


BLSTM = BLSTM(input_size, hidden_size, output_size).to(device)



# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(BLSTM.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = BLSTM.initHidden(words.size(0))
        outputs, hidden = BLSTM(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": BLSTM.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "BLSTM.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')



Epoch [100/1800], Loss: 1.2370
Epoch [200/1800], Loss: 0.7419
Epoch [300/1800], Loss: 0.5530
Epoch [400/1800], Loss: 0.3440
Epoch [500/1800], Loss: 0.4823
Epoch [600/1800], Loss: 0.4438
Epoch [700/1800], Loss: 0.3813
Epoch [800/1800], Loss: 0.2700
Epoch [900/1800], Loss: 0.5189
Epoch [1000/1800], Loss: 0.6437
Epoch [1100/1800], Loss: 0.4065
Epoch [1200/1800], Loss: 0.2809
Epoch [1300/1800], Loss: 0.5139
Epoch [1400/1800], Loss: 0.5963
Epoch [1500/1800], Loss: 0.6573
Epoch [1600/1800], Loss: 0.3118
Epoch [1700/1800], Loss: 0.2811
Epoch [1800/1800], Loss: 0.3769
final loss: 0.3769
training complete. file saved to BLSTM.pth


In [112]:
from sklearn import neural_network


num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)


class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size) 
        self.l2 = nn.Linear(hidden_size, hidden_size) 
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        # no activation and no softmax at the end
        return out

NeuralNet= NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NeuralNet.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = NeuralNet(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

data = {
"model_state": NeuralNet.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

FILE = "NeuralNet.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')


Epoch [100/1000], Loss: 0.1395
Epoch [200/1000], Loss: 1.1985
Epoch [300/1000], Loss: 0.0139
Epoch [400/1000], Loss: 0.2595
Epoch [500/1000], Loss: 0.2980
Epoch [600/1000], Loss: 0.5483
Epoch [700/1000], Loss: 0.6961
Epoch [800/1000], Loss: 0.6428
Epoch [900/1000], Loss: 0.1907
Epoch [1000/1000], Loss: 0.8099
final loss: 0.8099
training complete. file saved to NeuralNet.pth


In [43]:
from transformers import BertModel
# Load pre-trained BERT model
bert_model = BertModel.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [55]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

# Hyperparameters
num_epochs = 4
batch_size = 64
learning_rate = 0.005
output_size = len(tags)

# Load the pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        inputs = tokenizer.encode_plus(
            self.x_data[index],
            add_special_tokens=True,
            max_length=128,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        )
        return inputs, self.y_data[index]

    def __len__(self):
        return self.n_samples

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pre-trained BERT model
bert = BertModel.from_pretrained('bert-base-uncased').to(device)

class BertClassifier(nn.Module):
    def __init__(self, bert, output_size):
        super(BertClassifier, self).__init__()
        self.bert = bert
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(bert.config.hidden_size, output_size)

    def forward(self, input_ids, attention_mask):
        _, pooled_output = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=False)
        output = self.dropout(pooled_output)
        output = self.fc(output)
        return output

model = BertClassifier(bert, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [56]:
# Train the model
for epoch in range(num_epochs):
    for (inputs, labels) in train_loader:
        input_ids = inputs['input_ids'].squeeze(1).to(device)
        attention_mask = inputs['attention_mask'].squeeze(1).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')

# Save the trained model
data = {
    "model_state": model.state_dict(),
    "all_words": all_words,
    "tags": tags
}
FILE = "bert_classifier.pth"
torch.save(data, FILE)

print(f'Training complete. Model saved to {FILE}.')

final loss: 5.0813
Training complete. Model saved to bert_classifier.pth.


These lines of code define several hyperparameters for a machine learning model.

num_epochs is the number of times the model will be trained on the entire training dataset. In each epoch, the model will see all the training examples once. The higher the number of epochs, the more the model will be trained and the better it will become at making predictions.

batch_size is the number of training examples used in each iteration of model training. The model is updated after each batch of training examples is processed. Smaller batch sizes can be used to update the model more frequently, but larger batch sizes can lead to faster training.

learning_rate is the step size at which the model updates its weights in response to the loss function. A smaller learning rate means that the model will make smaller updates to its weights, which can be useful for avoiding overshooting a good solution. A larger learning rate can lead to faster convergence but may result in the model oscillating or not converging at all.

input_size is the number of features in each training example. For example, if each training example is a vector of length 100, then input_size would be 100.

hidden_size is the number of neurons in the hidden layer of a neural network. The hidden layer is used to learn intermediate representations of the data that are then used to make predictions.

output_size is the number of predictions the model will make for each input. For example, if the model is predicting the class labels of images, then output_size would be the number of classes.

These hyperparameters can greatly impact the performance of a machine learning model, and selecting good values for them requires some experimentation and knowledge of the problem you're trying to solve.

In [None]:
X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)

class ChatDataset(Dataset):

    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)


print(f"X_train size: {X_train.size()}")
print(f"y_train size: {y_train.size()}")

X_train size: torch.Size([621, 511])
y_train size: torch.Size([621])


  X_train = torch.tensor(X_train)
  y_train = torch.tensor(y_train)


This code defines a custom dataset class ChatDataset for a machine learning task. The class extends the Dataset class from the PyTorch torch.utils.data library.

The class has three methods:

__init__: This is the constructor of the class, which sets the number of samples in the dataset to len(X_train), and sets the x_data and y_data instance variables to X_train and y_train, respectively.

__getitem__: This method allows indexing of the dataset using the [] operator. It returns the i-th sample in the dataset, which is the i-th pair of x_data and y_data.

__len__: This method returns the number of samples in the dataset, which is stored in the n_samples instance variable.

After defining the ChatDataset class, an instance of the class, dataset, is created. Then, a PyTorch DataLoader object, train_loader, is created. The DataLoader class is used to load the data in the dataset in mini-batches during training. The train_loader is initialized with dataset, a batch_size value, and the flag shuffle set to True. The num_workers argument is set to 0, which means that data loading will be done in the main process.

This code sets up the loss function and optimization algorithm for training a neural network.

The criterion variable is set to a nn.CrossEntropyLoss object, which is a commonly used loss function for multi-class classification problems. The CrossEntropyLoss computes the cross-entropy between the predicted class probabilities and the true class labels.

The optimizer variable is set to an Adam optimizer from the torch.optim module. The Adam optimizer is a popular optimization algorithm that uses the gradient of the loss with respect to the model parameters to update the parameters. The Adam optimizer takes two arguments: the model parameters and the learning rate. In this case, the model parameters are passed as model.parameters() and the learning rate is set to learning_rate.

In [None]:
import torch
import torch.nn as nn


class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size) 
        self.l2 = nn.Linear(hidden_size, hidden_size) 
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        # no activation and no softmax at the end
        return out

model = NeuralNet(input_size, hidden_size, output_size).to(device)

This code defines a class named NeuralNet that extends the nn.Module class from PyTorch's torch.nn module. The NeuralNet class is a simple implementation of a feedforward neural network.

The class constructor (__init__) takes three arguments: input_size, hidden_size, and num_classes. These are used to specify the number of input features, the number of hidden units in each hidden layer, and the number of output classes, respectively.

The constructor sets up three linear transformations (nn.Linear) named l1, l2, and l3, with the input size, hidden size, and number of classes as the input and output dimensions, respectively. It also sets up a rectified linear unit (ReLU) activation function named relu.

The forward method implements the forward pass of the neural network. It takes an input tensor x and applies the linear transformations and ReLU activations in sequence. The final output is the result of the last linear transformation, with no activation or softmax applied.

This code defines an LSTM-based model called LSTMNet. The constructor initializes an LSTM layer with the specified input size and hidden size, as well as a fully connected layer with the specified number of output classes. In the forward method, the input x is passed through the LSTM layer with an initial hidden and cell state of zeros. The last hidden state is then passed to the fully connected layer, and the output of the final layer is returned without any additional activation or normalization applied.

This code defines an RNN-based model called RNNNet. The constructor initializes an RNN layer with the specified input size and hidden size, as well as a fully connected layer with the specified number of output classes. In the forward method, the input x is passed through the RNN layer with an initial hidden state of zeros. The last hidden state is then passed to the fully connected layer, and the output of the final layer is returned without any additional activation or normalization applied.

In [None]:
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = model(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 4.8806
Epoch [200/1000], Loss: 4.6093
Epoch [300/1000], Loss: 4.6926
Epoch [400/1000], Loss: 4.4932
Epoch [500/1000], Loss: 4.7243
Epoch [600/1000], Loss: 4.7217
Epoch [700/1000], Loss: 4.6561
Epoch [800/1000], Loss: 4.6764
Epoch [900/1000], Loss: 4.7548
Epoch [1000/1000], Loss: 4.6857


This code is training a machine learning model. It uses a loop over an epoch range of num_epochs, with an inner loop that iterates over the training data, which is stored in train_loader.

For each iteration of the inner loop, the input data (words) and its corresponding labels (labels) are transferred to the device specified by the device variable (likely either the CPU or a GPU).

The forward pass of the model is then performed by calling model(words) and storing the output in the outputs variable. The loss is calculated by passing the outputs and the true labels to the criterion function.

The code then performs backpropagation and optimization by calling loss.backward() to calculate the gradients, optimizer.zero_grad() to zero the gradients, and optimizer.step() to update the model's parameters using the optimizer's update rule.

Finally, the code checks if the current epoch is divisible by 100. If so, it performs some action (not specified in this code snippet), likely printing the current epoch or updating some other tracking variables.

In [None]:
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = model(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'final loss: {loss.item():.4f}')

data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

FILE = "NeuralNet.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

final loss: 0.4436
training complete. file saved to data.pth


This code is likely part of a training script for a machine learning model.

The first line of code prints the final loss value of the training process to the console, with 4 decimal places. The loss variable holds the final value of the loss after training the model. The string f'final loss: {loss.item():.4f}' is a formatted string literal (f-string) in Python that includes the value of the loss.item() as a floating-point number with 4 decimal places.

The next section of code creates a dictionary called data that holds the state of the trained model, including the model's state dictionary (model.state_dict()), the input size (input_size), hidden size (hidden_size), output size (output_size), all possible words (all_words), and the tags (tags).

Then, the code saves this data dictionary to a file named data.pth using the torch.save function from the PyTorch library.

Finally, the code prints a message indicating that the training process is complete and that the file has been saved to the specified location.

In [None]:
with open('intents.json', 'r') as json_data:
    intents = json.load(json_data)

This code is reading a JSON file called "intents.json" and loading its contents into a Python dictionary called intents.

The with statement is a context manager that opens the file "intents.json" in read-only mode ('r') and assigns it to a file object called json_data.

The json.load function from the json library is then used to read the contents of the file and parse it into a Python dictionary, which is stored in the variable intents.

The with statement ensures that the file is properly closed when the code within the block is done executing, even if an exception occurs.

In [None]:

FILE = "data.pth"
data = torch.load(FILE)

data.values()

dict_values([OrderedDict([('l1.weight', tensor([[-0.0090,  0.6685,  0.6468,  ..., -0.8969, -0.1169, -0.6975],
        [ 0.3833, -0.3103, -0.3154,  ...,  0.5496,  0.9962,  2.2361],
        [ 1.3555,  0.0629,  0.0660,  ..., -0.1820,  1.5295,  2.4965],
        ...,
        [-0.8597,  0.4238,  0.4164,  ...,  2.7740, -0.2936,  0.7205],
        [-0.4355, -0.7829, -0.8443,  ..., -0.7112,  0.2399,  1.2039],
        [-0.1627,  0.9949,  1.0439,  ..., -0.2169, -0.3963, -0.0997]])), ('l1.bias', tensor([1.0556, 0.6138, 1.0726, 0.7923, 0.9884, 0.9254, 0.9628, 0.6426])), ('l2.weight', tensor([[-0.1925, -0.5988, -1.5587, -0.8185,  3.7884,  2.8817,  1.7482, -1.2925],
        [ 2.9410,  0.7388, -2.0822, -1.3033, -0.7815,  0.5627,  4.3102,  4.3405],
        [-0.1090,  2.8475,  1.9398, -1.1093, -1.3342,  4.6144, -1.9364,  0.4395],
        [-0.0671,  2.0554, -0.7888,  1.9145,  1.2639, -0.3187, -0.5887, -0.3200],
        [ 3.1033, -0.3702,  1.2055,  4.9134, -0.2263, -0.7107, -0.6705, -2.1229],
        [ 1.4

In [2]:
import torch
FILE = "LSTM.pth"
data = torch.load(FILE)

data.values()

dict_values([OrderedDict([('lstm.weight_ih_l0', tensor([[ 1.1474,  0.9451,  1.5024,  ..., -0.9791,  1.9258,  1.2304],
        [ 1.5534,  1.2227,  1.2927,  ...,  1.4793,  2.1366,  1.3233],
        [ 0.0570, -0.5108, -1.0204,  ..., -2.4994,  2.0224,  1.7964],
        ...,
        [ 1.3972,  0.7557,  1.0819,  ..., 10.6710,  4.3089,  1.1109],
        [ 1.6398, -2.2523, -1.7922,  ...,  8.9324,  1.3367,  2.7841],
        [ 0.7794,  1.5165,  1.9010,  ...,  9.5914,  3.2872,  1.7074]])), ('lstm.weight_hh_l0', tensor([[-0.2803,  0.0928, -0.0669,  0.0562, -0.1728, -0.1440,  0.0178,  0.0593],
        [ 0.1177, -0.1421, -0.0386,  0.1222,  0.3512,  0.2611,  0.0735,  0.1792],
        [-0.1485, -0.1473,  0.2365, -0.0515, -0.0907,  0.1099, -0.1695,  0.1455],
        [-0.1182, -0.1441,  0.0351, -0.2625, -0.0667, -0.0010, -0.3132,  0.1713],
        [ 0.3033,  0.2200, -0.0765,  0.1951,  0.1158,  0.0815, -0.3339, -0.0205],
        [ 0.2371, -0.2738, -0.1695, -0.0592,  0.0222,  0.1594, -0.0687, -0.2732],
  

In [None]:
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
            
    return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

This code is a function called response that takes a string argument msg. The code is likely part of a Natural Language Processing (NLP) or a chatbot application.

The response function performs the following operations:

Tokenize the msg string into a list of words using a function called tokenize.

Create a bag-of-words representation of the sentence using a function called bag_of_words, where all_words is a list of all possible words.

Convert the bag-of-words representation into a tensor using the torch library, which is PyTorch's tensor library.

Pass the tensor through a pre-trained model to get an output tensor.

Find the predicted tag for the input sentence by taking the maximum value from the output tensor along the second dimension (dim=1) and then using the tags list to map the predicted index to a tag.

Compute the softmax probabilities for the output tensor to get the confidence of the prediction.

If the confidence is above a threshold of 0.75, then search the intents list for the corresponding intent with the same tag as the predicted tag and return a random response from the list of responses for that intent.

If the confidence is below the threshold, return a default message indicating that the code is not sure what the input message means.

In [None]:
msg= "What medicines can I buy to help me with my common cold?"

response(msg)

'medicines you can consume : Dextromethorphan,Decongestant,Diphenhydramine,Crocin Cold & Flu Max, preventions that you must follow :Wash your hands,Avoid touching your face,Clean frequently used surfaces,Use hand sanitizers,SUGGESTED FOODS ARE:Garlic,Vitamin Câ€“containing fruits,Leafy greens,Broccoli,Oatmeal,Spices,Chicken Soup'

RNN MODEL

In [47]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        batch_size = input.size(0)
        hidden = self.initHidden(batch_size).to(input.device)
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)



model_instance= RNN(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = model_instance.initHidden(batch_size).to(device)
        outputs, hidden = model_instance(words, hidden.view(1, -1))
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')

data = {
    "model_state": model_instance.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "RNN.pth"
torch.save(data, FILE)

# Load the model from the saved file
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Instantiate the model and load the state dictionary
model = RNN(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)

# Set the model to evaluation mode
model.eval()



Epoch [100/1800], Loss: 0.6095
Epoch [200/1800], Loss: 0.6283
Epoch [300/1800], Loss: 0.3574
Epoch [400/1800], Loss: 0.1832
Epoch [500/1800], Loss: 0.3637
Epoch [600/1800], Loss: 0.2211
Epoch [700/1800], Loss: 0.5057
Epoch [800/1800], Loss: 0.5015
Epoch [900/1800], Loss: 0.4607
Epoch [1000/1800], Loss: 0.4132
Epoch [1100/1800], Loss: 0.4320
Epoch [1200/1800], Loss: 0.8644
Epoch [1300/1800], Loss: 0.3786
Epoch [1400/1800], Loss: 0.1629
Epoch [1500/1800], Loss: 0.4892
Epoch [1600/1800], Loss: 0.6718
Epoch [1700/1800], Loss: 0.3981
Epoch [1800/1800], Loss: 0.1799
final loss: 0.1799


RNN(
  (i2h): Linear(in_features=519, out_features=8, bias=True)
  (i2o): Linear(in_features=519, out_features=111, bias=True)
  (relu): ReLU()
  (softmax): LogSoftmax(dim=1)
)

In [48]:
# Define the response function
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)
    
    # hidden = model.initHidden(X.size(0))
    # output, hidden = model(X.unsqueeze(1), hidden)
    # output, hidden = model(X, model.initHidden(1))
    
    hidden = model_instance.initHidden(batch_size).to(device)
    output, hidden = model_instance(X, hidden.view(1, -1))

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
    else:
        return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

# Test the response function
msg= "What medicines can I buy to help me with my common cold?"
response(msg)

'medicines you can consume : Dextromethorphan,Decongestant,Diphenhydramine,Crocin Cold & Flu Max, preventions that you must follow :Wash your hands,Avoid touching your face,Clean frequently used surfaces,Use hand sanitizers,SUGGESTED FOODS ARE:Garlic,Vitamin Câ€“containing fruits,Leafy greens,Broccoli,Oatmeal,Spices,Chicken Soup'

LSTM MODEL

In [42]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    
    def initHidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size).to(device),
                torch.zeros(1, batch_size, self.hidden_size).to(device))


model_instance= LSTM(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = model_instance.initHidden(words.size(0))
        outputs, hidden = model_instance(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": model_instance.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "LSTM.pth"
# Load the model from the saved file
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Instantiate the model and load the state dictionary
model = LSTM(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)

# Set the model to evaluation mode
model.eval()



Epoch [100/1800], Loss: 0.3579
Epoch [200/1800], Loss: 0.4763
Epoch [300/1800], Loss: 0.2100
Epoch [400/1800], Loss: 0.7450
Epoch [500/1800], Loss: 0.4937
Epoch [600/1800], Loss: 0.3810
Epoch [700/1800], Loss: 0.2451
Epoch [800/1800], Loss: 0.2484
Epoch [900/1800], Loss: 0.4963
Epoch [1000/1800], Loss: 0.3385
Epoch [1100/1800], Loss: 0.8096
Epoch [1200/1800], Loss: 0.3489
Epoch [1300/1800], Loss: 0.6411
Epoch [1400/1800], Loss: 0.5068
Epoch [1500/1800], Loss: 0.6743
Epoch [1600/1800], Loss: 0.1940
Epoch [1700/1800], Loss: 0.4478
Epoch [1800/1800], Loss: 0.4414
final loss: 0.4414


LSTM(
  (lstm): LSTM(511, 8, batch_first=True)
  (fc): Linear(in_features=8, out_features=111, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [44]:
# Define the response function
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)
    
    hidden = model.initHidden(X.size(0))
    output, hidden = model(X.unsqueeze(1), hidden)

    # output, hidden = model(X, model.initHidden(1))

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
    else:
        return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

# Test the response function
msg= "What medicines can I buy to help me with my common cold?"
response(msg)

'medicines you can consume : Dextromethorphan,Decongestant,Diphenhydramine,Crocin Cold & Flu Max, preventions that you must follow :Wash your hands,Avoid touching your face,Clean frequently used surfaces,Use hand sanitizers,SUGGESTED FOODS ARE:Garlic,Vitamin Câ€“containing fruits,Leafy greens,Broccoli,Oatmeal,Spices,Chicken Soup'

In [55]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class BRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = 1
        self.num_directions = 2
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=self.num_layers, 
                          bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.rnn(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size)


model_instance= BRNN(input_size, hidden_size, output_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = model_instance.initHidden(words.size(0)).to(device)
        outputs, hidden = model_instance(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": model_instance.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "BRNN.pth"
torch.save(data, FILE)
# Load the model from the saved file
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Instantiate the model and load the state dictionary
model = BRNN(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)

# Set the model to evaluation mode
model.eval()


Epoch [100/1800], Loss: 0.5137
Epoch [200/1800], Loss: 0.4709
Epoch [300/1800], Loss: 0.6326
Epoch [400/1800], Loss: 0.3693
Epoch [500/1800], Loss: 0.5980
Epoch [600/1800], Loss: 0.6449
Epoch [700/1800], Loss: 0.4343
Epoch [800/1800], Loss: 0.4805
Epoch [900/1800], Loss: 0.4623
Epoch [1000/1800], Loss: 0.7467
Epoch [1100/1800], Loss: 0.4706
Epoch [1200/1800], Loss: 0.4591
Epoch [1300/1800], Loss: 0.1977
Epoch [1400/1800], Loss: 0.6523
Epoch [1500/1800], Loss: 0.3698
Epoch [1600/1800], Loss: 0.5496
Epoch [1700/1800], Loss: 0.5665
Epoch [1800/1800], Loss: 0.6868
final loss: 0.6868


BRNN(
  (rnn): RNN(511, 8, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=16, out_features=111, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [56]:
# Define the response function
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X)
    
    # hidden = model.initHidden(X.size(0))
    # output, hidden = model(X.unsqueeze(1), hidden)
    # output, hidden = model(X, model.initHidden(1))
    hidden = model_instance.initHidden(X.size(0))
    output, hidden = model_instance(X.unsqueeze(1), hidden)

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
    else:
        return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

# Test the response function
# msg= "can you give me some suggestions for doctor consultations?"
msg="i am suffering from intense pain in tooth, intense pain in gum, tooth pain spreads to ear jaw neck , tender tooth"
response(msg)

'I suppose you have Dental Abscess'

In [53]:
num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)



class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class BLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = 1
        self.num_directions = 2
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=self.num_layers, 
                            bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.fc(output[:, -1, :])
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return (torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size),
                torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))


model_instance= BLSTM(input_size, hidden_size, output_size).to(device)



# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)

    
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        # Forward pass
        hidden = model_instance.initHidden(words.size(0))
        outputs, hidden =model_instance(words.unsqueeze(1), hidden)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')


data = {
    "model_state": model_instance.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "BLSTM.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')



Epoch [100/1800], Loss: 0.4067
Epoch [200/1800], Loss: 0.3187
Epoch [300/1800], Loss: 0.2031
Epoch [400/1800], Loss: 0.5771
Epoch [500/1800], Loss: 0.6020
Epoch [600/1800], Loss: 0.5164
Epoch [700/1800], Loss: 0.7736
Epoch [800/1800], Loss: 0.8074
Epoch [900/1800], Loss: 0.2520
Epoch [1000/1800], Loss: 0.4605
Epoch [1100/1800], Loss: 0.1318
Epoch [1200/1800], Loss: 0.3032
Epoch [1300/1800], Loss: 0.5715
Epoch [1400/1800], Loss: 0.5283
Epoch [1500/1800], Loss: 0.3431
Epoch [1600/1800], Loss: 0.5007
Epoch [1700/1800], Loss: 0.7699
Epoch [1800/1800], Loss: 0.3764
final loss: 0.3764
training complete. file saved to BLSTM.pth


In [57]:
# Define the response function
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)
    
    # hidden = model.initHidden(X.size(0))
    # output, hidden = model(X.unsqueeze(1), hidden)
    # output, hidden = model(X, model.initHidden(1))
    hidden = model_instance.initHidden(X.size(0)).to(device)
    output, hidden = model_instance(X.unsqueeze(1), hidden)

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
    else:
        return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"

# Test the response function
msg= "can you give me some suggestion for doctor consultations?"
response(msg)

'You can contact various doctors here for any kind of consultation: 1. https://www.1mg.com/online-doctor-consultation, 2. https://www.tatahealth.com/online-doctor-consultation/general-physician, 3. https://www.doconline.com/, or you can pay a visit to your local area doctor or family doctor.'

In [68]:
from sklearn import neural_network


num_epochs = 1800
batch_size = 16
learning_rate = 0.005
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)


class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
    

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size) 
        self.l2 = nn.Linear(hidden_size, hidden_size) 
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        # no activation and no softmax at the end
        return out

model_instance= NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = model_instance(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

data = {
"model_state": model_instance.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

FILE = "NeuralNet.pth"
torch.save(data, FILE)

# Load the model from the saved file
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Instantiate the model and load the state dictionary
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)

# Set the model to evaluation mode
model.eval()

Epoch [100/1800], Loss: 0.4649
Epoch [200/1800], Loss: 0.6486
Epoch [300/1800], Loss: 0.4993
Epoch [400/1800], Loss: 0.4694
Epoch [500/1800], Loss: 0.3370
Epoch [600/1800], Loss: 0.6427
Epoch [700/1800], Loss: 0.0019
Epoch [800/1800], Loss: 0.4434
Epoch [900/1800], Loss: 0.7344
Epoch [1000/1800], Loss: 0.3840
Epoch [1100/1800], Loss: 0.2404
Epoch [1200/1800], Loss: 0.2325
Epoch [1300/1800], Loss: 0.5611
Epoch [1400/1800], Loss: 0.2622
Epoch [1500/1800], Loss: 0.8873
Epoch [1600/1800], Loss: 0.4518
Epoch [1700/1800], Loss: 0.4517
Epoch [1800/1800], Loss: 0.2463
final loss: 0.2463


NeuralNet(
  (l1): Linear(in_features=511, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=111, bias=True)
  (relu): ReLU()
)

In [72]:

def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
            
    return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"


msg= "i am suffering from hoarseness, cough, lump or swelling in neck"

response(msg)




'medicines you can consume : Dextromethorphan,Decongestant,Diphenhydramine,Crocin Cold & Flu Max, preventions that you must follow :Wash your hands,Avoid touching your face,Clean frequently used surfaces,Use hand sanitizers,SUGGESTED FOODS ARE:Garlic,Vitamin Câ€“containing fruits,Leafy greens,Broccoli,Oatmeal,Spices,Chicken Soup'

In [38]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import json
import random

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

# Define data and file paths
intents_file = "intents.json"
model_path = "bert_classifier.pth"

# Set random seed for reproducibility
random.seed(0)

# Load intents file
with open(intents_file, "r") as f:
    intents = json.load(f)

# Remove duplicates from all_words and tags
all_words = sorted(set(all_words))
tags = sorted(set(tags))

# Define a function to create a bag of words from a pattern sentence
def bag_of_words(pattern_sentence, all_words):
    bag = [0] * len(all_words)
    for word in pattern_sentence:
        for i, w in enumerate(all_words):
            if w == word:
                bag[i] = 1
    return bag

# Shuffle the xy list
random.shuffle(xy)

# Create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tags.index(tag)
    y_train.append(label)

# Define hyperparameters
num_epochs = 4
batch_size = 32
learning_rate = 0.001
output_size = len(tags)

# Load the pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a custom dataset for the chat data
class ChatDataset(Dataset):
    def __init__(self, X, y, tokenizer):
        self.X = X
        self.y = y
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Encode the input sequence and return it along with the label
        input_sequence = " ".join(map(str, self.X[idx]))

        input_dict = self.tokenizer(input_sequence, padding="max_length", truncation=True, max_length=128, return_tensors='pt')
        return {"input_ids": input_dict["input_ids"][0], "attention_mask": input_dict["attention_mask"][0]}, self.y[idx]


# Create train and validation datasets and data loaders
val_split = 0.1
val_size = int(val_split * len(X_train))
y_train = torch.tensor(y_train)
train_dataset = ChatDataset(X_train[val_size:], y_train[val_size:], tokenizer)
val_dataset = ChatDataset(X_train[:val_size], y_train[:val_size], tokenizer)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


bert = BertModel.from_pretrained('bert-base-uncased')
# Define the BERT model
class BertClassifier(nn.Module):
    def __init__(self, output_size):
        super(BertClassifier, self).__init__()
        self.bert =bert
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(self.bert.config.hidden_size, output_size)
        
    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = output.pooler_output
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits

model = BertClassifier(output_size).to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

        

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [40]:
# Train the model
for epoch in range(4):
    for (inputs, labels) in train_loader:
        input_ids = inputs['input_ids'].squeeze(1).to(device)
        attention_mask = inputs['attention_mask'].squeeze(1).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
print(f'final loss: {loss.item():.4f}')

# Save the trained model
data = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "model_state": model.state_dict(),
    "all_words": all_words,
    "tags": tags
}
FILE = "bert_classifier.pth"
torch.save(data, FILE)

print(f'Training complete. Model saved to {FILE}.')

final loss: 5.2785
Training complete. Model saved to bert_classifier.pth.


In [3]:
import torch
# Load the model from the saved file
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Instantiate the model and load the state dictionary
model = BertClassifier(output_size).to(device)
model.load_state_dict(model_state)

# Set the model to evaluation mode
model.eval()



# Define the response function
def response(msg):
    sentence = tokenize(msg)
    X = bag_of_words(sentence, all_words)
    input_ids = torch.LongTensor(X).unsqueeze(0).to(device)
    attention_mask = (input_ids != 0).to(device)
    output = model(input_ids, attention_mask)

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])
    else:
        return "I'm sorry, but I'm not sure what you mean by " + msg + " Can you please provide some additional context or clarify your question?"



# Test the response function
msg= "can you give me some suggestion for doctor consultations?"
response(msg)

NameError: name 'BertClassifier' is not defined