## Import libraries

In [1]:
import numpy as np
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import torch
import torch.nn as nn
from torch.utils.data  import DataLoader , Dataset
import random


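# One-time setup: uncomment and run these once if the NLTK tokenizer / stop-word data is not installed yet.
#nltk.download('punkt')
#nltk.download('stopwords')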

## Read and clean the data

In [2]:
# Load the question/answer records. Later cells iterate over `data` and read
# each record's 'Question' and 'Answers' keys.
# The encoding is given explicitly because JSON text is UTF-8, while open()
# would otherwise fall back to the platform's locale encoding.
with open('farm.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

#### Create a function that cleans the noise out of the data

In [3]:
def tokenize_and_stem_sentence_and_remove_stopWords(sentence):
    """Tokenize a sentence, drop stop words and punctuation, and stem the rest.

    Args:
        sentence: Raw text to process.

    Returns:
        A list with the Porter stem of every token that is neither an English
        stop word nor one of the punctuation marks '?', '!', '.', ','.
    """
    # Tokenization
    words = word_tokenize(sentence)

    # Stop words plus the punctuation tokens that carry no meaning here.
    additional_stopwords = ['?', '!', '.', ',']
    stop_words = set(stopwords.words("english")).union(additional_stopwords)

    stemmer = PorterStemmer()
    # Compare case-insensitively: the stop-word list is lower-case, so a
    # capitalised stop word such as "What" or "The" would otherwise pass the
    # filter and end up (stemmed) in the vocabulary.
    stemmed_words = [
        stemmer.stem(word) for word in words if word.lower() not in stop_words
    ]

    return stemmed_words


In [4]:
# Tokenize and stem every question once. A question's position in `data`
# doubles as its class label, so each question is its own class.
xy = [
    (tokenize_and_stem_sentence_and_remove_stopWords(entry['Question']), label)
    for label, entry in enumerate(data)
]

# Sorted list of the distinct stems seen across all questions.
vocabulary = sorted({token for tokens, _label in xy for token in tokens})

n_classes = len(xy)



In [5]:
def bag_of_words(dictionary, sentence):
    """Count how often each dictionary word occurs in a tokenized sentence.

    The sentence must already have been processed with
    tokenize_and_stem_sentence_and_remove_stopWords().

    Args:
        dictionary: Sequence of known words; position i of the result counts
            occurrences of dictionary[i].
        sentence: Iterable of tokens to count.

    Returns:
        A float32 NumPy array of length len(dictionary). Tokens that are not
        in the dictionary are ignored.
    """
    bag = np.zeros(len(dictionary), dtype=np.float32)

    # Build the word -> position map once instead of scanning the whole
    # dictionary twice (membership test + .index) for every token.
    # setdefault keeps the first position of a repeated word, like list.index.
    position_of = {}
    for position, word in enumerate(dictionary):
        position_of.setdefault(word, position)

    for word in sentence:
        position = position_of.get(word)
        if position is not None:
            bag[position] += 1.0

    return bag
    

In [6]:
# Turn every tokenized question into its bag-of-words vector, and collect the
# matching class labels in the same order.
X_train = np.array([bag_of_words(vocabulary, tokens) for tokens, _label in xy])
y_train = np.array([label for _tokens, label in xy])

In [7]:
class ChatDataset(Dataset):
    """Map-style dataset pairing bag-of-words vectors with their class labels.

    Args:
        features: Indexable collection of input vectors. When omitted, the
            notebook-level ``X_train`` is used.
        labels: Indexable collection of labels, aligned with ``features``.
            When omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features=None, labels=None):
        # Fall back to the notebook globals so ChatDataset() keeps working.
        self.X_data = X_train if features is None else features
        self.y_data = y_train if labels is None else labels

        if len(self.X_data) != len(self.y_data):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(self.X_data)} and {len(self.y_data)}"
            )
        self.n_samples = len(self.X_data)

    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

In [8]:
# Wrap the training arrays and serve them in shuffled mini-batches of 8,
# loaded in the main process (num_workers=0).
dataset = ChatDataset()
train_loader = DataLoader(
    dataset=dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
)

# Build the model

In [9]:
# hyper_parameters
num_epochs = 2000               # full passes over the training set
learning_rate = 0.001           # step size handed to the optimizer
batch_size = 8                  # NOTE: the DataLoader above passes 8 directly and does not read this
hidden_size = 8                 # width of both hidden layers
input_size = X_train.shape[1]   # one input feature per vocabulary entry

In [10]:
class NeuralNet(nn.Module):
    """Feed-forward classifier: two ReLU hidden layers and a linear output.

    Args:
        input_size: Number of input features.
        hidden_size: Width of each of the two hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine
        # the state_dict keys and the order in which weights are initialised.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied here."""
        hidden = x
        for layer in (self.l1, self.l2):
            hidden = self.relu(layer(hidden))
        return self.l3(hidden)

In [11]:
model = NeuralNet(input_size, hidden_size, n_classes)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for batch_features, batch_labels in train_loader:
        # CrossEntropyLoss expects integer class indices of dtype long.
        targets = batch_labels.to(dtype=torch.long)

        loss = criterion(model(batch_features), targets)

        # Clear old gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Progress report every 100 epochs; the value shown is the loss of the
    # last mini-batch of that epoch, not an epoch average.
    if (epoch + 1) % 100 != 0:
        continue
    print(f'epoch{epoch + 1}/{num_epochs} , loss={loss.item():.4f}')

print(f'final loss , loss={loss.item():.4f}')


epoch100/2000 , loss=1.8304
epoch200/2000 , loss=0.5257
epoch300/2000 , loss=0.1000
epoch400/2000 , loss=0.0413
epoch500/2000 , loss=0.0156
epoch600/2000 , loss=0.0064
epoch700/2000 , loss=0.0040
epoch800/2000 , loss=0.0024
epoch900/2000 , loss=0.0022
epoch1000/2000 , loss=0.0017
epoch1100/2000 , loss=0.0011
epoch1200/2000 , loss=0.0005
epoch1300/2000 , loss=0.0005
epoch1400/2000 , loss=0.0004
epoch1500/2000 , loss=0.0003
epoch1600/2000 , loss=0.0002
epoch1700/2000 , loss=0.0001
epoch1800/2000 , loss=0.0001
epoch1900/2000 , loss=0.0001
epoch2000/2000 , loss=0.0000
final loss , loss=0.0000


In [12]:
# Interactive chat: classify each user message and reply with one of the
# stored answers for the predicted question. Typing "quit" ends the session.
bot_name = 'Crop Health Chat Bot  '
print("Crop Health Chat Bot : Hello how can I help you ?")

# Inference only from here on. NeuralNet has just Linear/ReLU layers, so this
# does not change its outputs; it simply makes the intent explicit.
model.eval()

while True:
    try:
        sentence = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: end the chat cleanly.
        break

    if sentence.strip().lower() == "quit":
        break

    # Keep the raw text in `sentence` so it can be echoed back as typed.
    tokens = tokenize_and_stem_sentence_and_remove_stopWords(sentence)
    X = bag_of_words(vocabulary, tokens)

    # No known word at all: the input vector is all zeros and the network
    # would answer from its biases alone, so decline instead of guessing.
    if not X.any():
        print(f"{bot_name}: I do not understand...")
        continue

    X = torch.from_numpy(X.reshape(1, X.shape[0]))

    # Gradients are not needed for prediction.
    with torch.no_grad():
        output = model(X)
        _, predicted = torch.max(output, dim=1)
        probs = torch.softmax(output, dim=1)

    prob = probs[0][predicted.item()]

    # Only answer when the model is reasonably confident.
    if prob.item() > 0.5:
        print(f"You : {sentence} ")
        print(f"{bot_name}:{random.choice(data[predicted.item()]['Answers'])}")
    else:
        print(f"{bot_name}: I do not understand...")



Crop Health Chat Bot : Hello how can I help you ?
