In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout , Activation, Flatten , Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD
import random

In [14]:
import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
import json
import pickle
import tensorflow

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Sarva\AppData\Roaming\nltk_data...


In [11]:
words=[]
labels = []
docs = []
ignore_list = ['?', '!']

In [12]:
dataset = open('intents.json').read()
intents = json.loads(dataset)

In [13]:
for intent in intents['intents']:
    for pattern in intent['patterns']:

        #tokenize each word
        word_token = nltk.word_tokenize(pattern)
        words.extend(word_token)
        #add documents in the corpus
        docs.append((word_token, intent['tag']))

        # add to our labels list
        if intent['tag'] not in labels:
            labels.append(intent['tag'])

In [15]:
# lemmatize each word, and sort words by removing duplicates:
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_list]
words = sorted(list(set(words)))

In [16]:
# sort labels:
labels = sorted(list(set(labels)))

In [17]:
pickle.dump(words,open('words.pkl','wb'))
pickle.dump(labels,open('labels.pkl','wb'))

In [18]:
# creating our training data:
training_data = []
# creating an empty array for our output (with size same as length of labels):
output = [0]*len(labels)

for doc in docs:
    bag_of_words = []
    pattern_words = doc[0]
    #lemmatize pattern words:
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    
    for w in words:
        if w in pattern_words:
            bag_of_words.append(1)
        else:
            bag_of_words.append(0)
            
    output_row = list(output)
    output_row[labels.index(doc[1])] = 1
    
    training_data.append([bag_of_words,output_row])

In [25]:
import numpy as np
import random

random.shuffle(training_data)

# Separate features and labels
X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

X = np.array(X, dtype=object)  # or remove dtype=object if all features are same length
y = np.array(y)


In [27]:
# Now we have to create training and testing list:
x_train = list(training_data[:,0])
y_train = list(training_data[:,1])

TypeError: list indices must be integers or slices, not tuple

In [None]:
# Creating Model:

model = Sequential()
model.add(Dense(128, input_shape=(len(x_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(y_train[0]), activation='softmax'))

In [None]:
model.summary()

In [None]:
sgd_optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd_optimizer, metrics=['accuracy'])

In [None]:
# fit the model 
history = model.fit(np.array(x_train), np.array(y_train), epochs=200, batch_size=5, verbose=1)

In [None]:
model.save('chatbot_Application_model.h5', history)