# Training data


In [None]:
# Module for intents json file
import json

# Modules for Natural Language Processing
import nltk
import pickle
nltk.download('wordnet')
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Modules for Vectorization and Encoding
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder

# Modules for Neural Networks
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

# Module for random responses
import random

# Training script for the intent classifier.
#
# Reads intent patterns from 'intents.json', builds a bag-of-words vocabulary
# and one-hot class labels, trains a small dense network on them, and writes
# three artifacts: 'words.pkl', 'classes.pkl' and 'chatbot_model.h5'.

lemmatizer = WordNetLemmatizer()

# Load the intent definitions; the context manager closes the file handle.
with open('intents.json') as intents_file:
  intents = json.load(intents_file)

words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

# Tokenize every pattern, remembering which tag each token list belongs to.
for intent in intents['intents']:
  for pattern in intent['patterns']:
    word_list = nltk.word_tokenize(pattern)
    words.extend(word_list)
    documents.append((word_list, intent['tag']))
    if intent['tag'] not in classes:
      classes.append(intent['tag'])

# Lowercase before lemmatizing so the vocabulary is normalized exactly like
# the per-pattern tokens below; otherwise a capitalised vocabulary entry
# (e.g. "Hello") could never match its lowercased pattern token.
# NOTE(review): any code that loads 'words.pkl' to build a bag of words should
# lowercase its input tokens the same way - confirm on the inference side.
words = sorted({
  lemmatizer.lemmatize(word.lower())
  for word in words
  if word not in ignore_letters
})

classes = sorted(set(classes))

with open('words.pkl', 'wb') as words_file:
  pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
  pickle.dump(classes, classes_file)

training = []
output_empty = [0] * len(classes)

for word_patterns, tag in documents:
  # A set gives constant-time membership tests while filling the bag.
  pattern_lemmas = {lemmatizer.lemmatize(word.lower()) for word in word_patterns}
  bag = [1 if word in pattern_lemmas else 0 for word in words]

  output_row = list(output_empty)
  output_row[classes.index(tag)] = 1
  training.append((bag, output_row))

random.shuffle(training)

# Split features and labels directly from the shuffled pairs. Packing the
# ragged (bag, output_row) pairs into a single np.array is rejected by recent
# NumPy releases because the two halves have different lengths.
train_x = [bag for bag, _ in training]
train_y = [output_row for _, output_row in training]

model = Sequential()
model.add(Dense(128, input_shape=(len(words),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(classes), activation='softmax'))

# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): `decay` is kept as in the original; newer Keras optimizers may
# reject it - confirm against the installed TensorFlow version.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y), epochs=20, batch_size=5, verbose=1)
# The second positional parameter of `save` is `overwrite`, not the training
# history, so only the path is passed (overwrite defaults to True).
model.save('chatbot_model.h5')
print('Done')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Done


*Jake VanderPlas*

![Book Cover](https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/figures/PDSH-cover.png?raw=1)

*This is the Jupyter notebook version of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*
The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!

## Table of Contents

### [Preface](00.00-Preface.ipynb)

### [1. IPython: Beyond Normal Python](01.00-IPython-Beyond-Normal-Python.ipynb)
- [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)
- [Keyboard Shortcuts in the IPython Shell](01.02-Shell-Keyboard-Shortcuts.ipynb)
- [IPython Magic Commands](01.03-Magic-Commands.ipynb)
- [Input and Output History](01.04-Input-Output-History.ipynb)
- [IPython and Shell Commands](01.05-IPython-And-Shell-Commands.ipynb)
- [Errors and Debugging](01.06-Errors-and-Debugging.ipynb)
- [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb)
- [More IPython Resources](01.08-More-IPython-Resources.ipynb)

### [2. Introduction to NumPy](02.00-Introduction-to-NumPy.ipynb)
- [Understanding Data Types in Python](02.01-Understanding-Data-Types.ipynb)
- [The Basics of NumPy Arrays](02.02-The-Basics-Of-NumPy-Arrays.ipynb)
- [Computation on NumPy Arrays: Universal Functions](02.03-Computation-on-arrays-ufuncs.ipynb)
- [Aggregations: Min, Max, and Everything In Between](02.04-Computation-on-arrays-aggregates.ipynb)
- [Computation on Arrays: Broadcasting](02.05-Computation-on-arrays-broadcasting.ipynb)
- [Comparisons, Masks, and Boolean Logic](02.06-Boolean-Arrays-and-Masks.ipynb)
- [Fancy Indexing](02.07-Fancy-Indexing.ipynb)
- [Sorting Arrays](02.08-Sorting.ipynb)
- [Structured Data: NumPy's Structured Arrays](02.09-Structured-Data-NumPy.ipynb)

### [3. Data Manipulation with Pandas](03.00-Introduction-to-Pandas.ipynb)
- [Introducing Pandas Objects](03.01-Introducing-Pandas-Objects.ipynb)
- [Data Indexing and Selection](03.02-Data-Indexing-and-Selection.ipynb)
- [Operating on Data in Pandas](03.03-Operations-in-Pandas.ipynb)
- [Handling Missing Data](03.04-Missing-Values.ipynb)
- [Hierarchical Indexing](03.05-Hierarchical-Indexing.ipynb)
- [Combining Datasets: Concat and Append](03.06-Concat-And-Append.ipynb)
- [Combining Datasets: Merge and Join](03.07-Merge-and-Join.ipynb)
- [Aggregation and Grouping](03.08-Aggregation-and-Grouping.ipynb)
- [Pivot Tables](03.09-Pivot-Tables.ipynb)
- [Vectorized String Operations](03.10-Working-With-Strings.ipynb)
- [Working with Time Series](03.11-Working-with-Time-Series.ipynb)
- [High-Performance Pandas: eval() and query()](03.12-Performance-Eval-and-Query.ipynb)
- [Further Resources](03.13-Further-Resources.ipynb)

### [4. Visualization with Matplotlib](04.00-Introduction-To-Matplotlib.ipynb)
- [Simple Line Plots](04.01-Simple-Line-Plots.ipynb)
- [Simple Scatter Plots](04.02-Simple-Scatter-Plots.ipynb)
- [Visualizing Errors](04.03-Errorbars.ipynb)
- [Density and Contour Plots](04.04-Density-and-Contour-Plots.ipynb)
- [Histograms, Binnings, and Density](04.05-Histograms-and-Binnings.ipynb)
- [Customizing Plot Legends](04.06-Customizing-Legends.ipynb)
- [Customizing Colorbars](04.07-Customizing-Colorbars.ipynb)
- [Multiple Subplots](04.08-Multiple-Subplots.ipynb)
- [Text and Annotation](04.09-Text-and-Annotation.ipynb)
- [Customizing Ticks](04.10-Customizing-Ticks.ipynb)
- [Customizing Matplotlib: Configurations and Stylesheets](04.11-Settings-and-Stylesheets.ipynb)
- [Three-Dimensional Plotting in Matplotlib](04.12-Three-Dimensional-Plotting.ipynb)
- [Geographic Data with Basemap](04.13-Geographic-Data-With-Basemap.ipynb)
- [Visualization with Seaborn](04.14-Visualization-With-Seaborn.ipynb)
- [Further Resources](04.15-Further-Resources.ipynb)

### [5. Machine Learning](05.00-Machine-Learning.ipynb)
- [What Is Machine Learning?](05.01-What-Is-Machine-Learning.ipynb)
- [Introducing Scikit-Learn](05.02-Introducing-Scikit-Learn.ipynb)
- [Hyperparameters and Model Validation](05.03-Hyperparameters-and-Model-Validation.ipynb)
- [Feature Engineering](05.04-Feature-Engineering.ipynb)
- [In Depth: Naive Bayes Classification](05.05-Naive-Bayes.ipynb)
- [In Depth: Linear Regression](05.06-Linear-Regression.ipynb)
- [In-Depth: Support Vector Machines](05.07-Support-Vector-Machines.ipynb)
- [In-Depth: Decision Trees and Random Forests](05.08-Random-Forests.ipynb)
- [In Depth: Principal Component Analysis](05.09-Principal-Component-Analysis.ipynb)
- [In-Depth: Manifold Learning](05.10-Manifold-Learning.ipynb)
- [In Depth: k-Means Clustering](05.11-K-Means.ipynb)
- [In Depth: Gaussian Mixture Models](05.12-Gaussian-Mixtures.ipynb)
- [In-Depth: Kernel Density Estimation](05.13-Kernel-Density-Estimation.ipynb)
- [Application: A Face Detection Pipeline](05.14-Image-Features.ipynb)
- [Further Machine Learning Resources](05.15-Learning-More.ipynb)

### [Appendix: Figure Code](06.00-Figure-Code.ipynb)