How to: https://www.youtube.com/watch?v=8qwowmiXANQ
part 3: https://www.youtube.com/watch?v=Da-iHgrmHYg
part 4: https://www.youtube.com/watch?v=k1SzvvFtl4w

In [None]:
# Import required libraries for training the chatbot
import json  # For reading JSON files (intents.json)
# Import custom preprocessing functions from our local preprocessing.py file
from preprocessing import tokenize, stem, bag_of_words  # Functions for text processing
import numpy as np  # For numerical operations and array handling
import torch  # PyTorch main library for deep learning
import torch.nn as nn  # Neural network modules from PyTorch
from torch.utils.data import Dataset, DataLoader  # For creating datasets and data loaders
# Import our custom neural network model from local model.py file
from model import NeuralNet  # Our 3-layer neural network class

In [None]:
# Load the chatbot's intent definitions from intents.json.
# Later cells read intents['intents'] and, for each entry, its 'tag' and 'patterns'.
# The encoding is pinned to UTF-8 so the file parses the same way on every machine;
# without it open() falls back to the locale's default text encoding.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)  # Parsed JSON document

In [None]:

# Accumulators filled while walking the intents file
all_words = []  # every token seen in any pattern (deduplicated further down)
tags = []  # one entry per intent (deduplicated further down)
xy = []  # (token list, tag) training pairs

# Visit every intent and tokenize each of its example sentences
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)

    for sentence in intent['patterns']:
        tokens = tokenize(sentence)
        all_words += tokens  # in-place extend of the running token list
        xy.append((tokens, tag))  # keep the tokens together with their intent tag

# Punctuation tokens that are dropped from the vocabulary
ignore_words = ['?', '!', ',', '.']

# Stem every non-punctuation token, then keep one sorted copy of each stem / tag
all_words = sorted({stem(token) for token in all_words if token not in ignore_words})
tags = sorted(set(tags))

# Summary of the prepared data
print(len(xy), "patterns")
print('\n', len(tags), "tags:", tags)
print('\n', len(all_words), "unique stemmed words:", all_words)

93 patterns

 13 tags: ['events', 'family_tree', 'features', 'goodbye', 'greeting', 'help', 'messaging', 'posts', 'privacy', 'profile', 'stories', 'subscription', 'thanks']

 114 unique stemmed words: ["'m", "'s", '24-hour', 'a', 'about', 'add', 'affinit', 'afternoon', 'an', 'anyon', 'app', 'appreci', 'are', 'avail', 'bill', 'bye', 'can', 'care', 'chat', 'commun', 'confus', 'content', 'control', 'cost', 'creat', 'custom', 'data', 'day', 'direct', 'do', 'doe', 'edit', 'ephemer', 'even', 'event', 'famili', 'farewel', 'featur', 'function', 'gather', 'get', 'good', 'goodby', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'i', 'inform', 'is', 'it', 'later', 'lot', 'main', 'manag', 'me', 'member', 'messag', 'morn', 'much', 'my', "n't", 'need', 'organ', 'payment', 'person', 'photo', 'plan', 'post', 'premium', 'price', 'privaci', 'privat', 'profil', 'protect', 'relationship', 'rsvp', 'section', 'secur', 'see', 'set', 'share', 'show', 'so', 'social', 'soon', 'start', 'stori', 'subscript', 'suppor

In [None]:
# Build the training arrays from the (tokens, tag) pairs collected in xy.
# Each row of X_train is the bag-of-words vector returned by bag_of_words() for
# one pattern, measured against the all_words vocabulary.
X_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])

# Each entry of y_train is the position of the pattern's tag in the sorted tags
# list, i.e. the integer class label for that pattern.
y_train = np.array([tags.index(tag_name) for _, tag_name in xy])

In [None]:
# Custom Dataset class for PyTorch DataLoader
class ChatDataset(Dataset):
    """Map-style dataset pairing feature vectors with their class labels.

    With no arguments it wraps the notebook's module-level ``X_train`` and
    ``y_train``, so ``ChatDataset()`` behaves exactly as before. Passing
    ``x_data`` / ``y_data`` explicitly lets the same class wrap any other
    pair of equally long, indexable sequences (e.g. a validation split).
    """

    def __init__(self, x_data=None, y_data=None):
        """Store the samples and remember how many there are.

        Args:
            x_data: Indexable collection of input features. Defaults to the
                global ``X_train``.
            y_data: Indexable collection of labels. Defaults to the global
                ``y_train``.

        Raises:
            ValueError: If the inputs and labels differ in length.
        """
        # Resolve the globals at call time (not as default argument values) so a
        # re-run of the data-preparation cell is picked up by a new ChatDataset().
        if x_data is None:
            x_data = X_train
        if y_data is None:
            y_data = y_train

        if len(x_data) != len(y_data):
            raise ValueError(
                f"x_data and y_data must have the same length, "
                f"got {len(x_data)} and {len(y_data)}"
            )

        self.n_samples = len(x_data)  # Total number of samples
        self.x_data = x_data  # Input features
        self.y_data = y_data  # Target labels

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples



In [None]:
# Training configuration

# Layer sizes dictated by the prepared data
input_size = len(X_train[0])  # length of one row of X_train (one bag-of-words vector)
output_size = len(tags)  # one output per intent tag

# Tunable settings
hidden_size = 8  # hidden size handed to NeuralNet
batch_size = 8  # samples per mini-batch served by the DataLoader
learning_rate = 1e-3  # step size handed to the optimizer
num_epochs = 1_000  # full passes over the training data



In [None]:

# Wrap the training arrays in a dataset and serve them as shuffled mini-batches
dataset = ChatDataset()
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,  # new sample order every epoch
    num_workers=0,  # load batches in the main process
)

In [None]:
# Seed PyTorch's global random number generator before the model is built, so that
# any random parameter initialisation inside NeuralNet and the DataLoader's
# shuffling order are repeatable across "Restart Kernel -> Run All".
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Train on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)  # Build the network and move it to the device


In [None]:
# Set up loss function and optimizer for training
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multi-class classification
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer over all model parameters

# Train the model
for epoch in range(num_epochs):  # One iteration per pass over the training data
    for (words, labels) in train_loader:  # One iteration per mini-batch
        words = words.to(device)  # Move input features to the training device
        # CrossEntropyLoss expects class-index targets as int64 (torch.long).
        # y_train is built with np.array() from Python ints, whose integer dtype is
        # chosen by NumPy per platform, so the cast is made explicit here. It is a
        # no-op when the labels are already int64.
        labels = labels.to(device=device, dtype=torch.long)

        # Forward pass: compute predictions and the loss for this batch
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward pass: reset gradients, back-propagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print training progress every 100 epochs.
    # The value shown is the loss of the LAST mini-batch of the epoch, not an
    # average over the whole epoch.
    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch+1}/{num_epochs}, loss={loss.item():.4f}')
print(f'final loss, loss={loss.item():.4f}')  # Loss of the very last mini-batch processed
        
        

epoch 100/1000, loss=0.0324
epoch 200/1000, loss=0.0028
epoch 300/1000, loss=0.0006
epoch 400/1000, loss=0.0001
epoch 500/1000, loss=0.0001
epoch 600/1000, loss=0.0001
epoch 700/1000, loss=0.0001
epoch 800/1000, loss=0.0000
epoch 900/1000, loss=0.0000
epoch 1000/1000, loss=0.0000
final loss, loss=0.0000


In [None]:
# Bundle the trained weights with everything needed to rebuild the model and
# to encode new sentences the same way as during training.
data = {
    "model_state": model.state_dict(),  # Trained parameters of the network
    "input_size": input_size,  # Input layer size (length of a bag-of-words vector)
    "output_size": output_size,  # Output layer size (number of intent tags)
    "hidden_size": hidden_size,  # Hidden layer size
    # NOTE(review): the key is the singular "all_word" although the variable is
    # all_words. It is kept unchanged because the code that loads data.pth is not
    # visible here - confirm that the loader reads the same key.
    "all_word": all_words,  # Vocabulary of stemmed words
    "tags": tags  # Sorted list of intent tags
}

FILE = 'data.pth'  # Destination file for the checkpoint

# Saving stays best-effort (the notebook keeps running on failure), but only
# ordinary errors are caught - a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit - and the cause is reported instead of hidden.
try:
    torch.save(data, FILE)
except Exception as exc:
    print(f'save unsuccessful: {exc!r}')
else:
    print('Model successfully saved')

Model successfully saved
