In [1]:
import torch
import torch.nn as nn
from Preprocessing import tags, all_words, X, y
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from copy import copy

In [2]:
# Display the dimensions of the feature matrix and the label vector side by side.
(X.shape, y.shape)

((177, 193), (177,))

In [3]:
class ChatDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized collection of samples. When omitted, the
            module-level ``X`` is used, so ``ChatDataset()`` behaves as before.
        labels: Indexable, sized collection of targets aligned with
            ``features``. When omitted, the module-level ``y`` is used.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features=None, labels=None):
        # Fall back to the notebook globals only when nothing is passed in.
        self.x_data = X if features is None else features
        self.y_data = y if labels is None else labels

        # A length mismatch would otherwise surface later as an IndexError
        # in the middle of an epoch.
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"features and labels differ in length: "
                f"{len(self.x_data)} != {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

# Wrap the preprocessed samples and serve them as shuffled mini-batches.
dataset = ChatDataset()

# drop_last=True discards the final, incomplete batch of every epoch.
# NOTE(review): with the 177 samples shown above and batches of 16 the leftover
# batch would hold a single sample, which the BatchNorm1d layers in NeuralNet
# presumably cannot handle in training mode - confirm that is why it is set.
train_loader = DataLoader(dataset, batch_size=16, shuffle=True, drop_last=True)

In [4]:
class NeuralNet(nn.Module):
    """Fully connected classifier with two hidden stages.

    Each hidden stage applies Linear -> BatchNorm1d -> ReLU -> Dropout; the
    second stage is half as wide as the first. The final linear layer returns
    unnormalised class scores (no softmax is applied here).

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width of the first hidden stage.
        num_classes: Number of output scores per sample.
        dropout_rate: Drop probability used after both hidden stages.
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.3):
        super().__init__()
        narrow_size = hidden_size // 2

        # The attribute names double as state_dict keys, so they stay fixed.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, narrow_size)
        self.l3 = nn.Linear(narrow_size, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size)
        self.batch_norm2 = nn.BatchNorm1d(narrow_size)

    def forward(self, x):
        """Run ``x`` through both hidden stages and return the class scores."""
        stage_one = self.dropout(self.relu(self.batch_norm1(self.l1(x))))
        stage_two = self.dropout(self.relu(self.batch_norm2(self.l2(stage_one))))
        return self.l3(stage_two)

In [5]:
# --- Training configuration ---------------------------------------------------
num_epochs = 1000          # upper bound; early stopping may end training sooner
# NOTE(review): batch_size is not read by any code visible in this notebook -
# the DataLoader is built earlier with a hard-coded batch_size=16. Confirm which
# value is intended before relying on this one.
batch_size = 8
learning_rate = 0.001
input_size = len(X[0])     # number of features in one sample
hidden_size = 64
output_size = len(tags)    # one output score per tag
patience = 100             # epochs without improvement before stopping early
min_delta = 0.001          # smallest loss decrease that counts as improvement
seed = 42

# Seed the global PyTorch RNG before the model is built so weight
# initialisation, dropout masks and DataLoader shuffling repeat across
# "Restart & Run All".
torch.manual_seed(seed)

model = NeuralNet(input_size, hidden_size, output_size)

criterion = nn.CrossEntropyLoss()

# Use the declared learning_rate rather than repeating the literal, so changing
# the hyperparameter above actually takes effect.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

# Halve the learning rate once the monitored loss has stopped improving for 50
# epochs. The `verbose` flag is deprecated in recent PyTorch releases and is
# therefore not passed; read the current rate with scheduler.get_last_lr().
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=50
)






In [6]:
best_loss = float('inf')
early_stop_counter = 0
# Weights captured at the best epoch so far. Starting from None (rather than
# probing locals()) also keeps a stale snapshot from an earlier run of this
# cell from being restored by accident.
best_model = None

# Train
for epoch in tqdm(range(num_epochs)):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for (words, labels) in train_loader:
        words = words.float()
        labels = labels.long()

        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Accuracy is taken from the same training-mode forward pass, so it
        # is measured with dropout active.
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean of the per-batch losses over this epoch.
    epoch_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # The plateau scheduler monitors the epoch-level training loss.
    scheduler.step(epoch_loss)

    # Early stopping check based on training loss
    if epoch_loss < best_loss - min_delta:  # Check for significant improvement
        best_loss = epoch_loss
        early_stop_counter = 0
        # state_dict() hands back references to the live parameter tensors, and
        # a shallow copy of that dict would keep tracking later optimizer
        # steps. Clone every tensor so the snapshot is frozen at this epoch.
        best_model = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {train_accuracy:.2f}%')

    if early_stop_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}")
        break

# Restore the best snapshot before saving.
if best_model is not None:
    model.load_state_dict(best_model)

# epoch_loss is the loss of the last epoch that ran, not the loss recorded for
# the restored snapshot (that value is held in best_loss).
print(f'Final training loss: {epoch_loss:.4f}')

# Bundle the weights with the metadata stored alongside them: the layer sizes,
# the vocabulary and the tag list.
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "data.pth"
torch.save(data, FILE)

print(f'Training complete. Model saved to {FILE}')

 10%|█         | 104/1000 [00:04<00:35, 25.37it/s]

Epoch [100/1000], Loss: 0.1902, Accuracy: 95.45%


 20%|██        | 204/1000 [00:08<00:32, 24.52it/s]

Epoch [200/1000], Loss: 0.1114, Accuracy: 94.89%


 30%|███       | 300/1000 [00:12<00:28, 24.23it/s]

Epoch [300/1000], Loss: 0.1073, Accuracy: 96.59%
Early stopping triggered at epoch 301
Final training loss: 0.1184
Training complete. Model saved to data.pth



