# Training notebook 

* Design a pipeline that loads individual videos and streams them to the model in 3-second chunks
    * Load a file and split its audio into 960 ms clips
    * Once we run out of clips, load the next file
* Import VGGish to embed the 1-second chunks
* Stream the data to the model and train the LSTM + feed-forward network
* Visualize the loss before hyperparameter tuning

In [2]:
import torch
from torch import nn
from torch import optim
from torch.hub import load
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the four CSV splits (train/validation x safe/dangerous) in one pass.
# The paths are relative to the notebook's working directory.
(
    training_safe,
    training_dangerous,
    validation_safe,
    validation_dangerous,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        'train_safe.csv',
        'train_dangerous.csv',
        'validation_safe.csv',
        'validation_dangerous.csv',
    )
]

In [None]:
# Fetch the pretrained VGGish network through torch.hub; it serves as the
# feature extractor for the audio clips.
features = load(
    'harritaylor/torchvggish',
    'vggish',
)
# Switch to inference mode. Leaving this as the cell's last bare expression
# also echoes the architecture in the notebook output.
features.eval()

Using cache found in C:\Users\2005e/.cache\torch\hub\harritaylor_torchvggish_master
Downloading: "https://github.com/harritaylor/torchvggish/releases/download/v0.1/vggish-10086976.pth" to C:\Users\2005e/.cache\torch\hub\checkpoints\vggish-10086976.pth
100%|██████████| 275M/275M [00:26<00:00, 10.9MB/s] 
Downloading: "https://github.com/harritaylor/torchvggish/releases/download/v0.1/vggish_pca_params-970ea276.pth" to C:\Users\2005e/.cache\torch\hub\checkpoints\vggish_pca_params-970ea276.pth
100%|██████████| 177k/177k [00:00<00:00, 2.49MB/s]


VGGish(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False

In [None]:
class SentimentLSTM(nn.Module):
    """Stacked LSTM encoder followed by a small feed-forward head with sigmoid output.

    The LSTM reads a sequence of feature vectors; the hidden output of the last
    time step is passed through ``hidden_size -> 32 -> 8 -> output_size`` linear
    layers (LeakyReLU in between) and squashed with a sigmoid, so every output
    lies in (0, 1) and can be fed to ``nn.BCELoss``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of sigmoid outputs.
        dropout: Dropout probability applied between stacked LSTM layers.
            Ignored when ``num_layers == 1``, because ``nn.LSTM`` has no
            inter-layer connection to apply it to.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()

        # nn.LSTM only applies dropout *between* layers. With a single layer a
        # non-zero value does nothing except raise a UserWarning, so pass 0.0.
        lstm_dropout = dropout if num_layers > 1 else 0.0

        # Recurrent encoder; batch_first=True means inputs are (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=lstm_dropout,
        )

        # Feed-forward head
        self.fc1 = nn.Linear(hidden_size, 32)
        self.fc2 = nn.Linear(32, 8)
        self.fc3 = nn.Linear(8, output_size)

        # Activations
        self.activation = nn.LeakyReLU()
        self.final_activation = nn.Sigmoid()

    def forward(self, x):
        """Run the network on a sequence (or a batch of sequences).

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``, or a single
                unbatched sequence of shape ``(seq_len, input_size)``.

        Returns:
            Tensor of sigmoid outputs with shape ``(batch, output_size)`` for
            batched input, or ``(output_size,)`` for unbatched input.
        """
        # Accept a single sequence by temporarily adding a batch dimension.
        unbatched = x.dim() == 2
        if unbatched:
            x = x.unsqueeze(0)

        # LSTM forward pass; the final (h, c) state is not needed here.
        lstm_out, _ = self.lstm(x)

        # Keep only the output of the last time step of every sequence.
        last_output = lstm_out[:, -1, :]

        # Feed-forward head with LeakyReLU between the linear layers.
        x = self.activation(self.fc1(last_output))
        x = self.activation(self.fc2(x))
        probabilities = self.final_activation(self.fc3(x))

        # Drop the batch dimension again if the caller did not supply one.
        return probabilities.squeeze(0) if unbatched else probabilities

In [None]:
# Hyperparameters for the classifier
input_size = 128    # features per time step (the VGGish feature size)
hidden_size = 128   # width of the LSTM hidden state
num_layers = 2      # how many LSTM layers are stacked
output_size = 1     # one sigmoid output for binary classification
dropout = 0.3       # dropout probability passed to the LSTM for regularization

# Build the network, passing every hyperparameter by keyword.
model = SentimentLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
    dropout=dropout,
)

In [1]:
def training_loop(epochs):
    """Train the module-level ``model`` on ``training_safe`` and ``training_dangerous``.

    Every row of ``training_safe`` is used as one sample with target 0 and every
    row of ``training_dangerous`` as one sample with target 1. The two frames
    are interleaved row by row so the classes alternate within an epoch.

    Args:
        epochs: Number of passes over the training data (``int``). An iterable
            is also accepted; it is iterated once and its length sets the
            number of passes.

    Returns:
        list[float]: Mean binary cross-entropy loss of each epoch, in order.

    Raises:
        RuntimeError: If a row's length is not a multiple of the model's
            per-step feature size and so cannot be reshaped into a sequence.
        ValueError: If a row holds values that cannot be cast to float32.
    """
    # An int is the natural argument; iterating over it directly would fail.
    epoch_iter = range(epochs) if isinstance(epochs, int) else epochs

    # Built once: recreating Adam every epoch would reset its moment estimates.
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()

    # Per-time-step feature size expected by the model's LSTM.
    feature_dim = model.lstm.input_size

    # (frame, target) pairs. NOTE(review): safe=0 / dangerous=1 is a chosen
    # convention; the original code only ever produced label 0.
    labelled_frames = ((training_safe, 0.0), (training_dangerous, 1.0))
    n_rows = max(len(frame) for frame, _ in labelled_frames)

    epoch_losses = []
    for n in epoch_iter:
        # Training mode enables the LSTM's inter-layer dropout.
        model.train()

        train_loss = 0.0
        n_samples = 0

        for i in range(n_rows):
            for frame, target_value in labelled_frames:
                # The two frames may differ in length; skip the exhausted one.
                if i >= len(frame):
                    continue

                # NOTE(review): assumes each CSV row is a flattened sequence of
                # feature vectors (length a multiple of feature_dim) - TODO
                # confirm against how the CSVs are produced.
                row = np.asarray(frame.iloc[i].values, dtype=np.float32)
                inputs = torch.tensor(row, dtype=torch.float32).reshape(1, -1, feature_dim)

                optimizer.zero_grad()

                # Call the module itself (not .forward) so registered hooks run.
                prediction = model(inputs)

                # BCELoss requires the target to have the prediction's shape.
                target = torch.full_like(prediction, target_value)

                loss = criterion(prediction, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                n_samples += 1

        # max(..., 1) guards against division by zero when both frames are empty.
        epoch_losses.append(train_loss / max(n_samples, 1))

    return epoch_losses

SyntaxError: incomplete input (3224766424.py, line 2)