In [None]:
class BaseRNN(nn.Module):
    """Stacked recurrent classifier: embedding -> recurrent layers -> linear head.

    Args:
        cell_type: 'gru' or 'lstm'; any other value selects a plain ``nn.RNN``.
        embed_size: Dimension of the token embeddings.
        state_sizes: Hidden size of each stacked recurrent layer, in order.
            Defaults to ``[128, 128]``.
        output_type: How the recurrent output is reduced before the linear
            head: "last_state", "mean" or "max".
        data_manager: Object exposing ``vocab_size`` and ``num_classes``.

    Raises:
        ValueError: If ``data_manager`` is None or ``state_sizes`` is empty.
    """

    def __init__(self, cell_type='simple_rnn', embed_size=128, state_sizes=None, output_type="mean", data_manager=None):
        super().__init__()
        if data_manager is None:
            raise ValueError("data_manager is required: it supplies vocab_size and num_classes")

        self.cell_type = cell_type
        # Always store a fresh list so instances never share the default
        # (or alias a list the caller may mutate later).
        self.state_sizes = [128, 128] if state_sizes is None else list(state_sizes)
        if not self.state_sizes:
            raise ValueError("state_sizes must contain at least one hidden size")
        self.embed_size = embed_size
        self.output_type = output_type
        self.data_manager = data_manager
        self.vocab_size = self.data_manager.vocab_size

        # Container for the embedding layer followed by the recurrent layers
        self.layers = nn.ModuleList()

        # build() is overridable; subclasses rely on it being called here
        self.build()

    @staticmethod
    def get_layer(cell_type='simple_rnn', input_size=128, state_size=128):
        """Return a single batch-first recurrent layer for ``cell_type``.

        'gru' -> ``nn.GRU``, 'lstm' -> ``nn.LSTM``, anything else -> ``nn.RNN``.
        """
        layer_classes = {'gru': nn.GRU, 'lstm': nn.LSTM}
        layer_cls = layer_classes.get(cell_type, nn.RNN)
        return layer_cls(input_size=input_size, hidden_size=state_size, batch_first=True)

    def build(self):
        """Populate ``self.layers`` (embedding + recurrent stack) and ``self.fc``."""
        self.layers.append(nn.Embedding(self.vocab_size, self.embed_size))

        # Each recurrent layer consumes the previous layer's hidden size
        input_size = self.embed_size
        for state_size in self.state_sizes:
            self.layers.append(self.get_layer(self.cell_type, input_size, state_size))
            input_size = state_size

        # Classification head on top of the last recurrent layer
        self.fc = nn.Linear(self.state_sizes[-1], self.data_manager.num_classes)

    def forward(self, x):
        """Embed ``x``, run the recurrent stack, reduce over time and classify.

        All recurrent layers are created with ``batch_first=True``, so the
        time axis of their output is dim 1.

        Raises:
            ValueError: If ``self.output_type`` is not a supported reduction.
        """
        x = self.layers[0](x)  # Embedding

        h_n = None
        for layer in self.layers[1:]:
            x, state = layer(x)
            # nn.LSTM returns (h_n, c_n); nn.GRU / nn.RNN return h_n directly
            h_n = state[0] if isinstance(state, tuple) else state

        if self.output_type == "last_state":
            out = h_n[-1]  # Final hidden state of the last recurrent layer
        elif self.output_type == "mean":
            out = torch.mean(x, dim=1)  # Average of the outputs over time
        elif self.output_type == "max":
            out, _ = torch.max(x, dim=1)  # Element-wise max of the outputs over time
        else:
            raise ValueError(f"Unknown output type: {self.output_type}")

        return self.fc(out)


In [None]:
import gensim.downloader as api
from gensim.models import Word2Vec
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
import numpy as np

class RNN(BaseRNN):
    """BaseRNN whose embedding layer can be initialised from pretrained vectors.

    Args:
        cell_type, state_sizes, output_type, data_manager: Forwarded to
            ``BaseRNN``. ``data_manager`` must additionally expose ``word2idx``.
        embed_size: Embedding dimension. Used as given only when
            ``run_mode == 'scratch'``; otherwise it is replaced by the number
            at the end of ``embed_model`` (e.g. 100 for
            'glove-wiki-gigaword-100').
        run_mode: One of
            'scratch'        - randomly initialised, trainable ``nn.Embedding``;
            'init-only'      - pretrained vectors, frozen;
            'init-fine-tune' - pretrained vectors, trainable.
        embed_model: Name passed to ``gensim.downloader.load``.

    Raises:
        ValueError: If ``data_manager`` is None, ``run_mode`` is unknown, or the
            embedding size cannot be parsed from ``embed_model``.
    """

    _RUN_MODES = ('scratch', 'init-only', 'init-fine-tune')

    def __init__(self, cell_type='gru', embed_size=128, state_sizes=None, output_type='mean', data_manager=None,
                 run_mode='scratch', embed_model='glove-wiki-gigaword-100'):
        if data_manager is None:
            raise ValueError("data_manager is required: it supplies word2idx, vocab_size and num_classes")
        if run_mode not in self._RUN_MODES:
            raise ValueError(f"Unknown run_mode: {run_mode!r}; expected one of {self._RUN_MODES}")

        # These must exist before super().__init__(), because BaseRNN.__init__
        # calls self.build(), which reads them.
        self.run_mode = run_mode
        self.embed_model = embed_model

        # Directory and file used to persist the embedding matrix
        os.makedirs("embeddings", exist_ok=True)
        self.embed_path = "embeddings/E.npy"

        # Pretrained vectors dictate the embedding size
        if self.run_mode != 'scratch':
            try:
                embed_size = int(self.embed_model.split("-")[-1])
            except ValueError as err:
                raise ValueError(
                    f"Cannot infer the embedding size from embed_model {self.embed_model!r}; "
                    "expected a name ending in '-<dim>'"
                ) from err

        # Word to index mapping from the data manager
        self.word2idx = data_manager.word2idx
        self.vocab_size = data_manager.vocab_size
        self.embed_matrix = np.zeros((self.vocab_size, embed_size))

        # Fill the matrix BEFORE the layers are built: build() copies it into
        # the embedding layer, so filling it afterwards would leave the layer
        # initialised with zeros.
        if self.run_mode != 'scratch':
            self.build_embedding_matrix()

        # Resolve the default here so a fresh list is passed on every call
        state_sizes = [128, 128] if state_sizes is None else list(state_sizes)
        super().__init__(cell_type, embed_size, state_sizes, output_type, data_manager)

    def build_embedding_matrix(self):
        """Fill ``self.embed_matrix`` from the pretrained vectors and save it.

        Words missing from the pretrained model get a random normal vector.
        The matrix is written to ``self.embed_path`` with ``np.save``.
        """
        print(f"Loading {self.embed_model} model...")
        word2vect = api.load(self.embed_model)

        # Take the width from the matrix itself so this method does not depend
        # on attributes that BaseRNN.__init__ sets later.
        embed_dim = self.embed_matrix.shape[1]
        for word, idx in self.word2idx.items():
            if word in word2vect:
                self.embed_matrix[idx] = word2vect[word]
            else:
                # Randomly initialize words not found in pretrained model
                self.embed_matrix[idx] = np.random.normal(scale=0.6, size=(embed_dim,))

        # Save the embedding matrix to a file
        np.save(self.embed_path, self.embed_matrix)

    def build(self):
        """Create the embedding layer for ``run_mode``, the RNN stack and ``fc``.

        Raises:
            ValueError: If ``self.run_mode`` is not a supported mode.
        """
        if self.run_mode == 'scratch':
            self.embedding = nn.Embedding(self.vocab_size, self.embed_size)
        elif self.run_mode in ('init-only', 'init-fine-tune'):
            weights = torch.tensor(self.embed_matrix, dtype=torch.float32)
            # 'init-only' keeps the pretrained vectors frozen; 'init-fine-tune' trains them
            self.embedding = nn.Embedding.from_pretrained(weights, freeze=(self.run_mode == 'init-only'))
        else:
            raise ValueError(f"Unknown run_mode: {self.run_mode!r}; expected one of {self._RUN_MODES}")

        # Same layout as BaseRNN: embedding first, then the recurrent layers
        self.layers = nn.ModuleList()
        self.layers.append(self.embedding)

        input_size = self.embed_size
        for state_size in self.state_sizes:
            self.layers.append(self.get_layer(self.cell_type, input_size, state_size))
            input_size = state_size

        # Fully connected output layer
        self.fc = nn.Linear(self.state_sizes[-1], self.data_manager.num_classes)

In [None]:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Experiment: train an LSTM classifier with run_mode='init-fine-tune', i.e. the
# nn.Embedding.from_pretrained(..., freeze=False) branch of RNN.build().
# `dm`, `device` and `BaseTrainer` are defined outside this cell.
print("Running experiment with run_mode = 'init-fine-tune'")

# Two stacked LSTM layers (hidden sizes 64 then 128); outputs are averaged over time.
# NOTE: no dropout or other regularisation is configured in this call.
# NOTE: embed_size=128 is not used as given here - when run_mode != 'scratch',
# RNN.__init__ replaces it with the number at the end of embed_model
# (100 for the default 'glove-wiki-gigaword-100').
rnn_init_fine_tune = RNN(
    cell_type='lstm', 
    embed_size=128, 
    state_sizes=[64, 128], 
    output_type='mean', 
    data_manager=dm, 
    run_mode='init-fine-tune'
).to(device)

# Adam over all model parameters with learning rate 0.003
optimizer = torch.optim.Adam(rnn_init_fine_tune.parameters(), lr=0.003)

# Halve the learning rate (factor=0.5) once the monitored value has stopped
# decreasing (mode='min') for more than `patience` scheduler steps.
# Which value is passed to scheduler.step() is decided inside BaseTrainer.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# Wire model, loss, optimizer, data loaders and scheduler into the trainer.
# NOTE(review): no early-stopping argument is passed here; whether training
# stops early depends on BaseTrainer's implementation - confirm.
trainer_init_fine_tune = BaseTrainer(
    model=rnn_init_fine_tune,
    criterion=nn.CrossEntropyLoss(),
    optimizer=optimizer,
    train_loader=dm.train_loader,
    val_loader=dm.valid_loader,
    scheduler=scheduler
)


# Train for up to 15 epochs
trainer_init_fine_tune.fit(num_epochs=15)
