# Single Task Classification Notebook

First, we import the necessary packages and modules.


In [1]:
import os
import random
import sys

# Make the repository root importable before any `codebase.*` import below.
module_path = "../../../"
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchtext.data import Field, LabelField

from codebase.data.imdbdataset import IMDBDataset
from codebase.data.customdataloader import CustomDataLoader
# The two wildcard imports keep their original relative order, because a later
# one overrides any name it shares with an earlier one.
from codebase.experiments.single_task_classification.train_methods import *
from codebase.experiments.single_task_classification.config import *
# These are example models used for the initial experiments
from codebase.models.simplelstm import SimpleLSTM
from codebase.models.simplemoe import SimpleMoE
from codebase.models.mlp import MLP
# The below lines are only needed when running the notebook on google colab
# from google.colab import drive
# drive.mount('/content/gdrive')


Seed the random number generators and set the constants (batch size, length handling, device) for the experiments.

In [2]:
# Release any GPU memory cached by earlier runs in this kernel.
torch.cuda.empty_cache()

# Seed every random number generator the pipeline may draw from so that a
# Restart & Run All gives repeatable results. Python's built-in `random` is
# seeded as well because legacy torchtext takes the state for dataset splits
# and batch shuffling from it by default.
# NOTE(review): confirm that CustomDataLoader relies on those torchtext defaults.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Notebook-level aliases for the config values used by the cells below.
batch_size = BATCH_SIZE
include_lens = INCLUDE_LENGTHS

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Load in the data and construct iterators over batches

In [3]:
# Locations of the raw IMDB corpus and of the cached pretrained word vectors,
# relative to this notebook.
imdb_path = "../.data/imdb/aclImdb"
vector_cache_path = "../.vector_cache"

# Field definitions describing how raw text and labels are processed.
TEXT = Field(lower=SET_LOWERCASE, include_lengths=include_lens, batch_first=True)
# Alternative text field that tokenizes with spaCy instead of the default tokenizer:
# TEXT = Field(lower=True, tokenize="spacy", tokenizer_language="en", include_lengths=True, batch_first=True)
LABEL = LabelField(dtype=torch.long)

# Load the IMDB dataset from disk.
dataset = IMDBDataset(TEXT, LABEL, path=imdb_path).load()

# Build the batch iterators with GloVe vectors. `batch_size` is the
# notebook-level value set in the configuration cell (initialised from
# BATCH_SIZE), so an override made there takes effect here.
dloader = CustomDataLoader(dataset, TEXT, LABEL)
data_iterators = dloader.construct_iterators(
    vectors="glove.6B.300d",
    vector_cache=vector_cache_path,
    batch_size=batch_size,
    device=device,
)

Define the model (a mixture of experts built from LSTM gating and expert networks), the loss, and the optimizer / scheduler used for training

In [4]:
# Model hyperparameters.
# The embedding size matches the 300-dimensional "glove.6B.300d" vectors
# requested when the iterators were built.
embedding_dim = 300
# Number of expert networks; also used as the gating network's output size
# (presumably one gate weight per expert - TODO confirm against SimpleMoE).
num_experts = 3
# Output size of each expert and of the combined model (fed to CrossEntropyLoss).
num_classes = 2

# Gating network: a small LSTM with `num_experts` outputs.
g = SimpleLSTM(vocab=TEXT.vocab, embedding_dim=embedding_dim, hidden_dim=8,
               output_dim=num_experts, device=device)

# Expert networks: independent, larger LSTM classifiers.
expert_networks = [
    SimpleLSTM(vocab=TEXT.vocab, embedding_dim=embedding_dim, hidden_dim=128,
               output_dim=num_classes, device=device)
    for _ in range(num_experts)
]

# Build the mixture-of-experts model on the same device as its sub-networks.
# It was previously hard-coded to the CPU, which mismatches the gating and
# expert networks whenever CUDA is available.
# NOTE(review): the first positional argument is passed as None, as before;
# confirm its meaning against SimpleMoE's signature.
moe_model = SimpleMoE(None, gating_network=g, expert_networks=expert_networks,
                      output_dim=num_classes, device=device)

# Loss, optimizer and learning-rate schedule for training the MoE model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(moe_model.parameters(), lr=OPTIMIZER_LEARNING_RATE)
scheduler = StepLR(optimizer, step_size=SCHEDULER_STEP_SIZE, gamma=SCHEDULER_GAMMA)

In [None]:
# Training call, currently disabled. Uncomment to train `moe_model` on
# `data_iterators[0]` with the criterion, optimizer and scheduler defined above.
# train(moe_model, criterion, optimizer, scheduler, data_iterators[0], device=device, include_lengths=include_lens,
#     save_path='saved_models/MoE', save_name="IMDB_dataset", use_tensorboard=True)