# Intro

This notebook tests the `network_architecture_v2` module to check whether it works in serial.

We do some imports first.

In [1]:
import statistics as stats
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import numpy as np
import argparse
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


import sys

from transformers import BertConfig



# Grab data 

In [2]:
from get_dataset import obtain_dataset
from torch.utils.data import DataLoader

TRAIN_FRACTION = 0.8  # 80/20 train/test split
SPLIT_SEED = 0        # fixed seed so the train/test membership is the same on every run
BATCH_SIZE = 32

ds, vocab_size = obtain_dataset(percent_data = 1000, seq_len=64)

# Compute the train size once; the test set takes whatever remains so the
# two sizes always sum to len(ds).
train_size = int(len(ds) * TRAIN_FRACTION)
test_size = len(ds) - train_size

# Seed the split explicitly: an unseeded random_split assigns different
# examples to train/test on every kernel restart.
train_ds, test_ds = torch.utils.data.random_split(
    ds,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(f'{vocab_size=}')

# Both loaders iterate in a fixed order and drop the final incomplete batch.
train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=True
)
test_loader = DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=True
)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



Map (num_proc=8):   0%|          | 0/10934 [00:00<?, ? examples/s]

vocab_size=30522


# Import layers and test

Use `importlib.reload` so that any changes made to the module are picked up.

In [3]:
import importlib
import network_architecture_v2

# Reload the module to reflect any changes made
importlib.reload(network_architecture_v2)

# Now you can import the classes
from network_architecture_v2 import OpenLayer

In [4]:
config = BertConfig() # Create a simple config (BertConfig called with no arguments)

In [5]:
print(config.type_vocab_size)

2


In [6]:
# NOTE(review): binding the layer to the name `open` shadows Python's builtin
# open() for the rest of this kernel session; later cells call this object as `open(...)`.
open = OpenLayer(config)

In [7]:
batch_iterator = iter(train_loader)  # Create an iterator from the DataLoader
single_batch = next(batch_iterator)   # Get a single batch


In [8]:
single_batch['bert_input'][0]

tensor([  101, 16087,  2620,  1516,  4216, 21893,  5221, 17184,  3119,  2090,
        17491, 11365, 23212,  1998,  3072,  1997, 17768,   103,  1999, 16087,
         2620,  1012,   102,   103,  2721, 18069,  5462,  2000, 17768,   103,
          103,  2935,  3419,  2080, 21122,  2594,  1010,  2008,  2027,  2097,
         2007,   103,  5194,   103, 10347, 16913,  4173,  1997,  5474,  1012,
          102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0])

In [9]:
single_batch['segment_label'][0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [10]:
single_batch['attention_mask'][0]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
single_batch['bert_label'][1]

tensor([    0, 18176,  2003,     0,  2028,     0,     0,     0,  2000,     0,
            0,  6803, 29461,     0,     0,     0,     0,     0,     0,     0,
         2088,     0,     0,     0,     0,  2161,     0,     0,     0,     0,
            0,     0,  4719,     0,     0,     0,     0,  2027,     0,  1999,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0])

In [12]:
# Pass the batch's 'bert_input' and 'segment_label' tensors through the
# OpenLayer instance (bound to the name `open` in an earlier cell).
x = open(single_batch['bert_input'], single_batch['segment_label'])

# After some tweaking, the open layer is done. 

The close layer is also good; now it's a matter of getting the `StepLayer` to work.

In [13]:
x.shape

torch.Size([32, 64, 768])

In [14]:
import importlib
import network_architecture_v2

# Reload the module to reflect any changes made
importlib.reload(network_architecture_v2)

# Now you can import the classes
from network_architecture_v2 import OpenLayer, StepLayer

In [15]:
step = StepLayer(config)

In [16]:
step(x)

NameError: name 'mask' is not defined

# Okay after much tweaking, the model runs and trains a bit! 

Now for the post-processing: can we take our trained SerialNet and connect it back to Hugging Face for inference or benchmarking?

Developed in parallel with bert-benchmark notebook.

In [26]:
import torch
from torch import nn
from typing import List, Optional, Tuple, Union

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric
import numpy as np

import importlib
import network_architecture_v2

# Reload the module to reflect any changes made
importlib.reload(network_architecture_v2)

# Now you can import the classes
from network_architecture_v2 import OpenLayer, StepLayer, MyBertForSequenceClassification

In [2]:
dataset = load_dataset("glue", "sst2")

In [3]:
# I believe this is the tokenizer I used... 
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    """Tokenize the 'sentence' field of a batch of examples.

    Calls the notebook-level ``tokenizer`` with ``padding="max_length"`` and
    ``truncation=True`` and returns its result unchanged. Intended to be passed
    to ``dataset.map(..., batched=True)``.
    """
    sentences = examples['sentence']
    encoded = tokenizer(sentences, padding="max_length", truncation=True)
    return encoded

tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [27]:
# Load the saved network from disk.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
trained_model = torch.load('serial_net_hf_bert_12_epoch=1')

# Report the number of trainable parameters in scientific notation.
n_trainable = sum(
    param.numel() for param in trained_model.parameters() if param.requires_grad
)
print(f'{n_trainable:.4e}')

1.0323e+07


In [28]:
trained_model.close.pooler

BertPooler(
  (dense): Linear(in_features=128, out_features=128, bias=True)
  (activation): Tanh()
)

In [29]:
# Build the sequence-classification model (class from network_architecture_v2)
# on top of the network loaded from disk.
model = MyBertForSequenceClassification(trained_model)

# Prefer the GPU, but fall back to the CPU so this cell does not raise on a
# machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
# model.train()

# Report the number of trainable parameters in scientific notation.
print(f'{sum(p.numel() for p in model.parameters() if p.requires_grad):.4e}')

6.3689e+06


## Try with SST-2...

Okay.... this doesn't work in Jupyter because of namespace issues. 

Need to write individual file and use scripts. Ugh.

## Need to sub-out certain layers in model!


In [33]:
# Set up the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=256,
    per_device_eval_batch_size=256,
    num_train_epochs=10,
    weight_decay=0.01,
)

In [None]:
# Metric callback handed to the Trainer
def compute_metrics(p):
    """Return classification accuracy for one evaluation pass.

    ``p`` must expose ``predictions`` (scores whose last axis indexes the
    classes) and ``label_ids`` (the reference class indices). The predicted
    class is the argmax over the last axis; accuracy is the fraction of
    positions where it equals the label, returned as a Python float under
    the key "accuracy".
    """
    predicted_classes = p.predictions.argmax(-1)
    is_correct = predicted_classes == p.label_ids
    accuracy = is_correct.astype(float).mean().item()
    return {"accuracy": accuracy}

# Initialize the Trainer with the model, the training arguments, the tokenized
# "train" and "validation" splits, and the accuracy callback.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics
)

# --- Inactive alternative kept for reference: the same setup, but computing
# --- accuracy through load_metric("accuracy") instead of the function above.
# # Load the accuracy metric
# metric = load_metric("accuracy")

# # Define the compute metrics function
# def compute_metrics(eval_pred):
#     logits, labels = eval_pred
#     predictions = np.argmax(logits, axis=-1)
#     return metric.compute(predictions=predictions, references=labels)

# # Create a Trainer instance
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_datasets["train"],
#     eval_dataset=tokenized_datasets["validation"],
#     compute_metrics=compute_metrics,
# )

# Fine-tune the model
trainer.train()

# Evaluate the model on the validation split and print the returned metrics
results = trainer.evaluate()
print(results)

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.694164,0.509174
2,0.679600,0.640456,0.611239
3,0.679600,0.587521,0.678899
