In [37]:
# Init/Load model
from transformers import BartForConditionalGeneration, BartTokenizer
import torch
import os

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# on a machine without CUDA instead of failing at the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define a directory to save the models
SAVE_DIR = '../saved_models'
# exist_ok avoids the check-then-create race of a separate os.path.exists test
os.makedirs(SAVE_DIR, exist_ok=True)

# Epoch of the checkpoint to resume from; SimpleBART loads
# SAVE_DIR/epoch_<start_epoch> when this is > 0, else facebook/bart-base.
start_epoch = 110

class SimpleBART(torch.nn.Module):
    """Torch module bundling a BART conditional-generation model with its tokenizer.

    The weights are read from ``SAVE_DIR/epoch_<start_epoch>`` when the
    module-level ``start_epoch`` is positive, and from the pretrained
    'facebook/bart-base' otherwise. The tokenizer always comes from
    'facebook/bart-base'.
    """

    def __init__(self):
        super().__init__()
        if start_epoch <= 0:
            # Fresh run: begin from the published base weights
            self.bart = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
            print('Loaded facebook/bart-base')
        else:
            # Resume: pick up the checkpoint written for that epoch
            checkpoint_dir = os.path.join(SAVE_DIR, f'epoch_{start_epoch}')
            self.bart = BartForConditionalGeneration.from_pretrained(checkpoint_dir)
            print(f'Loaded epoch_{start_epoch}')
        self.tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model on the given ids/mask and return its output unchanged."""
        model_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        return self.bart(**model_inputs)


# Build the model (weights chosen by `start_epoch` inside SimpleBART) and move it to `device`.
model = SimpleBART().to(device)
# The optimizer is constructed fresh here: save_model() only persists model.bart,
# so Adam's moment estimates are not restored when resuming from a checkpoint.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
# Default CrossEntropyLoss (mean reduction); the training loop selects which
# positions are scored before calling it.
criterion = torch.nn.CrossEntropyLoss()

Loaded epoch_110


In [2]:
# Load Raw Datasets
from torch.utils.data import Dataset, DataLoader
import csv

# Target number of samples per optimizer step; trainFor divides this by
# BATCH_SIZE to get the number of mini-batches to accumulate.
ACCUMULATION_STEPS = 14
BATCH_SIZE = 14 # best performing batch size so far (in execution performance)
# Number of samples in the most recently loaded dataset; overwritten by apply_concept.
DATA_SIZE = 0

class CustomDataset(Dataset):
    """Dataset of (text, target token ids) pairs for sequence-to-sequence training.

    Args:
        data: indexable collection of ``(text, tokens)`` where ``tokens`` is a
            list of integer target ids.
        tokenizer: callable tokenizer exposing ``pad_token_id``,
            ``cls_token_id`` and ``eos_token_id``.
        max_length: fixed length of both the encoded text and the target row.
    """

    def __init__(self, data, tokenizer, max_length=200):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.pad_token_id = tokenizer.pad_token_id
        self.start_token_id = tokenizer.cls_token_id
        self.end_token_id = tokenizer.eos_token_id

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, targets)`` for sample ``idx``.

        ``targets`` is ``[start] + tokens + [end]`` right-padded with the pad
        id to exactly ``max_length`` entries. Targets that would not fit are
        truncated so the end token is still present; previously an over-long
        target produced a longer tensor, which cannot be stacked into a batch.
        """
        text, tokens = self.data[idx]

        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=self.max_length)

        # Reserve two slots for the start and end markers
        body = list(tokens)[:max(self.max_length - 2, 0)]
        sequence = [self.start_token_id] + body + [self.end_token_id]
        # Only relevant for max_length < 2, where even the markers do not fit
        sequence = sequence[:self.max_length]
        padding = [self.pad_token_id] * (self.max_length - len(sequence))

        return inputs["input_ids"].squeeze(0), inputs["attention_mask"].squeeze(0), torch.tensor(sequence + padding, dtype=torch.long)


def load_data_from_csv(file_path):
    """Read a two-column CSV of ``text, "tok,tok,..."`` rows.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A list of ``(text, tokens)`` tuples, where ``tokens`` is the second
        column split on commas and converted to ints. Blank lines are skipped.

    Raises:
        ValueError: if a token in the second column is not an integer.
        IndexError: if a non-blank row has fewer than two columns.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for file objects passed to csv.reader.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        # csv.reader yields [] for a blank line; skip those rather than
        # failing with IndexError on row[0].
        data = [
            (row[0], [int(tok) for tok in row[1].split(",")])
            for row in reader
            if row
        ]

    return data

def apply_concept(params, validation=False):
    """Build a DataLoader from a mix of concept CSV files and publish it as a global.

    The merged data always starts with the rows of ``concept/egg.csv``. For
    each ``file_name -> percentage`` entry in ``params`` a slice of
    ``concept/<file_name>.csv`` is appended: the leading
    ``int(len * percentage)`` rows for training, or the same number of
    trailing rows for validation. The head and tail slices overlap whenever
    the training and validation percentages for a file sum to more than 1.

    Args:
        params: dict mapping concept file name (without extension) to the
            fraction of that file to load.
        validation: when True the loader is stored in the global
            ``validationLoader``; otherwise in the global ``dataloader``.

    Side effects:
        Sets the global ``DATA_SIZE`` to the number of merged samples
        (for either kind of load) and prints a summary of what was loaded.
    """
    global validationLoader
    global dataloader
    global DATA_SIZE

    merged_data = load_data_from_csv("concept/egg.csv")

    print("Concepts loaded;")
    for file_name, percentage in params.items():
        data = load_data_from_csv(f"concept/{file_name}.csv")
        cutoff = int(len(data) * percentage)

        if validation:
            # data[-0:] is the WHOLE list, so a zero cutoff must be handled
            # explicitly or a tiny percentage would load every row.
            loaded_data = data[-cutoff:] if cutoff > 0 else []
        else:
            loaded_data = data[:cutoff]

        print(f"    - {file_name}: {len(loaded_data)}")
        merged_data.extend(loaded_data)

    DATA_SIZE = len(merged_data)
    dataset = CustomDataset(merged_data, model.tokenizer)
    if validation:
        validationLoader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    else:
        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    print(f'  Total: {DATA_SIZE}')

In [3]:
# Setup trainer
import numpy as np

def trainFor(num_epochs, log_freq, target_loss=0, target_acc=1.0):
    """Train the global ``model`` on the global ``dataloader``.

    Runs up to ``num_epochs`` epochs, continuing the numbering from the
    global ``start_epoch`` (which is advanced as each epoch begins). After
    every epoch it prints training statistics and calls ``validate()``. The
    model is saved every 10th epoch and once more when training stops.

    Args:
        num_epochs: maximum number of epochs to run.
        log_freq: print the batch loss every ``log_freq`` batches (the last
            batch of an epoch is always printed). 0 logs the last batch only.
        target_loss: stop after the current epoch if the loss of a batch that
            triggered an optimizer step was at or below this value.
        target_acc: stop once the whole-sequence training accuracy of an
            epoch reaches this fraction.

    Gradient accumulation: gradients of ``ACCUMULATION_STEPS // BATCH_SIZE``
    consecutive batches are accumulated before each optimizer step. Each
    batch loss is divided by that count so the accumulated gradient is an
    average, not a sum. The final group of an epoch may hold fewer batches.
    """
    global start_epoch

    EOS_TOKEN_ID = model.tokenizer.eos_token_id
    # Mini-batches per optimizer step. Clamped to 1 so that an
    # ACCUMULATION_STEPS smaller than BATCH_SIZE cannot yield a modulus of 0.
    acc_batch = max(1, ACCUMULATION_STEPS // BATCH_SIZE)

    for epoch in range(start_epoch+1, start_epoch+num_epochs+1):
        start_epoch = epoch
        model.train()

        # Resetting the accumulated gradients
        optimizer.zero_grad()

        target_met = False

        # Initialize counters for accuracy calculation
        total_correct_sequences = 0
        total_sequences = 0
        cumulative_loss = 0.0

        # Initialize list to store batch losses
        batch_losses = []

        for batch_idx, (input_ids, attention_mask, targets) in enumerate(dataloader):

            input_ids, attention_mask, targets = input_ids.to(device), attention_mask.to(device), targets.to(device)
            # NOTE(review): only input_ids/attention_mask are passed, so the
            # decoder inputs are whatever BART derives by default (presumably
            # input_ids shifted right) rather than the shifted targets --
            # confirm this non-teacher-forced setup is intended.
            outputs = model(input_ids, attention_mask)
            logits = outputs.logits

            # Identify where the EOS token is in the target sequence:
            # True from the first EOS onwards
            eos_positions = (targets == EOS_TOKEN_ID).cumsum(dim=1).type(torch.bool)
            # Score everything before the first EOS plus the EOS itself
            mask = ~eos_positions | (targets == EOS_TOKEN_ID)

            # Apply mask to filter out tokens after the EOS token for loss computation
            active_loss = mask.view(-1).bool()
            active_logits = logits.view(-1, logits.size(-1))[active_loss]
            active_labels = targets.view(-1)[active_loss]
            loss = criterion(active_logits, active_labels)

            # A sequence counts as correct when every scored position matches
            _, predicted = logits.max(2)
            correct_sequences = ((predicted == targets) | ~mask).all(dim=1).float().sum().item()
            total_sequences += targets.size(0)
            total_correct_sequences += correct_sequences

            # Accumulate the gradients, scaled so the sum over one
            # accumulation window is the mean gradient of its batches
            (loss / acc_batch).backward()
            # Report the unscaled loss so logs stay comparable across settings
            loss_val = loss.item()

            cumulative_loss += loss_val
            batch_losses.append(loss_val)

            isLast = batch_idx == len(dataloader) - 1

            if isLast or (log_freq and batch_idx % log_freq == 0):
                print(f"Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss_val}")

            # Only step once a full window of acc_batch batches has been
            # accumulated. (batch_idx + 1) is used so the first step is not
            # taken after a single batch.
            if isLast or (batch_idx + 1) % acc_batch == 0:
                if loss_val <= target_loss:
                    target_met = True

                optimizer.step()
                optimizer.zero_grad()

        # Compute and print the accuracy for the entire epoch
        epoch_accuracy = total_correct_sequences / total_sequences
        cumulative_loss = cumulative_loss / len(batch_losses)
        p25_loss = np.percentile(batch_losses, 25)
        p50_loss = np.percentile(batch_losses, 50)
        p75_loss = np.percentile(batch_losses, 75)
        print(f"Epoch: {epoch}, Accuracy: {epoch_accuracy*100:.2f}%\n  Loss: 25%: {p25_loss:.6f} 50%: {p50_loss:.6f} 75%: {p75_loss:.6f}\n   Avg: {cumulative_loss:.6f}")

        validate()

        if epoch % 10 == 0: # Save the model
            save_model()

        if epoch_accuracy >= target_acc:
            break

        if target_met:
            break

    # Make sure last epoch is always saved
    save_model()

def save_model():
    """Save the wrapped BART model under SAVE_DIR as ``epoch_<start_epoch>``."""
    # start_epoch is only read here, so no `global` declaration is required
    checkpoint_name = f'epoch_{start_epoch}'
    model.bart.save_pretrained(os.path.join(SAVE_DIR, checkpoint_name))

In [4]:
# Setup Validator
def validate():
    """Score the global ``model`` on the global ``validationLoader`` and print the results.

    Two figures are printed:
      * "Total Seq Accuracy": fraction of sequences whose every scored
        position (up to and including EOS) is predicted correctly.
      * "Average Sequence": mean, over sequences, of the fraction of scored
        positions predicted correctly.

    The model is switched to eval mode and left in it. If the loader yields
    no samples a notice is printed instead of dividing by zero.
    """
    EOS_TOKEN_ID = model.tokenizer.eos_token_id

    model.eval()

    # Initialize counters for accuracy calculation
    total_correct_sequences = 0
    total_sequences = 0

    # Running sum of per-sequence token accuracy within the mask
    total_accuracy = 0

    with torch.no_grad():
        for input_ids, attention_mask, targets in validationLoader:
            input_ids, attention_mask, targets = input_ids.to(device), attention_mask.to(device), targets.to(device)
            outputs = model(input_ids, attention_mask)
            logits = outputs.logits

            # Identify where the EOS token is in the target sequence:
            # True from the first EOS onwards
            eos_positions = (targets == EOS_TOKEN_ID).cumsum(dim=1).type(torch.bool)
            # Score everything before the first EOS plus the EOS itself
            mask = ~eos_positions | (targets == EOS_TOKEN_ID)

            _, predicted = logits.max(2)
            correct_sequences = ((predicted == targets) | ~mask).all(dim=1).float().sum().item()
            total_sequences += targets.size(0)
            total_correct_sequences += correct_sequences

            # Compute the accuracy for each sequence
            correct_tokens_per_sequence = ((predicted == targets) & mask).float().sum(dim=1)
            total_tokens_per_sequence = mask.float().sum(dim=1)
            total_accuracy += (correct_tokens_per_sequence / total_tokens_per_sequence).sum().item()

    if total_sequences == 0:
        # Nothing was scored; the averages below would divide by zero
        print("Validation set is empty; no accuracy to report")
        return

    # Compute and print the accuracy for the entire validation dataset
    validation_accuracy = total_correct_sequences / total_sequences
    print(f"Total Seq Accuracy: {validation_accuracy*100:.3f}%")
    avg_accuracy = total_accuracy / total_sequences
    print(f"Average Sequence: {avg_accuracy*100:.3f}%")

In [5]:
ACCUMULATION_STEPS = BATCH_SIZE # pure memorisation so accumulation won't help
apply_concept({"vocabulary": 1.0, "noise": 0.01})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.40)
# (31mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 390
  Total: 7199
Epoch: 1, Batch: 0, Loss: 7.29101037979126
Epoch: 1, Batch: 514, Loss: 2.501331090927124
Epoch: 1, Accuracy: 0.06%
Epoch: 2, Batch: 0, Loss: 2.7425403594970703
Epoch: 2, Batch: 514, Loss: 2.56217098236084
Epoch: 2, Accuracy: 0.08%
Epoch: 3, Batch: 0, Loss: 2.961082696914673
Epoch: 3, Batch: 514, Loss: 2.5872976779937744
Epoch: 3, Accuracy: 0.08%
Epoch: 4, Batch: 0, Loss: 3.54451584815979
Epoch: 4, Batch: 514, Loss: 2.6127095222473145
Epoch: 4, Accuracy: 0.10%
Epoch: 5, Batch: 0, Loss: 3.0492312908172607
Epoch: 5, Batch: 514, Loss: 2.625404119491577
Epoch: 5, Accuracy: 0.10%
Epoch: 6, Batch: 0, Loss: 3.1311771869659424
Epoch: 6, Batch: 514, Loss: 2.4623067378997803
Epoch: 6, Accuracy: 0.19%
Epoch: 7, Batch: 0, Loss: 2.6145153045654297
Epoch: 7, Batch: 514, Loss: 2.3633921146392822
Epoch: 7, Accuracy: 0.22%
Epoch: 8, Batch: 0, Loss: 3.5724732875823975
Epoch: 8, Batch: 514, Loss: 2.420199394226074
Epoch: 8, Accuracy: 0

In [6]:
ACCUMULATION_STEPS = BATCH_SIZE # pure memorisation so accumulation won't help
apply_concept({"vocabulary": 1.0, "noise": 0.10})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.50)
# (24mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 3902
  Total: 10711
Epoch: 20, Batch: 0, Loss: 5.020002841949463
Epoch: 20, Batch: 765, Loss: 0.16530807316303253
Epoch: 20, Accuracy: 29.16%
Epoch: 21, Batch: 0, Loss: 2.432830572128296
Epoch: 21, Batch: 765, Loss: 4.899008274078369
Epoch: 21, Accuracy: 31.86%
Epoch: 22, Batch: 0, Loss: 2.754391670227051
Epoch: 22, Batch: 765, Loss: 0.5231565237045288
Epoch: 22, Accuracy: 35.51%
Epoch: 23, Batch: 0, Loss: 1.9788975715637207
Epoch: 23, Batch: 765, Loss: 0.11301183700561523
Epoch: 23, Accuracy: 38.34%
Epoch: 24, Batch: 0, Loss: 1.2154724597930908
Epoch: 24, Batch: 765, Loss: 0.007895281538367271
Epoch: 24, Accuracy: 40.39%
Epoch: 25, Batch: 0, Loss: 1.191716194152832
Epoch: 25, Batch: 765, Loss: 0.015043791383504868
Epoch: 25, Accuracy: 42.92%
Epoch: 26, Batch: 0, Loss: 1.854417324066162
Epoch: 26, Batch: 765, Loss: 1.0427758693695068
Epoch: 26, Accuracy: 44.90%
Epoch: 27, Batch: 0, Loss: 1.3035759925842285
Epoch: 27, Batch: 765, Loss

In [7]:
ACCUMULATION_STEPS = 28 # *14 close to 32
apply_concept({"vocabulary": 1.0, "noise": 0.20})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.65)
# (81mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 7805
  Total: 14614
Epoch: 30, Batch: 0, Loss: 1.6802232265472412
Epoch: 30, Batch: 1043, Loss: 1.063194990158081
Epoch: 30, Accuracy: 41.64%
Epoch: 31, Batch: 0, Loss: 0.6703682541847229
Epoch: 31, Batch: 1043, Loss: 0.3829524517059326
Epoch: 31, Accuracy: 44.61%
Epoch: 32, Batch: 0, Loss: 0.38985922932624817
Epoch: 32, Batch: 1043, Loss: 0.33271169662475586
Epoch: 32, Accuracy: 46.08%
Epoch: 33, Batch: 0, Loss: 0.4413485825061798
Epoch: 33, Batch: 1043, Loss: 0.2826402485370636
Epoch: 33, Accuracy: 46.93%
Epoch: 34, Batch: 0, Loss: 0.39702412486076355
Epoch: 34, Batch: 1043, Loss: 0.5775324106216431
Epoch: 34, Accuracy: 48.26%
Epoch: 35, Batch: 0, Loss: 0.5960874557495117
Epoch: 35, Batch: 1043, Loss: 0.6266477108001709
Epoch: 35, Accuracy: 50.28%
Epoch: 36, Batch: 0, Loss: 0.5490836501121521
Epoch: 36, Batch: 1043, Loss: 0.8670615553855896
Epoch: 36, Accuracy: 50.96%
Epoch: 37, Batch: 0, Loss: 0.3249604403972626
Epoch: 37, Batch: 

Aiming for:
$$
\begin{align*}
  \frac{unique}{tokens} &= \frac{4174}{6808} = 61.31\%  & \text{sign pairs to unique text} \\
  \frac{text}{tokens}   &= \frac{5342}{6808}  = 78.47\% & \text{sign pairs to text} \\
  & & \text{unique meaning the text is only used for one tokenID}
\end{align*}
$$

e.g. there are six different signs which can be used for "present",  
which means a raw target of $75\%$ is actually $\frac{75\%}{78\%} \approx 96\%$ effective accuracy

But we don't want to over-fit either  
Hence why we slowly introduce new concepts while still memorising vocabulary

In [8]:
ACCUMULATION_STEPS = 70 # *14 close to 64
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.75)
# (416mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 35124
  Total: 41933
Epoch: 57, Batch: 0, Loss: 0.31596410274505615
Epoch: 57, Batch: 2995, Loss: 0.9586284160614014
Epoch: 57, Accuracy: 41.30%
Epoch: 58, Batch: 0, Loss: 0.23302768170833588
Epoch: 58, Batch: 2995, Loss: 2.3223114013671875
Epoch: 58, Accuracy: 42.93%
Epoch: 59, Batch: 0, Loss: 0.3411206305027008
Epoch: 59, Batch: 2995, Loss: 0.14493200182914734
Epoch: 59, Accuracy: 44.05%
Epoch: 60, Batch: 0, Loss: 0.1739943027496338
Epoch: 60, Batch: 2995, Loss: 0.24309362471103668
Epoch: 60, Accuracy: 44.64%
Epoch: 61, Batch: 0, Loss: 0.4015417993068695
Epoch: 61, Batch: 2995, Loss: 0.5365091562271118
Epoch: 61, Accuracy: 45.30%
Epoch: 62, Batch: 0, Loss: 0.30252334475517273
Epoch: 62, Batch: 2995, Loss: 0.00811807345598936
Epoch: 62, Accuracy: 45.38%
Epoch: 63, Batch: 0, Loss: 0.2994730770587921
Epoch: 63, Batch: 2995, Loss: 0.10692374408245087
Epoch: 63, Accuracy: 46.49%
Epoch: 64, Batch: 0, Loss: 0.22777970135211945
Epoch: 64, 

In [6]:
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

Concepts loaded;
    - vocabulary: 6808
    - noise: 3902
  Total: 10711
Validation Accuracy: 65.213%
Concepts loaded;
    - vocabulary: 6808
  Total: 6809
Validation Accuracy: 80.320%
Concepts loaded;
    - noise: 3902
  Total: 3903
Validation Accuracy: 38.868%


In [10]:
ACCUMULATION_STEPS = 28 # *14 close to 32
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.8)
# (94mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 35124
  Total: 41933
Epoch: 107, Batch: 0, Loss: 0.14195597171783447
Epoch: 107, Batch: 2995, Loss: 0.0344601608812809
Epoch: 107, Accuracy: 72.98%
Epoch: 108, Batch: 0, Loss: 0.10983139276504517
Epoch: 108, Batch: 2995, Loss: 0.04287264868617058
Epoch: 108, Accuracy: 72.86%
Epoch: 109, Batch: 0, Loss: 0.08244488388299942
Epoch: 109, Batch: 2995, Loss: 0.07554608583450317
Epoch: 109, Accuracy: 73.84%
Epoch: 110, Batch: 0, Loss: 0.06210318207740784
Epoch: 110, Batch: 2995, Loss: 0.13856537640094757
Epoch: 110, Accuracy: 74.86%
Epoch: 111, Batch: 0, Loss: 0.13895009458065033
Epoch: 111, Batch: 2995, Loss: 0.005273606162518263
Epoch: 111, Accuracy: 75.72%
Epoch: 112, Batch: 0, Loss: 0.09678803384304047
Epoch: 112, Batch: 2995, Loss: 0.08273360878229141
Epoch: 112, Accuracy: 76.09%
Epoch: 113, Batch: 0, Loss: 0.07797343283891678
Epoch: 113, Batch: 2995, Loss: 0.04882273077964783
Epoch: 113, Accuracy: 77.07%
Epoch: 114, Batch: 0, Loss: 0.

In [8]:
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

Concepts loaded;
    - vocabulary: 6808
    - noise: 3902
  Total: 10711
Validation Accuracy: 65.139%
Concepts loaded;
    - vocabulary: 6808
  Total: 6809
Validation Accuracy: 80.291%
Concepts loaded;
    - noise: 3902
  Total: 3903
Validation Accuracy: 38.714%


In [12]:
ACCUMULATION_STEPS = 28 # *14 close to 32
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.9)
# (172mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 35124
  Total: 41933
Epoch: 118, Batch: 0, Loss: 0.019620144739747047
Epoch: 118, Batch: 2995, Loss: 0.0025075555313378572
Epoch: 118, Accuracy: 81.42%
Epoch: 119, Batch: 0, Loss: 0.08252128958702087
Epoch: 119, Batch: 2995, Loss: 0.09791643917560577
Epoch: 119, Accuracy: 82.11%
Epoch: 120, Batch: 0, Loss: 0.09593144059181213
Epoch: 120, Batch: 2995, Loss: 0.24362444877624512
Epoch: 120, Accuracy: 83.02%
Epoch: 121, Batch: 0, Loss: 0.2580302059650421
Epoch: 121, Batch: 2995, Loss: 0.1866842806339264
Epoch: 121, Accuracy: 83.22%
Epoch: 122, Batch: 0, Loss: 0.049325548112392426
Epoch: 122, Batch: 2995, Loss: 0.00907761137932539
Epoch: 122, Accuracy: 84.24%
Epoch: 123, Batch: 0, Loss: 0.04175214096903801
Epoch: 123, Batch: 2995, Loss: 0.08660128712654114
Epoch: 123, Accuracy: 84.64%
Epoch: 124, Batch: 0, Loss: 0.068450927734375
Epoch: 124, Batch: 2995, Loss: 0.05998029559850693
Epoch: 124, Accuracy: 84.78%
Epoch: 125, Batch: 0, Loss: 0.

In [10]:
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

Concepts loaded;
    - vocabulary: 6808
    - noise: 3902
  Total: 10711
Validation Accuracy: 65.055%
Concepts loaded;
    - vocabulary: 6808
  Total: 6809
Validation Accuracy: 80.276%
Concepts loaded;
    - noise: 3902
  Total: 3903
Validation Accuracy: 38.509%


There were no meaningful validation accuracy improvements since epoch $106$, so we rewound back to that point.  
We then regenerated the noise samples with $\times4$ the number of samples in the set from before.

Because the sample data was regenerated, a value in the new validation sample may have existed in the previous training set.  
However, due to the large increase in size and the random nature of the entropic sort, the influence of this is kept to a minimum.

Just generating this entropic dataset took $25mins$

We also altered the code so that `apply_concept` stores the validation and training loaders in separate variables, meaning both datasets can be loaded at once.  
This allows us to easily run validation after each training epoch for better tracking of progress.

In [5]:
ACCUMULATION_STEPS = 1022 # *14 close to 1024
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.225})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.6)
# (223mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 35132
  Total: 41941
Epoch: 107, Batch: 0, Loss: 0.17003555595874786
Epoch: 107, Batch: 2995, Loss: 0.3649917244911194
Epoch: 107, Accuracy: 44.53%
  Loss: 25%: 0.266961 50%: 0.322859 75%: 0.401827
   Avg: 0.376571
Validation Accuracy: 52.558%
Epoch: 108, Batch: 0, Loss: 0.2767481803894043
Epoch: 108, Batch: 2995, Loss: 0.2282908856868744
Epoch: 108, Accuracy: 45.71%
  Loss: 25%: 0.236131 50%: 0.285327 75%: 0.352968
   Avg: 0.327862
Validation Accuracy: 52.785%
Epoch: 109, Batch: 0, Loss: 0.2816326320171356
Epoch: 109, Batch: 2995, Loss: 0.44868266582489014
Epoch: 109, Accuracy: 45.96%
  Loss: 25%: 0.224561 50%: 0.274050 75%: 0.334460
   Avg: 0.311470
Validation Accuracy: 52.669%
Epoch: 110, Batch: 0, Loss: 0.3383205533027649
Epoch: 110, Batch: 2995, Loss: 0.23295405507087708
Epoch: 110, Accuracy: 46.30%
  Loss: 25%: 0.220841 50%: 0.266846 75%: 0.325280
   Avg:

In [5]:
ACCUMULATION_STEPS = 1022 # *14 close to 1024
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.225})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.6)
# (473mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 35132
  Total: 41941
Epoch: 131, Batch: 0, Loss: 0.17456796765327454
Epoch: 131, Batch: 2995, Loss: 0.2147127389907837
Epoch: 131, Accuracy: 49.08%
  Loss: 25%: 0.182142 50%: 0.219519 75%: 0.261854
   Avg: 0.238503
Validation Accuracy: 53.039%
Epoch: 132, Batch: 0, Loss: 0.15447185933589935
Epoch: 132, Batch: 2995, Loss: 0.23203356564044952
Epoch: 132, Accuracy: 49.53%
  Loss: 25%: 0.179635 50%: 0.218030 75%: 0.260219
   Avg: 0.233673
Validation Accuracy: 53.182%
Epoch: 133, Batch: 0, Loss: 0.22216513752937317
Epoch: 133, Batch: 2995, Loss: 0.225952610373497
Epoch: 133, Accuracy: 49.67%
  Loss: 25%: 0.179488 50%: 0.215952 75%: 0.258133
   Avg: 0.232541
Validation Accuracy: 52.972%
Epoch: 134, Batch: 0, Loss: 0.2670586109161377
Epoch: 134, Batch: 2995, Loss: 0.2001418173313141
Epoch: 134, Accuracy: 50.15%
  Loss: 25%: 0.175179 50%: 0.213433 75%: 0.257488
   Avg:

In [6]:
# Break down of validation per concept
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

Concepts loaded;
    - vocabulary: 6808
  Total: 6809
Validation Accuracy: 80.276%
Concepts loaded;
    - noise: 15614
  Total: 15615
Validation Accuracy: 40.384%


In [7]:
ACCUMULATION_STEPS = 1022 # *14 close to 1024
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.49})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.7)
# (147mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 76510
  Total: 83319
Epoch: 181, Batch: 0, Loss: 0.1759694367647171
Epoch: 181, Batch: 5951, Loss: 0.22194238007068634
Epoch: 181, Accuracy: 49.46%
  Loss: 25%: 0.177602 50%: 0.217874 75%: 0.265804
   Avg: 0.242470
Validation Accuracy: 52.830%
Epoch: 182, Batch: 0, Loss: 0.25460365414619446
Epoch: 182, Batch: 5951, Loss: 0.21128690242767334
Epoch: 182, Accuracy: 49.48%
  Loss: 25%: 0.175104 50%: 0.212107 75%: 0.255886
   Avg: 0.235241
Validation Accuracy: 52.905%
Epoch: 183, Batch: 0, Loss: 0.12572143971920013
Epoch: 183, Batch: 5951, Loss: 0.33139023184776306
Epoch: 183, Accuracy: 49.27%
  Loss: 25%: 0.172943 50%: 0.209403 75%: 0.251747
   Avg: 0.229739
Validation Accuracy: 53.004%
Epoch: 184, Batch: 0, Loss: 0.2354583591222763
Epoch: 184, Batch: 5951, Loss: 0.17928846180438995
Epoch: 184, Accuracy: 49.76%
  Loss: 25%: 0.172795 50%: 0.206543 75%: 0.248675
   A

In [5]:
ACCUMULATION_STEPS = 1022 # *14 close to 1024
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.49})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.7)
# (229mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 76510
  Total: 83319
Epoch: 191, Batch: 0, Loss: 0.1820286512374878
Epoch: 191, Batch: 5951, Loss: 0.10562223941087723
Epoch: 191, Accuracy: 50.85%
  Loss: 25%: 0.164790 50%: 0.199412 75%: 0.238296
   Avg: 0.214035
Validation Accuracy: 53.124%
Epoch: 192, Batch: 0, Loss: 0.1497276872396469
Epoch: 192, Batch: 5951, Loss: 0.18680252134799957
Epoch: 192, Accuracy: 50.98%
  Loss: 25%: 0.162247 50%: 0.196092 75%: 0.236591
   Avg: 0.211390
Validation Accuracy: 52.803%
Epoch: 193, Batch: 0, Loss: 0.19556422531604767
Epoch: 193, Batch: 5951, Loss: 0.280161052942276
Epoch: 193, Accuracy: 51.34%
  Loss: 25%: 0.160275 50%: 0.194838 75%: 0.233409
   Avg: 0.209014
Validation Accuracy: 53.097%
Epoch: 194, Batch: 0, Loss: 0.2834548056125641
Epoch: 194, Batch: 5951, Loss: 0.6966380476951599
Epoch: 194, Accuracy: 51.55%
  Loss: 25%: 0.159822 50%: 0.194443 75%: 0.231409
   Avg: 

In [13]:
# Pytorch crashed - leading to emergency saving
save_model()

In [5]:
# Validate the model saved correctly
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Validation Accuracy: 52.932%
Concepts loaded;
    - vocabulary: 6808
  Total: 6809
Validation Accuracy: 80.291%
Concepts loaded;
    - noise: 15614
  Total: 15615
Validation Accuracy: 41.005%


In [5]:
ACCUMULATION_STEPS = 2044 # 146 * 14, the multiple of the batch size closest to 2048
# Load the validation set first, then the training set
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.75)
# (635mins)
# (total 2525mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 140528
  Total: 147337
Epoch: 210, Batch: 0, Loss: 0.24935071170330048
Epoch: 210, Batch: 10524, Loss: 0.14365918934345245
Epoch: 210, Accuracy: 48.09%
  Loss: 25%: 0.173191 50%: 0.209463 75%: 0.249764
   Avg: 0.228281
Validation Accuracy: 53.182%
Epoch: 211, Batch: 0, Loss: 0.484810471534729
Epoch: 211, Batch: 10524, Loss: 0.1400684267282486
Epoch: 211, Accuracy: 48.27%
  Loss: 25%: 0.171612 50%: 0.206248 75%: 0.245603
   Avg: 0.224276
Validation Accuracy: 53.204%
Epoch: 212, Batch: 0, Loss: 0.1532992422580719
Epoch: 212, Batch: 10524, Loss: 0.0006325695430859923
Epoch: 212, Accuracy: 48.22%
  Loss: 25%: 0.170276 50%: 0.204755 75%: 0.242843
   Avg: 0.220793
Validation Accuracy: 53.035%
Epoch: 213, Batch: 0, Loss: 0.20600691437721252
Epoch: 213, Batch: 10524, Loss: 0.0707259476184845
Epoch: 213, Accuracy: 48.49%
  Loss: 25%: 0.168891 50%: 0.202694 75%: 0.241886

In [5]:
ACCUMULATION_STEPS = 1022 # *14 close to 1024
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.75)
# (347mins)

Concepts loaded;
    - vocabulary: 6808
    - noise: 15614
  Total: 22423
Concepts loaded;
    - vocabulary: 6808
    - noise: 140528
  Total: 147337
Epoch: 231, Batch: 0, Loss: 0.2609182894229889
Epoch: 231, Batch: 10524, Loss: 0.018615350127220154
Epoch: 231, Accuracy: 50.96%
  Loss: 25%: 0.156389 50%: 0.187656 75%: 0.224919
   Avg: 0.199664
Total Seq Accuracy: 53.142%
Average Sequence: 90.449%
Epoch: 232, Batch: 0, Loss: 0.15666547417640686
Epoch: 232, Batch: 10524, Loss: 0.2910940945148468
Epoch: 232, Accuracy: 51.26%
  Loss: 25%: 0.154557 50%: 0.185810 75%: 0.223254
   Avg: 0.197851
Total Seq Accuracy: 53.160%
Average Sequence: 90.445%
Epoch: 233, Batch: 0, Loss: 0.20547236502170563
Epoch: 233, Batch: 10524, Loss: 0.0004840803158003837
Epoch: 233, Accuracy: 51.53%
  Loss: 25%: 0.153975 50%: 0.185541 75%: 0.221745
   Avg: 0.196544
Total Seq Accuracy: 53.218%
Average Sequence: 90.456%
Epoch: 234, Batch: 0, Loss: 0.27715635299682617
Epoch: 234, Batch: 10524, Loss: 0.25293126702308655

In [None]:
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()

In [None]:
ACCUMULATION_STEPS = 28 # *14 close to 32
apply_concept({"vocabulary": 1.0, "noise": 0.9})
trainFor(50, log_freq=DATA_SIZE, target_acc=0.8)

In [None]:
apply_concept({"vocabulary": 1.0, "noise": 0.1}, validation=True)
validate()
apply_concept({"vocabulary": 1.0}, validation=True)
validate()
apply_concept({"noise": 0.1}, validation=True)
validate()