In [None]:
# Library import cell

import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
import itertools
from scipy.optimize import curve_fit
import seaborn as sns
import math

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

In [None]:
# Function cell

def generate_random_binary_string(length):
    """Return a random string of `length` characters, each '0' or '1'.

    Characters are drawn with `np.random.choice`, so the result follows
    numpy's global random state.
    """
    bits = np.random.choice(['0', '1'], size=length)
    return ''.join(bits)

def generate_dict(n_tasks, len_taskcode, num_checks, len_message):
    """Build a mapping from unique random task codes to parity-check positions.

    Args:
        n_tasks: number of distinct task codes to create.
        len_taskcode: number of bits in each task code.
        num_checks: how many message positions each task checks.
        len_message: message length; positions are drawn from range(len_message)
            without replacement.

    Returns:
        A dict {task_code (str): list of int positions}, or False (after
        printing an error) when n_tasks exceeds the 2**len_taskcode codes
        that can exist.
    """
    if n_tasks > np.power(2, len_taskcode):
        print("Error: n_tasks is too large")
        return False

    tasks_dict = {}
    message_positions = range(len_message)
    # Keep drawing codes until enough distinct ones exist; a repeated code is
    # discarded without drawing check positions for it.
    while len(tasks_dict) < n_tasks:
        candidate = generate_random_binary_string(len_taskcode)
        if candidate in tasks_dict:
            continue
        chosen = np.random.choice(message_positions, size=num_checks, replace=False)
        tasks_dict[candidate] = chosen.tolist()
    return tasks_dict

# Currently following a probability distribution given by the probabilities list
def generate_dataset(tasks_dict, num_samples, message_length=None):
    """Sample a mixed-task parity dataset.

    Each row is a task code followed by a random binary message; the target is
    the parity (sum modulo 2) of the message bits listed for that task. Tasks
    are sampled with probability proportional to 1/2**rank, where rank is the
    1-based position of the task in `tasks_dict`.

    Args:
        tasks_dict: {task_code (str of '0'/'1'): list of int message positions}.
            All codes must have the same length.
        num_samples: number of rows to generate.
        message_length: number of message bits per row. When None, the
            notebook-level `len_message` is used.

    Returns:
        [data, value]: `data` is a float array of shape
        (num_samples, code length + message_length) and `value` is a float
        array of shape (num_samples,) holding the parity targets.

    Raises:
        ValueError: if `tasks_dict` is empty.
    """
    task_list = list(tasks_dict)
    if not task_list:
        raise ValueError("tasks_dict must contain at least one task")
    if message_length is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        message_length = len_message
    # Take the code width from the keys themselves so the output shape always
    # agrees with the dictionary that was passed in.
    code_length = len(task_list[0])

    rank = np.arange(1, len(task_list) + 1)
    # Alternative distributions that have been tried:
    #probabilities = [1/(x**2) for x in rank]
    #probabilities = [np.exp(-0.1*x) for x in rank]
    #probabilities = [0.9, 0.09, 0.009, 0.001]
    # Computed in floating point: the integer form 1/(2**x) overflows int64
    # once there are 63 or more tasks.
    probabilities = np.power(0.5, rank)
    task_probabilities = probabilities / np.sum(probabilities)  # normalize probabilities

    # Row t holds the bits of task t's code as floats.
    code_bits = np.array(
        [[float(bit) for bit in code] for code in task_list]
    ).reshape(len(task_list), code_length)

    # check_counts[t, j] = number of times task t's list names message bit j
    # (a position listed twice contributes twice to the sum).
    check_counts = np.zeros((len(task_list), message_length))
    for row, code in enumerate(task_list):
        np.add.at(check_counts[row], np.asarray(tasks_dict[code], dtype=int), 1)

    # Draw every task index and message in one call instead of per sample.
    task_idx = np.random.choice(len(task_list), size=num_samples, p=task_probabilities)
    messages = np.random.randint(0, 2, size=(num_samples, message_length)).astype(float)

    data = np.concatenate((code_bits[task_idx], messages), axis=1)
    value = np.sum(messages * check_counts[task_idx], axis=1) % 2

    return [data, value]

def generate_dataset_for_task(task_code, num_samples, checkbits=None, message_length=None):
    """Generate a parity dataset in which every row belongs to one task.

    Each row is `task_code` followed by a random binary message; the target is
    the parity (sum modulo 2) of the message bits at the task's check positions.

    Args:
        task_code: string of '0'/'1' characters identifying the task.
        num_samples: number of rows to generate.
        checkbits: message positions whose bits are summed. When None, they are
            looked up as `tasks_dict[task_code]` in the notebook-level dict.
        message_length: number of message bits per row. When None, the
            notebook-level `len_message` is used.

    Returns:
        [data, value]: `data` is a float array of shape
        (num_samples, len(task_code) + message_length) and `value` is a float
        array of shape (num_samples,) holding the parity targets.
    """
    if checkbits is None:
        # Backward-compatible default: use the notebook-level task table.
        checkbits = tasks_dict[task_code]
    if message_length is None:
        message_length = len_message

    code_bits = np.array([float(bit) for bit in task_code])

    # check_counts[j] = number of times position j is listed
    # (a position listed twice contributes twice to the sum).
    check_counts = np.zeros(message_length)
    np.add.at(check_counts, np.asarray(checkbits, dtype=int), 1)

    # Draw all messages at once instead of one Python-level call per sample.
    messages = np.random.randint(0, 2, size=(num_samples, message_length)).astype(float)

    data = np.concatenate((np.tile(code_bits, (num_samples, 1)), messages), axis=1)
    value = (messages @ check_counts) % 2
    return [data, value]

# IS NOT USING BATCH NORMALISATION
class NeuralNetwork(nn.Module):
    """Fully connected network with ReLU after every layer except the last.

    The stack is Linear(input_size, hidden_size), then `num_layers - 2`
    Linear(hidden_size, hidden_size) layers (none when num_layers <= 2), then
    Linear(hidden_size, output_size). The final layer's output is returned
    without an activation.
    """

    def __init__(self, input_size, output_size, num_layers, hidden_size):
        super().__init__()
        n_inner = max(num_layers - 2, 0)
        # Consecutive pairs of widths give each layer's (in, out) sizes.
        widths = [input_size] + [hidden_size] * (n_inner + 1) + [output_size]
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

    def forward(self, x):
        *activated_layers, output_layer = self.layers
        for linear in activated_layers:
            x = F.relu(linear(x))
        return output_layer(x)

# Define a custom dataset
class CustomDataset(Dataset):
    """Dataset over a DataFrame whose last column is the target.

    All columns except the last are features. Indexing returns a
    (features, target) pair of float32 tensors.
    """

    def __init__(self, dataframe):
        feature_columns = dataframe.iloc[:, :-1]
        target_column = dataframe.iloc[:, -1]
        self.data = feature_columns.values
        self.target = target_column.values

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        features = torch.from_numpy(self.data[idx]).to(torch.float32)
        label = torch.tensor(self.target[idx]).to(torch.float32)
        return features, label

In [None]:
# Plotting functions

def epoch_plots(num_epochs, loss_list, accuracy_list):
    """Show loss and accuracy against epoch number in two side-by-side panels.

    Args:
        num_epochs: number of epochs to plot; the x axis runs 1..num_epochs.
        loss_list: one loss value per plotted epoch.
        accuracy_list: one accuracy value per plotted epoch.
    """
    epochs = range(1, num_epochs + 1)
    fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: loss. Right panel: accuracy.
    panels = (
        (loss_ax, loss_list, 'Loss'),
        (accuracy_ax, accuracy_list, 'Accuracy'),
    )
    for ax, series, name in panels:
        ax.plot(epochs, series, label=name)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(name)
        ax.legend()

    # tight_layout is intentionally left off, as in the original notebook.
    fig.suptitle('Model performance')
    plt.show()

In [None]:
# Creating the run before the transfer learning test. A standard parity check run
# Note that when generating data a 1/2^n distribution is being applied.

# Parameters
n_tasks = 16 # number of tasks created
len_taskcode = 8 # length of the code defining the task. This will go at the beginning of each entry
num_checks = 3  # Length of the associated integer list for each task. Must be less than len_message
len_message = 8  # Maximum integer value in the associated integer list
#num_samples_list = np.logspace(start=3, stop=5, num=10, base=10, dtype=int) # number of samples created in the dataset
num_samples = 5000 # This is the number of samples created per epoch
samples_per_task = 200
task_sample_freq = 10 # How many epochs between sampling of tasks
# Define hyperparameters
input_size = len_taskcode + len_message
output_size = 1
learning_rate = 0.001
batch_size = 32
num_epochs = 7000 # To be halted post grok
num_layers = 8
hidden_size = 256

plot_freq = 500 # How many epoch between creating a plot of progress
#moving_avg = 1 # How the averaging is done in accuracy by task plots. Seems to cause bugs

# Train the model
tasks_dict = generate_dict(n_tasks, len_taskcode, num_checks, len_message) # Moved intentionally earlier
if tasks_dict is False:
    # generate_dict signals failure by returning False; stop here with a clear
    # message instead of failing later on an attribute error.
    raise ValueError("generate_dict failed: n_tasks exceeds 2**len_taskcode")
print("tasks_dict = ", tasks_dict.items())
tasks_list = list(tasks_dict.keys())

# Create instances of the neural network
model = NeuralNetwork(input_size, output_size, num_layers, hidden_size)

# Define loss function and optimizer (same as before)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)


def _count_correct(logits, labels):
    """Return how many rows of `logits` (batch, 1) predict the class in `labels` (batch,).

    The model outputs raw logits (the sigmoid lives inside BCEWithLogitsLoss),
    so probability >= 0.5 corresponds to logit >= 0.
    """
    predictions = (logits.detach().squeeze(1) >= 0).float()
    return (predictions == labels).sum().item()


# Column names shared by every generated dataframe.
frame_columns = [f'feature_{i}' for i in range(len_taskcode + len_message)] + ['target']

# Per-task accuracy is sampled at epochs 0, task_sample_freq, 2*task_sample_freq, ...
# which is ceil(num_epochs / task_sample_freq) samples in total.
n_task_evals = (num_epochs + task_sample_freq - 1) // task_sample_freq

loss_list = np.empty(num_epochs)
accuracy_list = np.empty(num_epochs)
accuracy_array = np.zeros((n_tasks, n_task_evals)) # such that each row is the accuracy for that specific task over all epochs
acc_counter = 0
for epoch in tqdm(range(num_epochs)):

    # Fresh training data is drawn every epoch.
    [data, value] = generate_dataset(tasks_dict, num_samples)
    df = pd.DataFrame(np.concatenate((data, value.reshape(-1, 1)), axis=1), columns=frame_columns)

    dataset = CustomDataset(df)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    running_loss = 0.0  # sum of per-sample losses over the epoch
    correct = 0
    total = 0
    model.train()

    for inputs, labels in data_loader:
        # Forward pass
        outputs = model(inputs)

        # Compute loss (kept separate from the running total)
        batch_loss = criterion(outputs, labels.unsqueeze(1))

        # Compute accuracy
        correct += _count_correct(outputs, labels)
        total += labels.size(0)

        # Backward pass and optimization
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size so the final
        # division by len(dataset) yields the per-sample mean.
        running_loss += batch_loss.item() * inputs.size(0)

    # Calculate average train loss
    avg_loss = running_loss / len(dataset)
    # Calculate average train accuracy
    avg_accuracy = correct / total

    # Update the loss list
    loss_list[epoch] = avg_loss
    accuracy_list[epoch] = avg_accuracy

    if epoch % task_sample_freq == 0:
        # Measure accuracy separately on freshly generated data for each task.
        model.eval()
        for task_index, task_code in enumerate(tasks_list):
            [data_per_task, value_per_task] = generate_dataset_for_task(task_code, samples_per_task)
            df_per_task = pd.DataFrame(
                np.concatenate((data_per_task, value_per_task.reshape(-1, 1)), axis=1),
                columns=frame_columns,
            )
            dataset_per_task = CustomDataset(df_per_task)
            # Order does not affect accuracy, so no shuffling is needed here.
            loader_per_task = DataLoader(dataset_per_task, batch_size=batch_size, shuffle=False)
            task_correct = 0
            task_total = 0
            with torch.no_grad():
                for inputs, labels in loader_per_task:
                    outputs = model(inputs)
                    task_correct += _count_correct(outputs, labels)
                    task_total += labels.size(0)
            accuracy_array[task_index, acc_counter] = task_correct / task_total
        acc_counter += 1

    if epoch % plot_freq == 0 and epoch != 0:
        epoch_plots(epoch, loss_list[:epoch], accuracy_list[:epoch])

epoch_plots(num_epochs, loss_list, accuracy_list)

# Display model parameter number. If model is changed, should go into the loop
pytorch_total_params = sum(p.numel() for p in model.parameters())
print(f"Total model parameters = {pytorch_total_params}")