## Import Libraries

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

from Dataset import load_dataset, SimpleDataset, Preprocessor
from Model.model import MLP
from Model.evaluate import stats, visualize
from Search import ProjectedGradientDescent, RandomSearch

# Autoreload 
%load_ext autoreload
%autoreload 2

### Set simulation parameters and Cuda

In [None]:
# Central configuration: every tunable value used by the cells below lives here.
params = {
    # Seed for the NumPy and PyTorch global generators (applied right below),
    # so that stochastic steps are repeatable after Restart & Run All.
    'seed': 42,

    # Default MLP architecture
    'depth': 6,
    'hidden_size': 320,
    'skip_connect': 2,

    # Training / evaluation budget
    'max_time': 60*5, # 5 minutes
    'num_runs': 2,
    'epochs': 5, # quick-run value; 100 is presumably the full-run setting
    'batch_size': 256,

    # Search budget
    'rnd_trials': 1,
    'rnd_time': 1*60, # 1 minute
    'pgd_trials': 1,
    'pgd_iter': 1000,
}

# Seed the global generators before any data split or model is created.
# An unseeded sklearn `train_test_split` draws from NumPy's global generator,
# and `torch.manual_seed` covers PyTorch's default generators.
np.random.seed(params['seed'])
torch.manual_seed(params['seed'])

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using " + str(device))

# Parameters for model evaluation: the values swept in the "Simulations" section
epochs_candidates = [10, 30, 50, 100, 150]
sizes_candidates = [5, 20, 40, 80, 160, 320, 640]
depths_candidates = [2, 4, 6, 8, 9, 10]
skip_candidates = [0, 2, 3, 4, 5, 6]

## Load the dataset: `64` information bytes, 1024 total bits (rate = 0.5, i.e. half of the bits carry payload)

In [None]:
# Load the samples (`data`) and their FER targets (`target`) from the `Data/` directory.
data, target = load_dataset('Data/', 'fb1024.txt', 'fer1024.txt')

# Sanity summary: dataset dimensions and the range covered by the targets.
num_samples, num_bits = len(data), len(data[0])
fer_min, fer_max = target.min(), target.max()
print(f'Number of samples: {num_samples}, Number of bits: {num_bits}')
print(f"Smallest FER: {fer_min:E}, largest FER: {fer_max:E}")

In [None]:
# Hold out 20% of the samples for validation. The trailing underscore marks
# the raw splits, i.e. before the Preprocessor is applied.
raw_splits = train_test_split(data, target, test_size=0.2)
x_train_, x_val_, y_train_, y_val_ = raw_splits

n_train, n_val = x_train_.shape[0], x_val_.shape[0]
print(f"# Train samples: {n_train}, # Validation samples: {n_val}")

### Feature Preprocessing  

We analyze how each bit varies across the samples. If a bit remains constant for all samples, it can be safely discarded, which reduces the number of inputs and the number of parameters in the neural network. After that, the remaining bits are standardized by subtracting the mean and dividing by the standard deviation.

Furthermore, we apply the logarithm to the `FER` to simplify the regression task.

In [None]:
# A bit position is kept only if its standard deviation over the training
# samples is strictly positive, i.e. the bit is not constant.
bit_std = x_train_.std(dim=0)
varying_mask = bit_std > 0
indices = torch.where(varying_mask)[0].numpy()

# Report how many positions survive, as a count and as a share of all bits.
total_bits = len(x_train_[0])
varying_share = 100*len(indices)/total_bits
print(f"Number of varying indexes: {len(indices)}; {varying_share:.2f}%")

In [None]:
# Per-bit mean over the training samples (keepdim keeps the reduced axis with size 1)
bit_means = x_train_.mean(dim=0, keepdim=True)

# Fit the preprocessor on the training split only, then apply the same
# transformation to the training and validation splits.
converter = Preprocessor()
converter.fit(x_train_, y_train_, indices, bit_means)
(x_train, y_train), (x_val, y_val) = (
    converter.transform(x_train_, y_train_),
    converter.transform(x_val_, y_val_),
)

# Show the effect of the transformation on the shapes and on one target value.
raw_shapes = (x_train_.shape, x_val_.shape)
new_shapes = (x_train.shape, x_val.shape)
raw_fer, converted_fer = float(y_train_[0]), float(y_train[0])
print(f"Original train/valid shapes: {raw_shapes}")
print(f"New train/valid shapes: {new_shapes}")
print(f"Original FER: {raw_fer:E}, Converted FER: {converted_fer:E}")

In [26]:
# Wrap the preprocessed splits as datasets; `device` is handed to SimpleDataset.
train_dataset = SimpleDataset(x_train, y_train, device)
val_dataset = SimpleDataset(x_val, y_val, device)

# Both loaders share the configured batch size; only the training loader shuffles.
batch_size = params['batch_size']
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Simulations

### 1) Performance vs Epochs

In [None]:
# Sweep the number of training epochs; the architecture stays at the defaults in `params`.
mlp_kwargs = dict(
    input_dim=x_train.shape[1],
    hidden_size=params['hidden_size'],
    depth=params['depth'],
    skip_connect=params['skip_connect'],
)

epoch_stats = []
for n_epochs in epochs_candidates:
    print(f"Training for {n_epochs} epochs")
    # A fresh model is built for every candidate value.
    model = MLP(**mlp_kwargs).to(device)
    epoch_stats.append(
        stats(model, n_epochs, train_loader, val_loader, converter,
              params['max_time'], params['num_runs'])
    )

# One entry per candidate, in the order of `epochs_candidates`.
results_epochs = np.array(epoch_stats)

### 2) Performance vs hidden size

In [None]:
# Sweep the hidden layer width; depth, skip connections and epochs stay at the defaults in `params`.
fixed_kwargs = dict(
    input_dim=x_train.shape[1],
    depth=params['depth'],
    skip_connect=params['skip_connect'],
)

size_stats = []
for width in sizes_candidates:
    print(f"Training with hidden size {width}")
    # A fresh model is built for every candidate value.
    model = MLP(hidden_size=width, **fixed_kwargs).to(device)
    size_stats.append(
        stats(model, params['epochs'], train_loader, val_loader, converter,
              params['max_time'], params['num_runs'])
    )

# One entry per candidate, in the order of `sizes_candidates`.
results_sizes = np.array(size_stats)

### 3) Performance vs depth

In [None]:
# Sweep the network depth; width, skip connections and epochs stay at the defaults in `params`.
fixed_kwargs = dict(
    input_dim=x_train.shape[1],
    hidden_size=params['hidden_size'],
    skip_connect=params['skip_connect'],
)

depth_stats = []
for n_layers in depths_candidates:
    print(f"Training with depth {n_layers}")
    # A fresh model is built for every candidate value.
    model = MLP(depth=n_layers, **fixed_kwargs).to(device)
    depth_stats.append(
        stats(model, params['epochs'], train_loader, val_loader, converter,
              params['max_time'], params['num_runs'])
    )

# One entry per candidate, in the order of `depths_candidates`.
results_depths = np.array(depth_stats)

### 4) Performance vs skip-connections

In [None]:
# Sweep the skip-connection setting; width, depth and epochs stay at the defaults in `params`.
fixed_kwargs = dict(
    input_dim=x_train.shape[1],
    hidden_size=params['hidden_size'],
    depth=params['depth'],
)

skip_stats = []
for skip in skip_candidates:
    print(f"Training with skip connections {skip}")
    # A fresh model is built for every candidate value.
    model = MLP(skip_connect=skip, **fixed_kwargs).to(device)
    skip_stats.append(
        stats(model, params['epochs'], train_loader, val_loader, converter,
              params['max_time'], params['num_runs'])
    )

# One entry per candidate, in the order of `skip_candidates`.
results_skips = np.array(skip_stats)

### 5) Visualization

In [None]:
# One `visualize` call per sweep: (label, swept values, collected statistics).
sweeps = [
    ("epochs", epochs_candidates, results_epochs),
    ("hidden size", sizes_candidates, results_sizes),
    ("depth", depths_candidates, results_depths),
    ("skip connections", skip_candidates, results_skips),
]
for sweep_label, sweep_values, sweep_results in sweeps:
    visualize(sweep_label, sweep_values, sweep_results)

# Generating frozen bit set

We will try to find a configuration of bits that performs better than the best sample in the dataset (the one with the smallest `FER`). For the search, we first use `Random Search` as a baseline and then `Projected Gradient Descent`. The final output will later be evaluated using `AFF3CT` at the `SNR` of interest, and the results will be available in the presentation.

In [None]:
# Reference value that the searches below are given as their baseline.
# NOTE(review): `y_train` is the Preprocessor-transformed target of the training
# split only, so this minimum is in the converted space and ignores the
# validation samples — confirm that this is what the search routines expect.
best_fer_dataset = y_train.min()
print(f"Best FER in dataset: {best_fer_dataset}")

### 1) Random Search

In [None]:
# Train a model with the default architecture; it is the model handed to RandomSearch.
model_rnd = MLP(
    input_dim=x_train.shape[1],
    hidden_size=params['hidden_size'],
    depth=params['depth'],
    skip_connect=params['skip_connect'],
).to(device)
model_rnd.fit(train_loader, params['epochs'], verbose=False)

# Run the configured number of independent random-search trials.
rnd_trials = params['rnd_trials']
for _ in range(rnd_trials):
    RandomSearch(model_rnd, x_train, converter, params['rnd_time'], best_fer_dataset)

### 2) Projected Gradient Descent

In [None]:
# Train a separate model with the default architecture; it is the model handed to ProjectedGradientDescent.
model_pgd = MLP(
    input_dim=x_train.shape[1],
    hidden_size=params['hidden_size'],
    depth=params['depth'],
    skip_connect=params['skip_connect'],
).to(device)
model_pgd.fit(train_loader, params['epochs'], verbose=False)

# Run the configured number of independent PGD trials.
pgd_trials = params['pgd_trials']
for _ in range(pgd_trials):
    ProjectedGradientDescent(model_pgd, x_train, converter, params['pgd_iter'], best_fer_dataset)