In [1]:
import os
# Restrict this process to the physical GPU with index 3.
# This is done in the very first cell so that the variable is already in place
# before any CUDA-using library is imported and initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
from attention_all_layers import TemporalAugmentedDataset, EvalDataWrapper
import torchvision.transforms as transforms

# Image preprocessing pipeline: the only step is conversion to a tensor.
# A Normalize((0.5,), (0.5,)) step is currently disabled and therefore not applied.
transform = transforms.Compose([transforms.ToTensor()])
from models.noisy_dataloader import NoisyTemporalDataset
from utils.transforms import MeanFlat, RandomRepeatedNoise, Identity
from functools import partial

# Single Identity transform instance; the same object is reused for every
# timestep in `timestep_transforms` below.
# NOTE(review): presumably a no-op that returns its input unchanged — confirm in utils.transforms.
eye = Identity()

def worker_init_fn(worker_id):
    """Allow the calling process to be scheduled on every CPU core.

    Intended to be passed as ``worker_init_fn`` to a ``DataLoader`` so that it
    runs once inside each worker process. It resets the CPU affinity mask of
    the current process (pid ``0`` means "the calling process") to all cores
    reported by ``os.cpu_count()``.
    NOTE(review): presumably a workaround for worker processes inheriting a
    restricted affinity mask — confirm the original motivation.

    Args:
        worker_id: Index of the worker, supplied by the DataLoader. Unused.

    Returns:
        None. On platforms without ``os.sched_setaffinity`` or when the CPU
        count cannot be determined, the function does nothing.
    """
    # sched_setaffinity is only available on some Unix platforms; without this
    # guard the worker would crash with AttributeError elsewhere.
    if not hasattr(os, "sched_setaffinity"):
        return

    # os.cpu_count() returns None when the count is undetermined, and
    # range(None) would raise TypeError.
    n_cpus = os.cpu_count()
    if n_cpus is None:
        return

    os.sched_setaffinity(0, range(n_cpus))


# One transform per timestep (20 in total); every entry is the same shared
# `eye` object.
timestep_transforms = [eye for _ in range(20)]

# Fashion MNIST test split, wrapped so that each image is expanded into a
# sequence of timesteps using the per-timestep transforms above.
test_dataset = TemporalAugmentedDataset(
    'test',
    transform=transform,
    img_to_timesteps_transforms=timestep_transforms,
)
from torch.utils.data import DataLoader

from utils.visualization import visualize_first_batch_with_timesteps

# Evaluation loader: fixed order (no shuffling), batches of 100, 30 worker
# processes, each of which runs `worker_init_fn` on start-up.
test_loader = DataLoader(
    EvalDataWrapper(test_dataset, contrast=1, rep_noise=False),
    batch_size=100,
    shuffle=False,
    num_workers=30,
    worker_init_fn=worker_init_fn,
)

Found cached dataset fashion_mnist (/var/local/glang/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48)


In [3]:
from modules.lateral_recurrence import LateralRecurrence
from modules.exponential_decay import ExponentialDecay
from modules.divisive_norm import DivisiveNorm
from modules.divisive_norm_group import DivisiveNormGroup
from modules.div_norm_channel import DivisiveNormChannel
from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger
import pytorch_lightning as pl
import json
from models.adaptation import Adaptation
from HookedRecursiveCNN import HookedRecursiveCNN

# HookedRecursiveCNN requires layer_kwargs and div_norm_kwargs to build the
# model structure; the concrete init values do not matter because they are
# overwritten by the values stored in the pretrained checkpoint.
# Four conv-layer specs (in_channels, kernel_size vary; out_channels is always 32)
# followed by one linear-layer spec.
layer_kwargs = [
    dict(in_channels=c_in, out_channels=32, kernel_size=k_size)
    for c_in, k_size in ((1, 5), (32, 5), (32, 3), (32, 3))
] + [dict(in_features=128, out_features=1024)]

# Per-layer keyword arguments for the DivisiveNorm adaptation module, one dict
# per entry of layer_kwargs. Only the first entry has trainable parameters;
# within each entry the three train_* flags share the same value.
# Columns of the table: K_init, alpha_init, sigma_init, trainable.
div_norm_kwargs = [
    dict(
        epsilon=1e-8,
        K_init=k_init,
        train_K=trainable,
        alpha_init=alpha_init,
        train_alpha=trainable,
        sigma_init=sigma_init,
        train_sigma=trainable,
    )
    for k_init, alpha_init, sigma_init, trainable in (
        (0.2, -2.0, 0.1, True),
        (1.0, -2000000.0, 1.0, False),
        (1.0, -2000000.0, 1.0, False),
        (1.0, -2000000.0, 1.0, False),
        (1.0, 0.0, 1.0, False),
    )
]
# Per-layer keyword arguments for the ExponentialDecay adaptation module:
# five identical (but distinct) dicts, none with trainable parameters.
exp_decay_kwargs = [
    dict(alpha_init=1.0, train_alpha=False, beta_init=1, train_beta=False)
    for _ in range(5)
]

# Model configurations handed to HookedRecursiveCNN when loading checkpoints.
# Both use 20 timesteps and share the same layer_kwargs list object; they
# differ only in the adaptation module class and its per-layer kwargs.
div_norm_cfg = dict(
    t_steps=20,
    layer_kwargs=layer_kwargs,
    adaptation_module=DivisiveNorm,
    adaptation_kwargs=div_norm_kwargs,
    decode_every_timestep=True,
)
exp_decay_cfg = dict(
    t_steps=20,
    layer_kwargs=layer_kwargs,
    adaptation_module=ExponentialDecay,
    adaptation_kwargs=exp_decay_kwargs,
    decode_every_timestep=True,
)

In [4]:
from tqdm import tqdm
from torchmetrics.functional import accuracy
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Load the four pretrained checkpoints. Each call receives the checkpoint path
# and the config dict describing the architecture to build.
# NOTE(review): the config is passed as the second *positional* argument. In
# stock PyTorch Lightning that position of load_from_checkpoint is
# `map_location`, so this only does what is intended if HookedRecursiveCNN
# overrides load_from_checkpoint — confirm.
div_norm_model = HookedRecursiveCNN.load_from_checkpoint(
    'learned_models/sqrt_augmented_attn_all_layers_DivisiveNorm_contrast_random_epoch_50.ckpt', div_norm_cfg)
# NOTE(review): this model is loaded but is not part of the `models` dict
# below — confirm whether that is intentional.
unsqrt_div_norm_model = HookedRecursiveCNN.load_from_checkpoint(
    'learned_models/augmented_attn_all_layers_DivisiveNorm_contrast_random_epoch_30.ckpt', div_norm_cfg)
exp_decay_model = HookedRecursiveCNN.load_from_checkpoint(
    'learned_models/augmented_attn_all_layers_ExponentialDecay_contrast_random_epoch_50.ckpt', exp_decay_cfg)
# The baseline checkpoint is loaded with the ExponentialDecay config.
baseline_model = HookedRecursiveCNN.load_from_checkpoint(
    'learned_models/baseline_video_fmnist_ExponentialDecay_contrast_random_epoch_10.ckpt', exp_decay_cfg)
# Display label -> model, for the three models that are compared.
models = {
    'Divisive Norm.': div_norm_model,
    'Additive': exp_decay_model,
    'No Adaptation': baseline_model
}

## Things I will eventually plot but I'm not yet interested in

Note that these are mostly the same metrics as I already plotted for the 1-layer adaptation

* example images of the input data
* accuracy over time
* accuracy over time with only 1 image
* accuracy for different onsets of the second image
* the goal here is to show that the behavior is mostly the same even though the dataset is much more challenging

## Things that I'm most interested in because they are potentially novel

* activation over time per layer -> it would be great to see that later layers show more adaptation, because that is what is observed in the brain
* check if there are feature maps who get