In [1]:
import argparse

import torch
import torch.optim as optim
from torchvision import transforms
import json
import os

from train import fit
from intrinsic_wrappers import *
from utils import get_input_args
from model_builder import *
from dataset_builder import build_dataset


if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available")

# add reproducibility stuff
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
np.random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

GPU is available


In [2]:
args = argparse.Namespace(dataset='mnist', intrinsic_dim=600, learning_rate=0.01, architecture='fcn', hidden_dim=200, num_layers=1, training_result_file='results.txt', projection='dense', model_save_path="model", optimizer='sgd', num_epochs=100, l2_reg=0.0)

print(args)

Namespace(dataset='mnist', intrinsic_dim=600, learning_rate=0.01, architecture='fcn', hidden_dim=200, num_layers=1, training_result_file='results.txt', projection='dense', model_save_path='model', optimizer='sgd', num_epochs=100, l2_reg=0.0)


In [3]:
# load the dataset and all the cooresponding parameters
(
    train_dataset,
    test_dataset,
    CHANNEL_IN,
    INPUT_HEIGHT,
    INPUT_WIDTH,
    OUTPUT_DIM,
    VAL_GLOBAL_ACCURACY,
) = build_dataset(
    dataset_name=args.dataset.upper(),
    img_transform=transforms.Compose([transforms.ToTensor()]),
)

# load the model
model, num_params = build_model(
    args.architecture,
    INPUT_HEIGHT,
    INPUT_WIDTH,
    args.hidden_dim,
    OUTPUT_DIM,
    args.num_layers,
    CHANNEL_IN,
    args.training_result_file,
    args.dataset,
    device, # the function already push the model to specific device!
)

# it is a little bit ugly: 0 hidden dimension means that we do not want to use the intrinsic dimension method to train our network and we use a normal training
if args.intrinsic_dim > 0:
    # project the model on the subspace of dimension equal to intrinsic dimension
    if args.projection == "dense":
        # dense projection
        model_intrinsic = DenseWrap(model, args.intrinsic_dim, device)
    elif args.projection == "fastfood":
        # fastfood projection
        model_intrinsic = FastfoodWrapper(model, args.intrinsic_dim, device)
        # TODO: add the other projection methods like sparse and others
    else:
        raise Exception("Name of projection not in: [dense, fastfood]")
else:
    # standard training
    model_intrinsic = model

count_params(model=model_intrinsic, msg="Number of intrinsic parameters: ")

model_intrinsic.to(device)

saving_path = f"./{args.model_save_path}/{args.architecture}/{args.dataset}/"
pretrained_path = f"{saving_path}{args.architecture}_h{args.hidden_dim}_id{args.intrinsic_dim}_lay{args.num_layers}_lr{args.learning_rate}_proj_{args.projection}_opt_{args.optimizer}.pt" if args.architecture == "fcn" else f"{saving_path}{args.architecture}_id{args.intrinsic_dim}_lr{args.learning_rate}_proj_{args.projection}_opt_{args.optimizer}.pt"

model_intrinsic.load_state_dict(torch.load(f"{saving_path}fcn_h200_id600_lay1.pt", map_location=device))
model_intrinsic.eval()


Number of parameters: 199210
FullyConnectedNetwork(
  (fc_in): Linear(in_features=784, out_features=200, bias=True)
  (fcs): ModuleList(
    (0): Linear(in_features=200, out_features=200, bias=True)
  )
  (fc_out): Linear(in_features=200, out_features=10, bias=True)
)
Number of intrinsic parameters: 600


DenseWrap()

In [4]:
from torch.utils.data import DataLoader
test_dataloader = DataLoader(
        test_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
        persistent_workers=True,  # on Winzoz system this is needed, if you don't want wait forever for the creation of the workers at each epoch
    )

In [5]:
from tqdm import tqdm

test_loss = 0
correct = 0

tqdm_iterator = tqdm(
    enumerate(test_dataloader),
    total=len(test_dataloader),
    desc="",
    leave=True,
)

len_ts_dl_ds = len(test_dataloader.dataset)

with torch.no_grad():
    for batch_idx, (data, target) in tqdm_iterator:
        data, target = data.to(device, non_blocking=True), target.to(
            device, non_blocking=True
        )
        output = model_intrinsic(data)
        test_loss += F.cross_entropy(output, target).item()  # sum up batch loss
        # get the index of the max probability
        pred = output.max(1, keepdim=True)[1] # equal to argmax
        correct += pred.eq(target.view_as(pred)).cpu().sum().item()
        tqdm_iterator.set_description(
            f"Test Epoch: 1 [{batch_idx * len(data)}/{len_ts_dl_ds} \tLoss: {test_loss:.6f}, Accuracy: {correct}/{len_ts_dl_ds} ({100.0 * correct / len_ts_dl_ds}%)"
        )
    test_loss /= len(test_dataloader.dataset)
    print(f"Validation Average loss: {test_loss:.6f}")

    tqdm_iterator.close()

# show an histogram of the weights of the model
"""start = -1
stop = 1
bins = 30
for param in model.parameters():
    if param.requires_grad:
        
        hist = torch.histc(param.data, bins = bins, min = start, max = stop)
        x = np.arange(start, stop, (stop-start)/bins)
        plt.bar(x, hist.cpu(), align='center')
        plt.ylabel('Frequency')
        plt.show() """

print(correct / len(test_dataloader.dataset), test_loss)

Test Epoch: 1 [1248/10000 	Loss: 25.295899, Accuracy: 9053/10000 (90.53%): 100%|██████████| 79/79 [00:02<00:00, 31.92it/s]

Validation Average loss: 0.002530
0.9053 0.0025295899376273157





In [6]:
for name, param in model_intrinsic.named_parameters():
    print('name: ', name)
    print(type(param))
    print('param.shape: ', param.shape)
    print('param.requires_grad: ', param.requires_grad)
    print('=====')

name:  V
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([600, 1])
param.requires_grad:  True
=====
