In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

# Custom modules
from utils import *
from models import *


# Fix the global RNG state of PyTorch and NumPy so reruns draw the same numbers.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


### Teacher model

In [2]:
# Instantiate the teacher, move it to the selected device, then initialise its weights.
teacher = Teacher()
teacher = teacher.to(device)
teacher.init_weights()

# Switch to evaluation mode; as the cell's last expression this also displays the model.
teacher.eval()

Teacher(
  (activation): ReLU()
  (net): Sequential(
    (0): Linear(in_features=100, out_features=75, bias=True)
    (1): ReLU()
    (2): Linear(in_features=75, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=25, bias=True)
    (5): ReLU()
    (6): Linear(in_features=25, out_features=1, bias=True)
  )
)

We now generate the test dataset

In [3]:
# Generate the test dataset from the teacher on `device`
# (60000 is presumably the number of samples — confirm in utils).
# Per the original note: covariates are random uniform in [0, 2] with 100 dimensions.
dataset_test = generate_test_dataset(60000, teacher, device)

# Serve the test set in fixed-order batches of 1024.
test_loader = DataLoader(dataset=dataset_test, shuffle=False, batch_size=1024)

### General training parameters

In [4]:
# Shared schedule settings; both values are read again when the recorded
# losses are assembled into DataFrames (see `interpolate_test_losses` calls).
n_steps, test_every = 1000, 50

### Student models

#### Under parameterized student model

In [5]:
# Create the StudentU model, place it on the selected device and initialise its weights.
student = StudentU()
student = student.to(device)
student.init_weights()

# Print the model's parameter count.
print_nparams(student)

Number of parameters for StudentU: 1021


In [6]:
# Train the current student against the teacher and keep the returned loss histories.
# The schedule is taken from the shared `n_steps` / `test_every` settings rather than
# repeated literals, so it always matches the values passed to
# `interpolate_test_losses` when the results are tabulated.
losses_train, losses_test = train(
    student=student,
    teacher=teacher,
    test_loader=test_loader,
    lr=1e-1,
    n_steps=n_steps,
    test_every=test_every,
    device=device,
)

Training:   0%|          | 0/1000 [00:00<?, ?it/s]

In [7]:
# Tabulate the StudentU run: raw training loss, test loss expanded by
# `interpolate_test_losses`, and a 10-row rolling mean of the training loss.
df_StudentU = pd.DataFrame(
    dict(
        train_loss=losses_train,
        test_loss=interpolate_test_losses(losses_test, n_steps, test_every),
    )
).assign(
    # The lambda receives the frame being built, so the smoothing uses its own column.
    train_loss_SMA=lambda frame: frame["train_loss"].rolling(10).mean()
)

#### Equally parameterized student model

In [None]:
# Create the StudentE model, place it on the selected device and initialise its weights.
student = StudentE()
student = student.to(device)
student.init_weights()

# Print the model's parameter count.
print_nparams(student)

In [None]:
# Train the current student against the teacher and keep the returned loss histories.
# The schedule is taken from the shared `n_steps` / `test_every` settings rather than
# repeated literals, so it always matches the values passed to
# `interpolate_test_losses` when the results are tabulated.
losses_train, losses_test = train(
    student=student,
    teacher=teacher,
    test_loader=test_loader,
    lr=1e-1,
    n_steps=n_steps,
    test_every=test_every,
    device=device,
)

In [None]:
# Tabulate the StudentE run: raw training loss and the test loss expanded by
# `interpolate_test_losses`.
df_StudentE = pd.DataFrame({
    "train_loss": losses_train,
    "test_loss": interpolate_test_losses(losses_test, n_steps, test_every)
})
# 10-row rolling mean of THIS model's training loss.
# Fix: the smoothing previously read df_StudentU["train_loss"], which silently stored
# StudentU's smoothed curve in StudentE's frame.
df_StudentE["train_loss_SMA"] = df_StudentE["train_loss"].rolling(window=10).mean()

#### Over parameterized student model

In [None]:
# Create the StudentO model, place it on the selected device and initialise its weights.
student = StudentO()
student = student.to(device)
student.init_weights()

# Print the model's parameter count.
print_nparams(student)

In [None]:
# Train the current student against the teacher and keep the returned loss histories.
# The schedule is taken from the shared `n_steps` / `test_every` settings rather than
# repeated literals, so it always matches the values passed to
# `interpolate_test_losses` when the results are tabulated.
losses_train, losses_test = train(
    student=student,
    teacher=teacher,
    test_loader=test_loader,
    lr=1e-1,
    n_steps=n_steps,
    test_every=test_every,
    device=device,
)

In [None]:
# Tabulate the StudentO run: raw training loss and the test loss expanded by
# `interpolate_test_losses`.
df_StudentO = pd.DataFrame({
    "train_loss": losses_train,
    "test_loss": interpolate_test_losses(losses_test, n_steps, test_every)
})
# 10-row rolling mean of THIS model's training loss.
# Fix: the smoothing previously read df_StudentU["train_loss"], which silently stored
# StudentU's smoothed curve in StudentO's frame.
df_StudentO["train_loss_SMA"] = df_StudentO["train_loss"].rolling(window=10).mean()