In [57]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import torch
from torch import nn
import pandas as pd
import numpy as np
import random

In [110]:
def set_random_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
set_random_seed(7)

In [111]:
dataset = datasets.fetch_california_housing()
X = dataset['data'].astype(np.float32)
y = dataset['target'].astype(np.float32)

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [112]:
LR = 1e-3
WEIGHT_DECAY = 1e-3
NUM_EPOCHS = 20
GAMMA = 0.9999
BATCH_SIZE = 64
EVAL_BATCH_SIZE = 300

In [113]:
from IPython.display import display, clear_output
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, X, y):
        super().__init__()
        self.X = X
        self.y = y
        self.len = len(X)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    
class Model(nn.Module):
    def __init__(self, in_features = 8, out_features = 1):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features

        self.sequential = nn.Sequential(
            nn.Linear(8, 8),
            nn.Sigmoid(),
            nn.Linear(8, 8),
            nn.Sigmoid(),
            nn.Linear(8, 4),
            nn.Sigmoid(),
            nn.Linear(4, 2),
            nn.Sigmoid(),
            nn.Linear(2, 1),
        )

    def forward(self, x):
        x = self.sequential(x)
        return x

In [114]:
train_dl = DataLoader(
    MyDataset(X_train, y_train),
    batch_size = BATCH_SIZE,
    shuffle = True
)

test_dl = DataLoader(
    MyDataset(X_test, y_test),
    batch_size = EVAL_BATCH_SIZE,
    shuffle = False
)

dls = {'train': train_dl, 'test': test_dl}

In [115]:
model = Model(X_train.shape[1])
loss_fn = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=GAMMA)

In [116]:
metrics_dict = {
    "Epoch": [],
    "Train RMSE": [],
    "Test RMSE": []
}

for epoch in tqdm(range(NUM_EPOCHS)):
    metrics_dict["Epoch"].append(epoch)
    for stage in ['train', 'test']:
        with torch.set_grad_enabled(stage == 'train'):
            if stage == 'train':
                model.train()
            else:
                 model.eval()

            loss_at_stage = 0 
            for batch in dls[stage]:
                x_batch, y_batch = batch
                y_pred = model(x_batch).view(-1)
                loss = loss_fn(y_pred, y_batch)
                if stage == "train":
                    loss.backward()
                    optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()
                with torch.no_grad():
                    loss_at_stage += (torch.square((y_pred - y_batch)).sum()).item()
            rmse_at_stage = (loss_at_stage / len(dls[stage].dataset)) ** (1/2)
            metrics_dict[f"{stage.title()} RMSE"].append(rmse_at_stage)
            
    clear_output(wait=True)
    display(pd.DataFrame(metrics_dict))

Unnamed: 0,Epoch,Train RMSE,Test RMSE
0,0,1.237938,1.16536
1,1,1.155284,1.155655
2,2,1.153684,1.154786
3,3,1.153583,1.154645
4,4,1.153527,1.154284
5,5,1.153479,1.154712
6,6,1.153181,1.15369
7,7,1.152301,1.152568
8,8,1.14516,1.138504
9,9,1.126171,1.112812
