In [1]:
%load_ext autoreload
%autoreload 2'
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
import torch, copy
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision
import  matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gc, os



# New Learning mechanism

### Data

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

data = pd.read_csv('Copper_forecasting_data.csv')

sc = StandardScaler()
X = data.drop(["y"], axis = 1)
X = sc.fit_transform(X)
y = data["y"] / 1000

train_size = int(len(X)*0.8)
batch_size = 30

X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:] 

X_train = torch.tensor(np.array(X_train), dtype=torch.float32)
X_test = torch.tensor(np.array(X_test), dtype=torch.float32)
y_train = torch.tensor(np.array(y_train), dtype=torch.float32)
y_test = torch.tensor(np.array(y_test), dtype=torch.float32)

class MyDataset(Data.Dataset):
    def __init__(self, X, y):

        self.X = X
        self.y = y

    def __getitem__(self, index):#返回的是tensor
        X_, y_ = self.X[index], self.y[index]
        return X_, y_

    def __len__(self):
        return len(self.X)

train_loader = torch.utils.data.DataLoader(
    MyDataset(X_train.to(device), y_train.to(device)), batch_size = batch_size, shuffle=False, drop_last = False)

test_loader = torch.utils.data.DataLoader(
    MyDataset(X_test.to(device), y_test.to(device)), batch_size = batch_size, shuffle=False, drop_last = False)

### module_weight_EU_LG_UA
- To acheive a acceptable SLFN
- store in acceptable/wt.pth
- Already acheived, skip this process, jump to reorganise module

In [45]:
from module.Reorg import *
from module.Weight_tune import *

# weight tune module
hidden_dim = 50
model = TwoLayerNet(18, hidden_dim, 1)

config_w_tune = {
    "epochs": 50,
    "epsilon": 10, # 13 for getting acceptable wt | 10 not acceptable
    "lr_lowerbound": 1e-5,
    "optimizer": optim.Adam(model.parameters(), lr = 0.01),
    "criterion": torch.nn.MSELoss(),
}



In [None]:
# Get Acceptable: iteratively random initialisation of weight tuning module
while True:
    acceptable, model, train_loss, test_loss = \
        module_weight_EU_LG_UA(model, train_loader, test_loader, **config_w_tune)
    if acceptable == True:
        break   

In [None]:
# Non Acceptable
acceptable, model, train_loss, test_loss = \
        module_weight_EU_LG_UA(model, train_loader, test_loader, **config_w_tune)
eps, pred = eps_for_each(train_loader, model)
acceptable, max(eps)

In [47]:
criterion = nn.MSELoss()
loss_train = 0
for _, (X, y) in enumerate(train_loader):
    pred = model(X)
    loss_train_ = criterion(pred, y)
    loss_train += loss_train_.item()
loss_train /= len(train_loader)

loss_test = 0
for _, (X, y) in enumerate(test_loader):
    pred_test = model(X)
    loss_test_ = criterion(pred_test, y)
    loss_test += loss_test_.item()
loss_test /= len(test_loader)

loss_test, loss_train

(805.4976902008057, 16.233105659484863)

In [310]:
# check if acceptable SLFN exist
import os
acceptable_wt_path = 'acceptable/wt.pth'
if os.path.exists(acceptable_wt_path):
    print(f"Acceptable SLFN exist in '{acceptable_wt_path}'.")
    model = torch.load(acceptable_wt_path)
else:
    print(f"Acceptable SLFN not exist in '{acceptable_wt_path}'.")
    model = None    

Acceptable SLFN exist in 'acceptable/wt.pth'.


### module reorganising_EU_LG_UA

[Regularise with Pytorch](https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-l1-l2-and-elastic-net-regularization-with-pytorch.md)
- L7 p 63
- Classmate p 15


In [52]:
pre_module = "cram"
previous_model = torch.load(f"acceptable/{pre_module}.pth")
eps, y_pred = eps_for_each(train_loader, previous_model)
max(eps), previous_model

(tensor([22.4717]),
 TwoLayerNet(
   (layer_1): Linear(in_features=18, out_features=164, bias=True)
   (layer_out): Linear(in_features=164, out_features=1, bias=True)
   (relu): ReLU()
 ))

In [55]:
from module.Reorg import *
# reorganising module
"""
# these config need to consist to config in weight tune module:
#   1. hidden_dim
#   2. epochs
#   3. eps
#   4. criterion
#   5. lr_bounds
# Note: hidden dim should check the previous model 
"""
config_reorg  = {
    "hidden_dim": 164,
    "epochs": 100,
    "criterion": nn.MSELoss(),     # loss function
    "lr_reg": 0.01,                # learning rate for regularisation
    "lr_w_tune": 0.01,             # learning rate for weight tuning
    "lr_bound_reg": 1e-5,          # lower bound of learning rate regularisation
    "lr_bound_w_tune": 1e-5,       # lower bound of learning rate weight tuning
    "eps_reg": 10,               # if regular eps < eps_reg: accept the model
    "eps_w_tune": 10,            # if weight tune eps < eps_w_tune: accept the model
    "print_reg": False,            # print detail, eg. loss for each epoch, or not
    "print_w_tune": False,         # print detail, eg. loss for each epoch, or not
    "validate_run": False,         # validate the model, or not
}

In [56]:
# Regular module
reorg = reorganising(pre_module, train_loader, test_loader, **config_reorg)
reorg.reorganising()

Acceptable SLFN exist in 'acceptable/cram.pth'.
[ 0.00%] ------------> Checking nodes...
TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=164, bias=True)
  (layer_out): Linear(in_features=164, out_features=1, bias=True)
  (relu): ReLU()
)
    --> Start regularising_EU_LG_UA
Lr too small, non acceptable module at max eps tensor([7490.7173])
Trim model: Copy model and delete nodes success
    --> Start module_EU_LG
acceptable module at max eps tensor([9.8639])
!!!Trim node from the model, hidden nodes decrease by 1!!!
[ 0.61%] ------------> Checking nodes...
TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=163, bias=True)
  (layer_out): Linear(in_features=163, out_features=1, bias=True)
  (relu): ReLU()
)
    --> Start regularising_EU_LG_UA
acceptable module at max eps tensor([9.2334])
Trim model: Copy model and delete nodes success
    --> Start module_EU_LG
non acceptable module at max eps tensor([42.6268])
[ 1.23%] ------------> Checking nodes...
TwoLayerNet(

In [57]:
loss_train = 0
for _, (X, y) in enumerate(train_loader):
    pred = reorg.model(X)
    loss_train_ = reorg.criterion(pred, y)
    loss_train += loss_train_.item()
loss_train /= len(train_loader)

loss_test = 0
for _, (X, y) in enumerate(test_loader):
    pred_test = reorg.model(X)
    loss_test_ = reorg.criterion(pred_test, y)
    loss_test += loss_test_.item()
loss_test /= len(test_loader)

loss_test, loss_train, reorg.model

(629.3808708190918,
 14.523092856773964,
 TwoLayerNet(
   (layer_1): Linear(in_features=18, out_features=28, bias=True)
   (layer_out): Linear(in_features=28, out_features=1, bias=True)
   (relu): ReLU()
 ))

In [1]:
"""
# ========= How to use weight tune, regular, and trim =========
# Weight tune ----------------------------------
model = TwoLayerNet(9, config["hidden_dim"], 1)
optimizer = optim.SGD(model.parameters(), lr=config["lr_w_tune"])
reorg.module_weight_EU_LG_UA(model, optimizer)
# Regular --------------------------------------
acceptable, model, train_loss, test_loss = reorg.regularising_EU_LG_UA()
# Trim model --------------------------------------
model_ = reorg.trim_model_nodes(2)
print(reorg.model, model_)
# Reorg ----------------------------------------------
reorg.reorganising()
# final model ------------------------------------------
reorg.model
# test final model -------------------------------------
loss_test = 0
for _, (X, y) in enumerate(test_loader):
    pred_test = reorg.model(X)
    loss_test_ = reorg.criterion(pred_test, y)
    loss_test += loss_test_.item()
print(loss_test)
# load reorganise trained model----------------------------------
model_reorg = torch.load("final_model/Reorg")
"""



### module_ReLU_RI_SO_RE_MU: Cramming

Notation: L9, p5 | L8, p3

In [39]:
class cramming(nn.Module):
    def __init__(self, train_loader, X_train, y_train, eps_bound, s):
        
        """
        model: 
        train_loader
        X_train:
        y_train:
        eps_bound: maximum epsilon accepted for EACH sample
                   as epsilon in weight tuning module and eps_reg, eps_w_tune in module above
        s: float (tiny) that r*(Xc - Xk) != 0 and (s - r*(Xc - Xk))*(s + r*(Xc - Xk)) < 0
        """
        super(cramming, self).__init__()
        if os.path.exists("unacceptable/wt.pth"):
            self.model = torch.load("unacceptable/wt.pth")
        else:
            self.model = None
            print(f"The unacceptable model does not exist.")
        self.input_dim = self.model.layer_1.weight.data.shape[1]
        self.hidden_dim = self.model.layer_1.weight.data.shape[0]
        self.train_loader = train_loader
        self.X_train = X_train
        self.y_train = y_train

        self.eps_bound = eps_bound
        (
        self.eps,
        self.y_pred
        ) = eps_for_each(train_loader, self.model)
        self.s = s
        self.ks = torch.where(self.eps > self.eps_bound)[0]
    

    def cram(self):
        for i, k in enumerate(self.ks):
            print(f"cramming sample {k} |{i/len(self.ks)*100:.2f}% total of {len(self.ks)}")
            r = self.cram_find_r(k)
            self.cram_add_node(r, k)
        torch.save(self.model, "acceptable/Cram.pth")

    def cram_find_r(self, k): 
        # L9, isolation R2: p39, carm: p.54, for multiple case: p.60?
        """
        k: k sample to cram (unaccepted sample with too large epsilon)
        ==========
        outputs
        r: vector that r*(Xc - Xk) != 0 and (s - r*(Xc - Xk))*(s + r*(Xc - Xk)) < 0
        """
        print("find r: r*(Xc - Xk) != 0 and (s - r*(Xc - Xk))*(s + r*(Xc - Xk)) < 0", end = "\r")

        Xc_Xk = []

        k = 1
        X_no_k = torch.cat([X_train[:k], X_train[k+1:]], dim = 0)
        if torch.any(torch.all(X_no_k == X_train[k], dim=1)):
            print("If X_train[k] in X_train: yes. check again")
        
        n = 0
        while True:
            n+=1
            print(f"try vector {n}", end="\r")

            r = torch.rand(self.input_dim)            
            dots = ((X_no_k - X_train[k]) @ r.T) 
            print(max((self.s + dots) * (self.s - dots)))
            
            if (torch.sum(dots == 0) == 0) and (max((self.s + dots) * (self.s - dots)) < 0):
                return r
            
    def cram_add_node(self, r, k):
        """
        k: k sample to cram (unaccepted sample with too large epsilon)
        r: vector that r*(Xc - Xk) != 0 and (s - r*(Xc - Xk))*(s + r*(Xc - Xk)) < 0
        """
        new_model = TwoLayerNet(self.input_dim, self.hidden_dim+3, 1)

        param = self.model.state_dict()
        for name in model.state_dict():
            if name == 'layer_1.weight':
                # First node
                new_w = torch.cat([param[name], r.reshape(1, -1)], dim = 0)
                # Second node
                new_w = torch.cat([new_w, r.reshape(1, -1)], dim = 0)
                # Third node
                new_w = torch.cat([new_w, r.reshape(1, -1)], dim = 0)

                new_model.layer_1.weight.data = new_w
                    
            if name == 'layer_1.bias':
                # First node
                node_add = self.s - torch.dot(r, self.X_train[k])
                new_b = torch.cat([param[name], node_add.reshape(1)], dim = 0)
                # Second node
                node_add = (-1) * torch.dot(r, self.X_train[k])
                new_b = torch.cat([new_b, node_add.reshape(1)], dim = 0)
                # Third node
                node_add = (-1)*self.s - torch.dot(r, self.X_train[k])
                new_b = torch.cat([new_b, node_add.reshape(1)], dim = 0)

                new_model.layer_1.bias.data = new_b

            if name == 'layer_out.weight':
                """
                # the base of Xk = (yk - prediction yk)/s
                aik = nn.ReLU()(model.layer_1.weight.data @ X_train[k].T)
                out_weight = model.layer_out.weight.data.reshape(-1)
                out_bias = model.layer_out.bias.data.reshape(-1)
                base = (y_train[k] - out_bias - torch.dot(out_weight, aik))/s
                """
                base = (self.y_train[k] - self.model(self.X_train[k]))/self.s
                # First node
                new_w_o = torch.cat([param[name], base.reshape(1, 1)], dim = 1)
                # Second node
                new_w_o = torch.cat([new_w_o, ((-2)*base).reshape(1, 1)], dim = 1)
                # Third node
                new_w_o = torch.cat([new_w_o, base.reshape(1, 1)], dim = 1)

                new_model.layer_out.weight.data = new_w_o

        self.model = new_model
        self.hidden_dim +=3
        

In [48]:
model = torch.load("unacceptable/wt.pth")
eps, pred = eps_for_each(train_loader, model)

In [None]:
# model, vs cram.model
model = torch.load("unacceptable/wt.pth")
config_cramming = {
    "eps_bound": np.percentile(eps.detach().numpy(), 90), 
    "s": 0.01,
}
cram = cramming(train_loader, X_train, y_train, **config_cramming)
cram.cram()
cram.model

In [50]:

criterion = nn.MSELoss()
loss_train = 0
for _, (X, y) in enumerate(train_loader):
    pred = cram.model(X)
    loss_train_ = criterion(pred, y)
    loss_train += loss_train_.item()
loss_train /= len(train_loader)

loss_test = 0
for _, (X, y) in enumerate(test_loader):
    pred_test = cram.model(X)
    loss_test_ = criterion(pred_test, y)
    loss_test += loss_test_.item()
loss_test /= len(test_loader)

loss_test, loss_train

(845.5683345794678, 23.17460170158973)

In [164]:
# check the weights
model.layer_1.weight.data, new_model.layer_1.weight.data.shape
model.layer_1.bias.data, new_model.layer_1.bias.data
model.layer_out.weight.data, new_model.layer_out.weight.data
# check grad
# model.layer_1.weight.requires_grad, new_model.layer_1.weight.requires_grad

(tensor([[ 0.0073,  0.0435,  0.0546, -0.2061, -0.1659,  0.1690, -0.1748,  0.1323,
          -0.1250, -0.1714,  0.0733, -0.0514,  0.0334,  0.1305, -0.0112,  0.1752,
          -0.0449,  0.0449, -0.0729, -0.1455]]),
 tensor([[ 7.2531e-03,  4.3481e-02,  5.4579e-02, -2.0614e-01, -1.6595e-01,
           1.6896e-01, -1.7480e-01,  1.3230e-01, -1.2500e-01, -1.7137e-01,
           7.3287e-02, -5.1439e-02,  3.3421e-02,  1.3048e-01, -1.1244e-02,
           1.7522e-01, -4.4895e-02,  4.4885e-02, -7.2886e-02, -1.4554e-01,
           2.1068e+04, -4.2136e+04,  2.1068e+04,  1.0528e+05, -2.1057e+05,
           1.0528e+05,  1.9568e+04, -3.9137e+04,  1.9568e+04, -1.7484e+04,
           3.4968e+04, -1.7484e+04, -4.7019e+04,  9.4038e+04, -4.7019e+04,
           1.1945e+04, -2.3890e+04,  1.1945e+04]]))