In [1]:
%load_ext autoreload
%autoreload 2
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import torch, copy
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision
import  matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from module.model import TwoLayerNet
from module.Weight_tune import *
from module.Reorg import *
from module.Cram import *
from module.init import *
from module.lts import *
from module.utils import *
from module.data import *
import datetime





# New Learning mechanism

### Data

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.float64

data = pd.read_csv('Copper_forecasting_data.csv')

sc = StandardScaler()
X = data.drop(["y"], axis = 1)
X = sc.fit_transform(X)
y = data["y"] / 1000

train_size = int(len(X)*0.8)
batch_size = 30

X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:] 

X_train = torch.tensor(np.array(X_train), dtype=dtype)
X_test = torch.tensor(np.array(X_test), dtype=dtype)
y_train = torch.tensor(np.array(y_train), dtype=dtype)
y_test = torch.tensor(np.array(y_test), dtype=dtype)
input_dim = X_train.shape[1]
    

test_loader = torch.utils.data.DataLoader(
    MyDataset(X_test.to(device), y_test.to(device)), 
    batch_size = batch_size, 
    shuffle=False, 
    drop_last = False)


### Model

In [3]:
model = TwoLayerNet(input_dim, 1, 1)

### Config

In [19]:
# learning goal: max eps < learning goal
learning_goal = torch.exp(torch.tensor(1)).to(dtype = dtype)
lr_rate = .01
lr_bound = 1e-5
criterion = nn.MSELoss()
epochs = 50
"""
# Note
1. hidden dim should check the previous model. dynamically change
2. For lr_rate, lr bound, lr goal (eps bound) are all the same fro eahc module
"""
config_wt = {
    "epochs": epochs,
    "criterion": criterion,        # loss function
    "lr_rate": lr_rate,            # learning rate 
    "lr_bound": lr_bound,          # lower bound of learning rate 
    "lr_goal": learning_goal,      # if regular eps < eps_reg: accept the model
}

config_cram = {
    "lr_goal": learning_goal, 
    "s": 0.001,                     # a small num in cram
}

config_reorg  = {
    "epochs": epochs,
    "criterion": criterion,        # loss function
    "lr_rate": lr_rate,            # learning rate 
    "lr_bound": lr_bound,          # lower bound of learning rate 
    "lr_goal": learning_goal,      # if regular eps < eps_reg: accept the model
    "print_reg": False,            # print detail, eg. loss for each epoch, or not
    "print_w_tune": False,         # print detail, eg. loss for each epoch, or not
    "validate_run": False,         # validate the model, or not
}
# NOTE 
# 1. for leaning goals, if first using weightune and no LTS or otherthings, 
# 13 for getting acceptable wt | 10 not acceptable


### Full Path

- L11 p9, third learning mechanism

In [None]:
# check the random of init. Should not be random in init.
k = 0
loss = []
while k<2:
    model = init_model(X_train, y_train)
    # 2. obtaining_LTS / selecting_LTS
    train_loader, indices, n = lts(model, X_train, y_train, learning_goal)

    # 3. check learning goal
    acceptable, model, train_loss_list, test_loss_list = \
        module_weight_EU_LG_UA(model, train_loader, test_loader, **config_wt)
    acceptable, eps, y_pred = check_acceptable(train_loader, model, learning_goal)
    loss.append(train_loss_list)
    k+=1

In [None]:
# Result file
file_create_time = str(datetime.datetime.now())
file = open(file_create_time + '.txt', 'a')

In [20]:
#################################################################
# Full step for the learning algorithm mechanism
# NOTE
# 1. model right before reorg always need to be acceptable model
# 2. model after cram and reorg always need to be acceptable 
# 3. check for the above every time after cram and reorg
# 4. the randomness: in cram find r
#################################################################

# 1. initializing_1_ReLU_LR | L11 p2
model = init_model(X_train, y_train)
n = 0
model_history = {}


while n < len(X_train):
    print(f"################################## New lts #######################################")
    models_within_iter = {}
    # 2. obtaining_LTS / selecting_LTS
    train_loader, indices, X_train_lts, y_train_lts, n = lts(model, X_train, y_train, learning_goal)

    # 3. check learning goal
    acceptable, eps_sqaure, y_pred = check_acceptable(train_loader, model, learning_goal)
    models_within_iter["begin"] = model
    if acceptable:
        continue

    else:
        torch.save(model, "unacceptable/selecting.pth")

        # store model in acceptable/wt.pth if acceptable
        # store model in unacceptable/wt.pth if not acceptable
        acceptable, model, train_loss_list, test_loss_list = \
            module_weight_EU_LG_UA(model, train_loader, test_loader, **config_wt)
        models_within_iter["wt"] = model
        
        if acceptable:
            print("////////// Start REORG with accpetable wt //////////")   
            print(f"model after wt: {model}")

            # load model in acceptable/wt.pth if wt acceptable
            # NOTE: the loaded model in reorg module is the same with "model" now
            pre_module = "wt"
            reorg = reorganising(pre_module, train_loader, test_loader, **config_reorg)
            reorg.reorganising()
            model = reorg.model
            models_within_iter["reorg"] = model
            print(f"model after reorg: {model}")

        else:
            print("////////// Start CRAM and REORG with unacceptable wt //////////")

            # restore model after wt, unacceptable
            model = torch.load("unacceptable/selecting.pth")    
            acceptable, eps_square, y_pred = check_acceptable(train_loader, model, learning_goal)
            print(f"model after wt: {model}")
            print(f"eps_square (last 10) before cram: {eps_square[-10:]}")

            # load model in unacceptable/wt.pth if wt not acceptable       
            # store acceptable cram in acceptable/cram.pth 
            cram = cramming(model, X_train[indices], y_train[indices], **config_cram)            
            cram.cram() 
            model = cram.model
            models_within_iter["cram"] = model
            print(f"model after cram: {model}")            

            # check accetable
            acceptable, eps_square, y_pred = check_acceptable(train_loader, model, learning_goal)
            print(f"eps_sqaure (last 10) after cram: {eps_square[-10:]}")

            if not acceptable:
                print("weird cram")
                break

            # load model in acceptable/cram.pth if cram acceptable
            # store acceptable cram in acceptable/cram.pth
            pre_module = "cram"
            reorg = reorganising(pre_module, train_loader, test_loader, **config_reorg)
            reorg.reorganising()
            model = reorg.model
            models_within_iter["reorg"] = model
            print(f"model after reorg: {model}")

            # check 
            acceptable, eps_square, y_pred = check_acceptable(train_loader, model, learning_goal)
            if acceptable:
                pass
            else:
                print("weird reorg")
                break
            
    model_history[n] = models_within_iter

    

################################## New lts #######################################
Total obtaining n: 328
obtaining n over lr goal: 0
Total select n: 329
select n over lr goal: 1
Save model and lr increase9156 ---------

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


lr too small1.60525109870369683 ---------
non acceptable module at max eps tensor([11.0633], dtype=torch.float64, grad_fn=<UnbindBackward0>)
////////// Start CRAM and REORG with unacceptable wt //////////
model after wt: TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=1, bias=True)
  (layer_out): Linear(in_features=1, out_features=1, bias=True)
  (relu): ReLU()
)
eps_square (last 10) before cram: tensor([[6.5449],
        [6.6698],
        [6.8589],
        [6.9040],
        [6.9042],
        [7.2052],
        [7.2850],
        [7.3423],
        [7.3837],
        [7.5201]], dtype=torch.float64, grad_fn=<SliceBackward0>)
cramming sample 328th |0.00% total of 1
model after cram: TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=4, bias=True)
  (layer_out): Linear(in_features=4, out_features=1, bias=True)
  (relu): ReLU()
)
eps_sqaure (last 10) after cram: tensor([[6.5449e+00],
        [6.6698e+00],
        [6.8589e+00],
        [6.9040e+00],
        [6.9042e+00]

In [2]:
model = torch.load("unacceptable/selecting.pth")    

target_dtype = torch.float64
for param in model.parameters():
    param.data = param.data.to(dtype=target_dtype)
    if param.grad is not None:
        param.grad.data = param.grad.data.to(dtype=target_dtype)

In [15]:
X_train

tensor([[ 0.9495,  1.7060,  1.4923,  ..., -1.3061, -0.5316,  1.7838],
        [ 0.9954,  1.4859,  1.5214,  ..., -1.3392, -0.5316,  1.7838],
        [ 0.8272,  1.5149,  1.2368,  ..., -1.3392, -0.5316,  1.5801],
        ...,
        [-1.0842, -0.0204, -0.1012,  ...,  0.8492,  1.1269, -0.5588],
        [-1.0383, -0.1033, -0.1423,  ...,  0.8492,  1.0956, -0.5588],
        [-0.9466, -0.1442, -0.1565,  ...,  0.7829,  0.9078, -0.4569]],
       dtype=torch.float64)

In [16]:
from module.Cram import cramming

train_loader, indices, X_train_lts, y_train_lts, n = lts(
                                model, 
                                X_train, 
                                y_train, 
                                learning_goal
                                )

print(f"model after wt: {model}")
acceptable, eps, y_pred = check_acceptable(train_loader, model, learning_goal)
# load model in unacceptable/wt.pth if wt not acceptable       
# store acceptable cram in acceptable/cram.pth 
cram = cramming(model, X_train[indices], y_train[indices], **config_cram)            
cram.cram() 
model = cram.model
# acceptable, eps, y_pred = check_acceptable(train_loader, model, learning_goal)
# acceptable, eps

Total obtaining n: 0
obtaining n over lr goal: 0
Total select n: 1
select n over lr goal: 1
model after wt: TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=1, bias=True)
  (layer_out): Linear(in_features=1, out_features=1, bias=True)
  (relu): ReLU()
)
cramming sample 0th |0.00% total of 1
try vector 1

ValueError: max() arg is an empty sequence

In [206]:
acceptable, eps, y_pred = check_acceptable(train_loader, model, learning_goal)
acceptable
model

TwoLayerNet(
  (layer_1): Linear(in_features=18, out_features=4, bias=True)
  (layer_out): Linear(in_features=4, out_features=1, bias=True)
  (relu): ReLU()
)

In [20]:
model = init_model(X_train, y_train)
train_loader_lts, indices, n = lts(model, X_train, y_train, learning_goal)
acceptable, eps, y_pred = check_acceptable(train_loader_lts, model, learning_goal)
torch.save(model, "unacceptable/selecting_wt.pth")
indices, eps
eps[eps>learning_goal]

Total obtaining n: 64
obtaining n over lr goal: 0
Total select n: 65
select n over lr goal: 1


tensor([0.3091])

In [None]:
acceptable, model, train_loss_list, test_loss_list = \
            module_weight_EU_LG_UA(model, train_loader_lts, test_loader, **config_wt)
acceptable, eps, y_pred = check_acceptable(train_loader, model, learning_goal)
eps

In [21]:
model = torch.load("unacceptable/selecting_wt.pth")  
acceptable, eps, y_pred = check_acceptable(train_loader_lts, model, learning_goal)
eps[eps>learning_goal]

tensor([0.3091])

In [190]:

from module.Cram import *
config_cram["s"] = .001
model = torch.load("unacceptable/selecting_wt.pth")          
acceptable, eps, y_pred = check_acceptable(train_loader_lts, model, learning_goal)  
print(max(eps))
cram = cramming(
    model, 
    X_train[indices], 
    y_train[indices], 
    **config_cram)
cram.cram()
model = cram.model
acceptable, eps, y_pred = check_acceptable(train_loader_lts, model, learning_goal)  
print(max(eps))

tensor([0.3091])
cramming sample 64th |0.00% total of 1
tensor([0.2946])


In [None]:

relu = nn.ReLU()
for i in range(len(indices)):
    w = relu(torch.dot(x[i]-x[64], cram.r) + cram.s) \
        + relu(torch.dot(x[i]-x[64], cram.r) - cram.s) \
        - 2*relu(torch.dot(x[i]-x[64], cram.r)) 
    print(w)

In [187]:
a = cram.model.layer_1.weight.data @ x[64].T
a_ = model.layer_1.weight.data @ x[64].T
print(a_, a)
print("\n")
print(cram.model.layer_1.bias.data)
a_ = a_ + model.layer_1.bias.data
a = a.T + cram.model.layer_1.bias.data
print(relu(a_), relu(a))
print("\n")
print(model.layer_out.weight.data, cram.model.layer_out.weight.data)
print(model.layer_out.weight.data @ relu(a_),
      cram.model.layer_out.weight.data @ relu(a))
a_ = model.layer_out.weight.data @ relu(a_) + model.layer_out.bias.data.reshape(1, 1)
a = cram.model.layer_out.weight.data @ relu(a) + cram.model.layer_out.bias.data.reshape(1, 1)
print(model.layer_out.bias.data.reshape(1, 1), cram.model.layer_out.bias.data.reshape(1, 1))
print(a, a_)

tensor([8.3716]) tensor([8.3716, 5.6251, 5.6251, 5.6251])


tensor([14.3434, -5.6241, -5.6251, -5.6261])
tensor([22.7149]) tensor([2.2715e+01, 9.9993e-04, 0.0000e+00, 0.0000e+00])


tensor([[1.]]) tensor([[   1.0000,  309.0630, -618.1259,  309.0630]])
tensor([22.7149]) tensor([23.0240])
tensor([[34.4350]]) tensor([[0.0373]])
tensor([[23.0613]]) tensor([[57.1499]])


In [83]:
model(X_train)

tensor([[21.9945],
        [59.9945],
        [47.9945],
        [23.9945],
        [59.9945],
        [75.9945],
        [53.9945],
        [57.9945],
        [53.9945],
        [31.9945],
        [55.9945],
        [51.9945],
        [65.9945],
        [47.9945],
        [39.9945],
        [39.9945],
        [ 7.9945],
        [47.9945],
        [57.9945],
        [ 3.9945],
        [39.9945],
        [11.9945],
        [53.9945],
        [61.9945],
        [59.9945],
        [63.9945],
        [51.9945],
        [71.9945],
        [59.9945],
        [67.9945],
        [51.9945],
        [47.9945],
        [35.9945],
        [27.9945],
        [71.9945],
        [63.9945],
        [49.9945],
        [57.9945],
        [55.9945],
        [57.9945],
        [57.9945],
        [67.9945],
        [59.9945],
        [49.9945],
        [71.9945],
        [43.9945],
        [75.9945],
        [35.9945],
        [63.9945],
        [75.9945],
        [63.9945],
        [51.9945],
        [59.