In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from module.Weight_tune import *
from module.Reorg import *
from module.Cram import *
from module.Init import *
from module.LTS import *
from module.Data import *
import datetime
# from utils import *


# New Learning mechanism

In [2]:
#################################################################
# Full step for the learning algorithm mechanism
# NOTE
# 1. model right before reorg always need to be acceptable model
# 2. model after cram and reorg always need to be acceptable 
# 3. check for the above every time after cram and reorg
# 4. the randomness: in cram find r
#################################################################
from module.Cram import *
from module.Reorg import *
from module.Weight_tune import *

In [9]:
class NewLearningAlgorithm():
    def __init__(self):
        # LOG
        file_create_time = str(datetime.datetime.now().date())
        self.out_file = open(f"log/{file_create_time}" + '.txt', 'w')
        write(self.out_file, f"#######################################")
        write(self.out_file, str(datetime.datetime.now()))
        write(self.out_file, f"#######################################")
        # DATA
        data = pd.read_csv("Copper_forecasting_data.csv")
        (
            self.X_train, 
            self.y_train, 
            self.X_valid, 
            self.y_valid, 
            self.X_test, 
            self.y_test
        ) = preprocess(data, new_learning_algorithm=False)
        # TRAIN SET
        self.input_dim = 18
        self.dtype = torch.float64
        self.criterion = nn.MSELoss()
        self.lr_rate = .0001
        self.lr_bound = 1e-8
        self.epochs = 50
        self.batch_size = 100
        self.learning_goal = None
        self.model = None
        self.trainloader = None
        self.validloader = None
        self.testloader = None
        # CONFIG
        self.config_wt = None
        self.config_cram = None
        self.config_reorg  = None
        
    def data(self):
        self.trainloader = loader(self.X_train, self.y_train)
        self.validloader = loader(self.X_valid, self.y_valid)
        self.testloader = loader(self.X_test, self.y_test)
    
    def init_model_config(self, thres=80):
        print('INIT MODEL')
        while True:
            model = TwoLayerNet(18, 1, 1).to(device)
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            train_losses = []
            val_losses = []   
            val_min_loss = 1000
            for epoch in tqdm(range(100)):
                train_loss = 0
                # forward operation
                model.train()
                for X, y in self.trainloader:
                    X, y = X.to(device), y.to(device)        
                    optimizer.zero_grad()
                    preds = model(X)
                    loss = self.criterion(preds, y)
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                train_losses.append(train_loss)
                with torch.no_grad():
                    val_loss = 0
                    model.eval()
                    for X, y in self.validloader:
                        X, y = X.to(device), y.to(device)
                        preds = model(X)
                        loss = self.criterion(preds, y)
                        val_loss += loss.item()
                    val_losses.append(val_loss)
                    if val_loss < val_min_loss:
                        val_min_loss = val_loss
                        self.model = model
            if self.criterion(self.model(self.X_valid), self.y_valid) < .0002:
                break
        # CONFIG        
        self.learning_goal = torch.tensor(
            np.percentile(((self.model(self.X_train) - self.y_train)**2).cpu().detach().numpy(), thres))\
            .to(dtype = self.dtype).to(device)        
        self.config_wt = {
            "epochs": 10,
            "criterion": self.criterion,        # loss function
            "lr_rate": self.lr_rate,            # learning rate 
            "lr_bound": self.lr_bound,          # lower bound of learning rate 
            "lr_goal": self.learning_goal,      # if regular eps < eps_reg: accept the model
        }
        self.config_cram = {
            "lr_goal": self.learning_goal, 
            "s": 0.001,                     # a small num in cram
        }
        self.config_reorg  = {
            "epochs": 1,
            "criterion": self.criterion,        # loss function
            "lr_rate": 0.00001,            # learning rate 
            "lr_bound": self.lr_bound,          # lower bound of learning rate 
            "lr_goal": self.learning_goal,      # if regular eps < eps_reg: accept the model
            "print_reg": False,            # print detail, eg. loss for each epoch, or not
            "print_w_tune": False,         # print detail, eg. loss for each epoch, or not
            "validate_run": False,         # validate the model, or not
        }
     
    def train(self, print_ = False):
        n, n_not_fit = 0, 100
        
        while n < len(self.X_train):
            write(self.out_file, f"---------> Start New lts")
            # Obtaining_LTS / selecting_LTS
            self.trainloader, indices, _, _, n = \
                lts(self.model, self.X_train, self.y_train, self.learning_goal, n_not_fit, self.out_file)
            acceptable, eps_sqaure, _ = acceptable_eps_ypred(self.trainloader, self.model, self.learning_goal)
            torch.save(self.model, "unacceptable/selecting.pth")
            if acceptable:
                continue

            # Weight-Tune
            write(self.out_file, f"---------> Start WEIGHT TUNE\n")
            acceptable, self.model, train_loss_list, test_loss_list = \
                module_weight_EU_LG_UA(self.model, self.trainloader, self.validloader, self.out_file, **self.config_wt)
            
            if acceptable:
                write(self.out_file, "---------> Start REORG (accpetable wt)\n")   
                write(self.out_file, f"model after wt: {self.model}") if print else None
                pre_module = "wt"
            else:
                write(self.out_file, "---------> Start CRAM and REORG (unacceptable wt)\n")

                # Cram
                self.model = torch.load("unacceptable/selecting.pth")    
                acceptable, eps_square_before, _ = acceptable_eps_ypred(self.trainloader, self.model, self.learning_goal)
                cram = cramming(self.model, self.X_train[indices], self.y_train[indices], self.out_file, **self.config_cram)            
                cram.cram() 
                self.model = cram.model
                acceptable, eps_square, _ = acceptable_eps_ypred(self.trainloader, self.model, self.learning_goal)
                write(self.out_file, f"eps_sqaure (last 10) after cram: {eps_square[-10:].reshape(-1)}\n", False)
                write(self.out_file, f"eps_square (last 10) before cram: {eps_square_before[-10:].reshape(-1)}\n", False)
                assert acceptable, f"weird cram, max eps_square{max(eps_sqaure)}"
                pre_module = "Cram"
                
            # Reorganising
            """
            reorg = reorganising(pre_module, self.trainloader, self.testloader, self.out_file, **self.config_reorg)
            reorg.reorganising()
            self.model = reorg.model 
            acceptable, _, _ = acceptable_eps_ypred(self.trainloader, self.model, self.learning_goal)
            assert acceptable, f"weird reorg, max eps_square{max(eps_sqaure)}"
            """
            
        torch.save(self.model, 'result/model.pth')
        
    def evaluate_test(self):
        print(f"train loss: {self.criterion(self.model(self.X_train), self.y_train)}")
        print(f'train residual max {torch.max((self.model(self.X_train) - self.y_train)**2)}')
        print(f"test loss: {self.criterion(self.model(self.X_test), self.y_test)}")
        print(f'test residual max {torch.max((self.model(self.X_test) - self.y_test)**2)}')
    
    def evaluate_valid(self):
        validate_loss = self.criterion(self.model(self.X_valid), self.y_valid)
        validate_residual_max = torch.max((self.model(self.X_valid) - self.y_valid)**2)
        print(f"valid loss: {validate_loss}")
        print(f'valid residual max {validate_residual_max}')
        """
        print(f"train loss: {
            self.criterion(self.model(self.X_train), self.y_train)}")
        print(f'train residual max {torch.max((self.model(self.X_train) - self.y_train)**2)}')
        """
        return validate_loss, validate_residual_max

In [None]:
threses = [50, 80, 90, 95, 98]
min_of_val_res_max = 100
nla = NewLearningAlgorithm()
nla.data()
for thres in threses:
    print('=======================================')
    print('[NEW TRIAL]')
    nla.init_model_config(thres)
    print('=====')
    print('Test Time')
    nla.evaluate_test()
    print('=====')
    nla.train(thres)
    val_loss, val_res_max = nla.evaluate_valid()
    print('=====')
    print('Test Time')
    nla.evaluate_test()
    print('=====')
    if min_of_val_res_max > val_res_max:
        print('BEST THRES!!!!!!!!!!!!!!!!!!!!')
        min_of_val_res_max = val_res_max
        best_thres = thres
        best_model = nla.model
        best_nla = nla
        torch.save(best_model, 'result/best_model.pth')

In [12]:
best_nla.evaluate_test()

train loss: 0.0002499454853479313
train residual max 0.0037278311435575253
test loss: 0.0012928470864951036
test residual max 0.007970902348198992


In [None]:
threses = [50, 80, 90, 95, 98]
min_of_val_res_max = 100
nla = NewLearningAlgorithm()
nla.data()
for thres in threses:
    print('=======================================')
    print('[NEW TRIAL]')
    nla.init_model_config(thres)
    if min_of_val_res_max > val_res_max:
        print('BEST THRES!!!!!!!!!!!!!!!!!!!!')
        min_of_val_res_max = val_res_max
        best_thres = thres
        best_model = nla.model
        best_nla = nla

In [14]:
best_nla.evaluate_test()

train loss: 0.0007156500724556761
train residual max 0.012368260482324613
test loss: 0.0009486841600028468
test residual max 0.006449873076933106
