In [None]:
# model utils -> 和模型相關的函式
import torch
import os
from collections import OrderedDict
def freeze(model):
    """Turn off gradient tracking for every parameter of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
    """
    for param in model.parameters():
        param.requires_grad_(False)
def unfreeze(model):
    """Turn gradient tracking back on for every parameter of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
    """
    for param in model.parameters():
        param.requires_grad_(True)
def is_frozen(model):
    """Tell whether at least one parameter of ``model`` has gradients disabled.

    Note that a partially frozen model also counts as frozen; a model with no
    parameters at all is reported as not frozen.

    Returns:
        bool: ``True`` if any parameter has ``requires_grad == False``.
    """
    return any(not param.requires_grad for param in model.parameters())
def save_checkpoint(model_dir, state, session):
    """Serialize ``state`` into ``model_dir`` with ``torch.save``.

    The file is named ``model_epoch_<epoch>_<session>.pth`` where ``<epoch>``
    is read from ``state['epoch']``.

    Args:
        model_dir: Existing directory that receives the file.
        state: Mapping to save; must contain the key ``'epoch'``.
        session: Suffix that identifies the run in the file name.
    """
    file_name = "model_epoch_{}_{}.pth".format(state['epoch'], session)
    torch.save(state, os.path.join(model_dir, file_name))
def load_checkpoint(model, weights):
    """Load the ``"state_dict"`` entry of a checkpoint file into ``model``.

    The state dict is first loaded as stored. If that fails because the keys
    do not match, a leading ``module.`` prefix (as added when a model is saved
    from inside a wrapper such as ``torch.nn.DataParallel``) is removed from
    the keys that carry it and loading is retried.

    Args:
        model: Module that receives the weights (modified in place).
        weights: Path of a checkpoint written with ``torch.save`` that holds a
            ``"state_dict"`` entry.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry.
        RuntimeError: If the weights still do not fit after prefix removal.
    """
    # map_location="cpu" lets a checkpoint written on a GPU be read on a host
    # without one; load_state_dict copies the values into the model's own
    # tensors, so the model stays on whatever device it already lives on.
    checkpoint = torch.load(weights, map_location="cpu")
    state_dict = checkpoint["state_dict"]
    try:
        model.load_state_dict(state_dict)
    except RuntimeError:
        prefix = "module."
        # Only strip the prefix where it is actually present, so keys that
        # never had it are not mangled.
        stripped = OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in state_dict.items()
        )
        model.load_state_dict(stripped)
def load_start_epoch(weights):
    """Return the value stored under ``"epoch"`` in the checkpoint file ``weights``.

    Args:
        weights: Path of a checkpoint written with ``torch.save``.
    """
    return torch.load(weights)["epoch"]
def load_optim(optimizer, weights):
    """Restore ``optimizer`` from the ``'optimizer'`` entry of a checkpoint file.

    Nothing is returned; after the call the restored learning rate can be read
    from ``optimizer.param_groups``.

    Args:
        optimizer: Optimizer to update in place.
        weights: Path of a checkpoint written with ``torch.save``.
    """
    saved_state = torch.load(weights)['optimizer']
    optimizer.load_state_dict(saved_state)
def network_parameters(nets):
    """Count the scalar elements across all parameters of ``nets``.

    Returns:
        int: Raw element count (not scaled to millions).
    """
    total = 0
    for tensor in nets.parameters():
        total += tensor.numel()
    return total


In [None]:
# dir -> 和路徑有關的函式
import os
from natsort import natsorted
from glob import glob


def mkdirs(paths):
    """Create one directory, or every directory in a list, via ``mkdir``.

    Args:
        paths: A single path, or a ``list`` of paths. Anything that is not a
            ``list`` is handed to ``mkdir`` as one path.
    """
    # A str is never a list, so testing for list alone picks the same branch.
    targets = paths if isinstance(paths, list) else [paths]
    for target in targets:
        mkdir(target)


def mkdir(path):
    """Create directory ``path`` including missing parents; no-op if it exists.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True closes the window between a separate existence check and
    # the creation call, during which another process could create the folder.
    os.makedirs(path, exist_ok=True)


def get_last_path(path, session):
    """Return the last entry, in natural sort order, matching ``<path>/*<session>``.

    Args:
        path: Directory to search.
        session: Suffix the entry name has to end with.

    Raises:
        IndexError: If nothing matches the pattern.
    """
    pattern = os.path.join(path, '*%s' % session)
    matches = natsorted(glob(pattern))
    return matches[-1]


In [None]:
# csv -> 和csv有關的函式
import csv
import os


def write_csv(data=None, csv_path=None, save_name='result'):
    """Write prediction rows to ``<csv_path>/<save_name>.csv``.

    The file starts with the header ``Number,Predict``; each following line
    holds the first two items of one row of ``data``.

    Args:
        data: Iterable of rows, each indexable at ``[0]`` (number) and ``[1]``
            (prediction). ``None`` produces a header-only file.
        csv_path: Output directory; created together with missing parents.
        save_name: File name without the ``.csv`` extension.
    """
    # makedirs also handles nested output folders, which os.mkdir cannot.
    os.makedirs(csv_path, exist_ok=True)
    rows = [] if data is None else data
    target = os.path.join(csv_path, save_name + '.csv')
    # The with-block closes the file; a single writer emits header and rows.
    with open(target, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Number', 'Predict'])
        for row in rows:
            writer.writerow([row[0], row[1]])


In [None]:
# score -> 和分數計算有關的函式

def calculate_score_A(x):
    """Scale ``x`` by 70 to obtain Score A.

    Args:
        x: Value to scale; the testing cell passes the fraction of samples it
            counted as correct.

    Returns:
        ``x * 70``.
    """
    full_marks = 70
    return x * full_marks


def calculate_score_B(y):
    """Convert ``y`` into Score B using fixed bands.

    Bands (upper bound inclusive) and the points they award:

        y <= 5          -> 30
        5    < y <= 7.5  -> 25
        7.5  < y <= 10   -> 25
        10   < y <= 12.5 -> 17.5
        12.5 < y <= 15   -> 15
        15   < y <= 17.5 -> 10
        17.5 < y <= 20   -> 5
        y > 20           -> 0

    Args:
        y: Value to grade; the testing cell passes the largest absolute
            prediction error.

    Returns:
        Points for the band that contains ``y``; 0 when no band matches.
    """
    # (inclusive upper bound, points), checked in ascending order.
    # The (17.5, 20] band used to be uncovered and fell through to 0 because
    # its branch repeated the (15, 17.5] condition and could never run.
    # NOTE(review): an unreachable branch awarding 20 used to follow the 25
    # band; (7.5, 10] keeps the 25 it has always returned — confirm against
    # the scoring rubric.
    bands = (
        (5, 30),
        (7.5, 25),
        (10, 25),
        (12.5, 17.5),
        (15, 15),
        (17.5, 10),
        (20, 5),
    )
    for upper_bound, points in bands:
        if y <= upper_bound:
            return points
    return 0


In [None]:
# dataset.py -> 整理data，並換為可訓練資料(Normalization)
from torch.utils.data import Dataset
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

# dataset definition
class TrainDataset(Dataset):
    """Training samples read from a CSV file.

    Columns 1-13 are used as features and column 14 as the target. Features
    are scaled with a ``StandardScaler`` fitted on this same file.
    """

    def __init__(self, train_path=None):
        # Read the CSV and split it into feature columns and the target column.
        frame = pd.read_csv(train_path)
        features = frame.iloc[:, 1:14].values
        targets = frame.iloc[:, 14].values

        # Feature scaling; the targets are used as they are.
        scaled_features = StandardScaler().fit_transform(features)

        # Convert to tensors; the targets become a column of shape (N, 1).
        self.inputs_train = torch.tensor(scaled_features, dtype=torch.float32)
        self.outputs_train = torch.tensor(targets, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        """Number of samples."""
        return self.outputs_train.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, target)`` for sample ``idx``."""
        sample = self.inputs_train[idx]
        label = self.outputs_train[idx]
        return sample, label


class ValDataset(Dataset):
    """Validation samples scaled with statistics fitted on the training CSV.

    In both files columns 1-13 are used as features; column 14 of the
    validation file is the target.
    """

    def __init__(self, train_path=None, val_path=None):
        """Read both CSV files and build the validation tensors.

        Args:
            train_path: Training CSV; read only to fit the feature scaler.
            val_path: Validation CSV that supplies the samples.
        """
        # Training features are needed only to fit the scaler.
        train_features = pd.read_csv(train_path).iloc[:, 1:14].values

        val_frame = pd.read_csv(val_path)
        inputs = val_frame.iloc[:, 1:14].values
        outputs = val_frame.iloc[:, 14].values

        # Fit on the training data, then apply that scaling to the
        # validation features.
        scaler = StandardScaler()
        scaler.fit(train_features)
        inputs_val = scaler.transform(inputs)

        # Convert to tensors. The attribute names keep their "_train" suffix
        # so existing code reading them continues to work.
        self.inputs_train = torch.tensor(inputs_val, dtype=torch.float32)
        self.outputs_train = torch.tensor(outputs, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        """Number of samples."""
        return len(self.outputs_train)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for sample ``idx``."""
        return self.inputs_train[idx], self.outputs_train[idx]


class TestDataset(Dataset):
    """Test samples scaled with statistics fitted on the training CSV.

    In the test file column 0 is a numeric sample identifier (stored as
    int32), columns 1-13 are the features and column 14 is the target.
    """

    def __init__(self, train_path=None, test_path=None):
        """Read both CSV files and build the test tensors.

        Args:
            train_path: Training CSV; read only to fit the feature scaler.
            test_path: Test CSV that supplies the samples.
        """
        # Training features are needed only to fit the scaler.
        train_features = pd.read_csv(train_path).iloc[:, 1:14].values

        test_frame = pd.read_csv(test_path)
        inputs = test_frame.iloc[:, 1:14].values
        outputs = test_frame.iloc[:, 14].values
        names = test_frame.iloc[:, 0].values

        # Fit on the training data, then apply that scaling to the test
        # features.
        scaler = StandardScaler()
        scaler.fit(train_features)
        inputs_test = scaler.transform(inputs)

        # Convert to tensors; targets and identifiers become (N, 1) columns.
        self.inputs_test = torch.tensor(inputs_test, dtype=torch.float32)
        self.outputs_test = torch.tensor(outputs, dtype=torch.float32).view(-1, 1)
        self.file_name = torch.tensor(names, dtype=torch.int32).view(-1, 1)

    def __len__(self):
        """Number of samples."""
        return len(self.inputs_test)

    def __getitem__(self, idx):
        """Return ``(features, target, identifier)`` for sample ``idx``."""
        return self.inputs_test[idx], self.outputs_test[idx], self.file_name[idx]

In [None]:
# model.py -> 模型架構
import torch.nn as nn


# model definition
class MLP(nn.Module):
    """Four-layer perceptron with a single output value.

    The three hidden linear layers are each followed by a ``PReLU``; the
    output layer is followed by a ``LeakyReLU`` with negative slope 0.2.
    """

    def __init__(self, n_inputs, hidden_layer1, hidden_layer2, hidden_layer3):
        """Create the layers.

        Args:
            n_inputs: Number of input features.
            hidden_layer1: Width of the first hidden layer.
            hidden_layer2: Width of the second hidden layer.
            hidden_layer3: Width of the third hidden layer.
        """
        super().__init__()
        # Hidden stack
        self.layer_1 = nn.Linear(n_inputs, hidden_layer1)
        self.act1 = nn.PReLU()
        self.layer_2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.act2 = nn.PReLU()
        self.layer_3 = nn.Linear(hidden_layer2, hidden_layer3)
        self.act3 = nn.PReLU()
        # Output head
        self.layer_4 = nn.Linear(hidden_layer3, 1)
        self.act4 = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Run ``x`` through each linear layer and the activation after it."""
        stages = (
            (self.layer_1, self.act1),
            (self.layer_2, self.act2),
            (self.layer_3, self.act3),
            (self.layer_4, self.act4),
        )
        for linear, activation in stages:
            x = activation(linear(x))
        return x


In [None]:
# install tensorboardX
!pip install tensorboardx



In [None]:
# Training parameter setting

# NOTE(review): the name reads like hidden sizes 128/256/128, but the training
# cell builds MLP(hidden_layer1=128, hidden_layer2=128, hidden_layer3=64) —
# confirm which one is intended.
Network = 'MLP_128_256_128' # network name; also the sub-folder created under SAVE_DIR
EPOCH = 10 # number of the last epoch the training loop runs
LR = 0.01 # learning rate handed to the Adam optimizer
GPU = True # when True, the model is moved to the CUDA device if one is available
BATCH = 100 # batch size of the training and validation loaders
VAL_AFTER_EVERY = 5 # validate and save an epoch checkpoint every this many epochs
TRAIN_DIR = '/content/train.csv' # path to training data
VAL_DIR = '/content/val.csv' # path to validation data
SAVE_DIR = '/content' # root folder for the saved models and logs



In [None]:
#--------------------------------Start training !----------------------------#
import yaml
import os
import torch

torch.backends.cudnn.benchmark = True

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

import random
import time
import numpy as np

from tqdm import tqdm
from tensorboardX import SummaryWriter

## Set Seeds
random.seed(1234)
np.random.seed(1234)
torch.manual_seed(1234)
torch.cuda.manual_seed_all(1234)

## Model and log path direction
print('==> Build folder path')
start_epoch = 1
network_dir = os.path.join(SAVE_DIR, Network)
mkdir(network_dir)
save_dir = os.path.join(network_dir)
mkdir(save_dir)
model_dir = os.path.join(network_dir, 'models')
mkdir(model_dir)
log_dir = os.path.join(network_dir, 'log')
mkdir(log_dir)
train_dir = TRAIN_DIR
val_dir = VAL_DIR

writer = SummaryWriter(log_dir=log_dir, filename_suffix='_log')

# Device: use CUDA only when it is both requested (GPU) and available, so the
# model and every batch always end up on the same device.
device = torch.device('cuda' if GPU and torch.cuda.is_available() else 'cpu')

# Model
print('==> Build model')
model = MLP(n_inputs=13, hidden_layer1=128, hidden_layer2=128, hidden_layer3=64)
model.to(device=device)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Loss function (mean absolute error)
criterion = nn.L1Loss()

# DataLoaders
print('==> Data preparation')
train_dataset = TrainDataset(train_dir)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH,
                          shuffle=True, num_workers=0, drop_last=False, pin_memory=False)
val_dataset = ValDataset(train_dir, val_dir)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH, shuffle=False, num_workers=0,
                        drop_last=False, pin_memory=False)

# The loop below runs epochs start_epoch..EPOCH inclusive, so EPOCH (not
# EPOCH + 1) is the end value to display.
print(f'''==> Training details:
------------------------------------------------------------------------------
    Network:          {Network}
    Training data:      {len(train_dataset)}
    Validation data:    {len(val_dataset)}
    Start/End epochs:   {str(start_epoch) + '~' + str(EPOCH)}
    Batch sizes:        {BATCH}
    Learning rate:      {LR}
    GPU:                {GPU}''')
print('------------------------------------------------------------------------------')

# train
best_val_loss = float('inf')  # any finite first validation loss becomes the best
best_epoch = 0
for epoch in range(start_epoch, EPOCH + 1):
    epoch_loss = 0
    model.train()
    for i, data in enumerate(tqdm(train_loader, ncols=70, total=len(train_loader), leave=True), 0):

        # Drop the gradients of the previous step before the new backward pass.
        optimizer.zero_grad(set_to_none=True)

        inputs = data[0].to(device)
        GT = data[1].to(device)
        out = model(inputs)
        loss = criterion(out, GT)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(' Epoch [{:3d}/{}]: \tLoss: {:.4f}\t'.format(epoch, EPOCH, epoch_loss / len(train_loader)))
    print('------------------------------------------------------------------')

    # One snapshot per epoch; the weights do not change during validation, so
    # the same dict is reused for every file written in this epoch.
    checkpoint = {'epoch': epoch,
                  'state_dict': model.state_dict(),
                  'optimizer': optimizer.state_dict()}
    torch.save(checkpoint, os.path.join(model_dir, 'model_latest.pth'))

    # validation (evaluation)
    if epoch % VAL_AFTER_EVERY == 0:
        model.eval()
        epoch_val_loss = 0
        with torch.no_grad():
            for ii, data_val in enumerate(val_loader, 0):
                inputs = data_val[0].to(device)
                GT = data_val[1].to(device)
                out = model(inputs)
                val_loss = criterion(out, GT)
                epoch_val_loss += val_loss.item()

        # Losses are compared as per-batch sums; they are divided by the
        # number of batches only for display and logging.
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            best_epoch = epoch
            torch.save(checkpoint, os.path.join(model_dir, 'model_best.pth'))

        print('[epoch %d Loss: %.4f --- best_epoch %d Best_loss %.4f]' % (
            epoch, epoch_val_loss / len(val_loader), best_epoch, best_val_loss / len(val_loader)))

        torch.save(checkpoint, os.path.join(model_dir, f'model_epoch_{epoch}.pth'))
        writer.add_scalar('val/loss', epoch_val_loss / len(val_loader), epoch)
    writer.add_scalar('train/loss', epoch_loss / len(train_loader), epoch)
writer.close()


==> Build folder path
==> Build model
==> Data preparation
==> Training details:
------------------------------------------------------------------------------
    Network:          MLP_128_256_128
    Training data:      97000
    Validation data:    1000
    Start/End epochs:   1~11
    Batch sizes:        100
    Learning rate:      0.01
    GPU:                True
------------------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 392.23it/s]


 Epoch [  1/11]: 	Loss: 4.6942	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 379.72it/s]


 Epoch [  2/11]: 	Loss: 2.9401	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 407.68it/s]


 Epoch [  3/11]: 	Loss: 2.4107	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 408.40it/s]


 Epoch [  4/11]: 	Loss: 2.0777	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 408.04it/s]


 Epoch [  5/11]: 	Loss: 1.8547	
------------------------------------------------------------------
[epoch 5 Loss: 1.4942 --- best_epoch 5 Best_loss 1.4942]


100%|██████████████████████████████| 970/970 [00:02<00:00, 406.27it/s]


 Epoch [  6/11]: 	Loss: 1.6287	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 401.53it/s]


 Epoch [  7/11]: 	Loss: 1.5285	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 405.80it/s]


 Epoch [  8/11]: 	Loss: 1.4531	
------------------------------------------------------------------


100%|██████████████████████████████| 970/970 [00:02<00:00, 401.26it/s]


 Epoch [  9/11]: 	Loss: 1.3274	
------------------------------------------------------------------


 93%|███████████████████████████▉  | 904/970 [00:02<00:00, 404.88it/s]

In [None]:
# Testing parameter setting
train_dir = '/content/train.csv'  # training CSV; TestDataset fits its feature scaler on it
test_dir = '/content/val.csv'  # CSV that is evaluated
result_dir = '/content'  # folder that receives the results CSV
# NOTE(review): the training cell saves its best checkpoint under
# SAVE_DIR/<Network>/models/model_best.pth, not directly under /content —
# confirm this is the file you mean to evaluate.
model_weights = '/content/model_best.pth'  # checkpoint loaded into the model
gpus = True  # intended GPU on/off switch for the testing run

In [None]:
#--------------------------------Start testing !----------------------------#
import os
from tqdm import tqdm

import torch
from torch.utils.data import DataLoader

# NOTE(review): these layer sizes have to match the checkpoint in
# model_weights; the training cell builds 13 -> 128 -> 128 -> 64 -> 1 instead.
model = MLP(n_inputs=13, hidden_layer1=128, hidden_layer2=256, hidden_layer3=128)


torch.multiprocessing.freeze_support()

# Use CUDA only when it is both requested (gpus) and available, so the model
# and the inputs always sit on the same device.
device = torch.device('cuda' if gpus and torch.cuda.is_available() else 'cpu')

load_checkpoint(model, model_weights)
model.to(device)
model.eval()

test_dataset = TestDataset(train_dir, test_dir)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=0)

results = []
score = 0  # number of samples whose absolute error is below 10
max_single_score = 0  # largest absolute error seen
print('===> Start testing~~')
with torch.no_grad():
    for ii, data_test in enumerate(tqdm(test_loader, ncols=70, leave=True), 0):
        input_ = data_test[0].to(device)
        answer = data_test[1]
        file_names = data_test[2]

        predict = model(input_).cpu().numpy()

        for batch in range(len(predict)):
            predicted_value = predict[batch].item()
            results.append([file_names[batch].item(), predicted_value])
            single_score = abs(predicted_value - answer[batch].item())
            if single_score < 10:
                score += 1
            if single_score > max_single_score:
                max_single_score = single_score

write_csv(data=results, csv_path=result_dir, save_name='results')

# Guard against an empty test set, which would otherwise divide by zero.
x = score / len(test_dataset) if len(test_dataset) else 0
y = max_single_score
score_A = calculate_score_A(x)
score_B = calculate_score_B(y)
total_score = score_A + score_B
print('===> Finish writing csv data!')

print(f'''
Result: 
----------------------------------
    Score A (70): {score_A}
    Score B (30): {score_B}
    Total  (100): {total_score}
''')


===> Start testing~~


100%|███████████████████████████| 1000/1000 [00:00<00:00, 2763.00it/s]

===> Finish writing csv data!

Result: 
----------------------------------
    Score A (70): 70.0
    Score B (30): 25
    Total  (100): 95.0




