In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import os
import math

In [2]:
# Read the complete training table from disk.
file_name = 'data/data_train/training_data.dat'
data = pd.read_csv(file_name)

# Hold out 5% of the rows for validation; the fixed random_state keeps the
# partition identical from run to run.
train, val = train_test_split(
    data,
    test_size=0.05,
    random_state=42,
)

In [3]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each input row with its target value.

    Args:
        initial_c: Array-like of input rows; stored as a float32 tensor in
            ``self.input``.
        t12: Array-like of targets, one per input row; stored as a float32
            tensor in ``self.label``.

    Raises:
        ValueError: If ``initial_c`` and ``t12`` do not contain the same
            number of samples.
    """

    def __init__(self, initial_c, t12):
        self.input = torch.tensor(initial_c, dtype=torch.float32)
        self.label = torch.tensor(t12, dtype=torch.float32)

        # __len__ is driven by the labels alone, so a mismatch would otherwise
        # surface late as an IndexError or silently ignore surplus inputs.
        if len(self.input) != len(self.label):
            raise ValueError(
                f'inputs and labels differ in length: '
                f'{len(self.input)} vs {len(self.label)}'
            )

    def __len__(self):
        """Return the number of (input, label) samples."""
        return len(self.label)

    def __getitem__(self, i):
        """Return the ``(input, label)`` tensors stored at index ``i``."""
        return self.input[i], self.label[i]

## 训练阶段 training phase

In [5]:
# Positional column indices into the training frame: three input columns and
# one target column (presumably the t1/2 value, going by the variable name).
input_columns = [1, 2, 3]
output_columns = 5

# Pull the selected columns out of the training split as NumPy arrays.
initial_c = train.iloc[:, input_columns].values
t12_all = train.iloc[:, output_columns].values

# Wrap the arrays as tensors and serve them in shuffled mini-batches of 32.
dataset = CustomDataset(initial_c, t12_all)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

In [6]:
class BaselineModel(nn.Module):
    """Small fully connected regressor with layer widths 3 -> 12 -> 24 -> 36 -> 1.

    The first two hidden layers use ReLU, the third uses leaky ReLU, and the
    output layer is linear (no activation).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=3, out_features=12)
        self.fc2 = nn.Linear(in_features=12, out_features=24)
        self.fc3 = nn.Linear(in_features=24, out_features=36)
        self.fc4 = nn.Linear(in_features=36, out_features=1)

    def forward(self, x):
        """Map inputs whose last dimension is 3 to outputs whose last dimension is 1."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        hidden = nn.functional.leaky_relu(self.fc3(hidden))
        return self.fc4(hidden)

In [7]:
# Training hyper-parameters.
num_epochs = 20
LR = 0.01

# Fresh model, mean-squared-error objective and Adam optimiser.
model = BaselineModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

model.train()
for epoch in range(num_epochs):
    # Accumulate plain Python floats. Summing the loss tensors themselves
    # would chain every batch's autograd history into the running total and
    # keep it referenced until the epoch ends.
    tot_loss = 0.0
    batch_num = 0
    for inputs, label in dataloader:
        optimizer.zero_grad()

        # Drop only the trailing size-1 output axis. A bare squeeze() would
        # also collapse the batch axis when the last batch holds one sample,
        # leaving a 0-d prediction against a (1,)-shaped label.
        output = model(inputs).squeeze(-1)
        loss = criterion(output, label)

        loss.backward()
        optimizer.step()

        tot_loss += loss.item()
        batch_num += 1

    avg_loss = tot_loss / batch_num
    print(f'epoch {epoch}; loss {avg_loss: .4f}')
        

epoch 0; loss  1214865.3750
epoch 1; loss  852620.8125
epoch 2; loss  444502.4062
epoch 3; loss  222953.6094
epoch 4; loss  150381.4375
epoch 5; loss  123546.5938
epoch 6; loss  88629.7031
epoch 7; loss  69200.5156
epoch 8; loss  61511.1211
epoch 9; loss  52092.2734
epoch 10; loss  49667.4492
epoch 11; loss  42341.2148
epoch 12; loss  32712.7246
epoch 13; loss  28690.0703
epoch 14; loss  27885.6230
epoch 15; loss  27326.0078
epoch 16; loss  22551.5918
epoch 17; loss  20555.3203
epoch 18; loss  19853.4023
epoch 19; loss  16729.6387


## 验证阶段 validation phase

In [9]:
# Positional column indices for the inputs and the target, applied here to
# the held-out validation split.
input_columns = [1, 2, 3]
output_columns = 5

# Extract the selected columns as NumPy arrays.
initial_c = val.iloc[:, input_columns].values
t12_all = val.iloc[:, output_columns].values

In [10]:
# Convert the validation arrays to float32 tensors for the model.
input = torch.tensor(initial_c, dtype=torch.float32)
t12_act = torch.tensor(t12_all, dtype=torch.float32)

# Total number of target values, used as the validation sample count.
num = t12_all.size

In [16]:
model.eval()

# Per-sample scores, collected in validation order.
sample_scores = []

with torch.no_grad():
    for i in range(num):
        pred = model(input[i]).item()
        act = t12_act[i].item()

        # Log-scaled absolute error: the score is 1 for an exact prediction,
        # falls as the error grows, and is clipped at 0.
        penalty = math.log(1+0.1*abs(pred-act))/5
        score = max(0, 1 - penalty)
        sample_scores.append(score)

        print(f't1/2 predicted: {pred: .4f}; t1/2 actual: {act: .4f}; score: {score}')

# Summed left to right from 0, i.e. in the same order as a running total.
tot_score = sum(sample_scores)
avg_score = tot_score / num
print(f'final score: {avg_score}')

t1/2 predicted:  210.0074; t1/2 actual:  152.3800; score: 0.6177143411227509
t1/2 predicted:  592.2783; t1/2 actual:  606.2100; score: 0.8254762167383076
t1/2 predicted:  82.1069; t1/2 actual:  19.4200; score: 0.603284817183417
t1/2 predicted:  54.8851; t1/2 actual:  18.1700; score: 0.6917036533587143
t1/2 predicted:  287.0778; t1/2 actual:  235.1800; score: 0.6354200033059325
t1/2 predicted:  426.0826; t1/2 actual:  418.3700; score: 0.8856615881651911
t1/2 predicted:  749.0204; t1/2 actual:  742.8000; score: 0.903262349454684
t1/2 predicted:  524.7344; t1/2 actual:  587.9900; score: 0.601726037909044
t1/2 predicted:  527.6530; t1/2 actual:  490.8200; score: 0.6911996293573929
t1/2 predicted:  1302.4722; t1/2 actual:  1457.9000; score: 0.4388099811863283
t1/2 predicted:  236.1768; t1/2 actual:  151.3600; score: 0.5501276261944597
t1/2 predicted:  1009.9491; t1/2 actual:  1173.3000; score: 0.4294533856484346
t1/2 predicted:  530.8084; t1/2 actual:  583.0900; score: 0.6341837591286845
t1

## 测试阶段 testing phase

### 验证集 validation set
参赛者可在提交平台看到public score
Contestants can see the public score on the submission platform.

In [6]:
# Load the public validation questions and select the model's input columns.
data_file_name = 'data/data_val/val_data_question.dat'
data = pd.read_csv(data_file_name)
input_columns = [1, 2, 3]
initial_c = data.iloc[:, input_columns].values
input_ = torch.tensor(initial_c, dtype=torch.float32)

model.eval()
with torch.no_grad():
    # One batched forward pass over every question row; squeeze(-1) turns the
    # (N, 1) model output into N scalar predictions.
    predictions = model(input_).squeeze(-1).tolist()

# Build the submission table in one go instead of growing it row by row.
# 'Exp #' is the 0-based row position and predictions are written in
# scientific notation with four decimals.
pd_pred = pd.DataFrame({
    'Exp #': list(range(len(predictions))),
    't12_simulated': [f"{pred:.4e}" for pred in predictions],
})
pd_pred.to_csv('submission_val.csv', index=False)

100
100


### 测试集 test set
参赛者无法获取测试集或在提交后得到测试集得分
Contestants cannot access the test set, nor see their test-set score after submitting.

In [30]:
# Load the hidden-test questions and select the model's input columns.
data_file_name = 'data/data_test/test_data_question.dat'
data = pd.read_csv(data_file_name)
input_columns = [1, 2, 3]
initial_c = data.iloc[:, input_columns].values
input_test = torch.tensor(initial_c, dtype=torch.float32)

model.eval()
with torch.no_grad():
    # Predict for every row of the test file itself. The row count must come
    # from this data, not from a sample count left over from an earlier cell,
    # or the submission is truncated (or indexing fails) when sizes differ.
    # squeeze(-1) turns the (N, 1) model output into N scalar predictions.
    predictions = model(input_test).squeeze(-1).tolist()

# Build the submission table in one go instead of growing it row by row.
# 'Exp #' is the 0-based row position and predictions are written in
# scientific notation with four decimals.
pd_pred_test = pd.DataFrame({
    'Exp #': list(range(len(predictions))),
    't12_simulated': [f"{pred:.4e}" for pred in predictions],
})
pd_pred_test.to_csv('submission_test.csv', index=False)