In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

from Baseline import Baseline
from RegressionTrainingTools import Trainer
# Seed PyTorch's global RNG so batch shuffling and weight initialisation start
# from the same state on every Restart & Run All (GPU kernels may still add
# small nondeterminism).
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the preprocessed train / test tables (parsed with the pyarrow CSV engine).
x_train = pd.read_csv('data/train_preprocessed.csv', engine='pyarrow')
x_test = pd.read_csv('data/test_preprocessed.csv', engine='pyarrow')

# Keep only the training rows whose CI_HOUR is non-zero. The explicit .copy()
# makes x_train an independent frame rather than a filtered slice, so the
# column drops and assignments performed in later cells act on it unambiguously
# (no SettingWithCopyWarning).
x_train = x_train.loc[x_train['CI_HOUR'] != 0, :].copy()

In [None]:
# Preview the first five training rows after the CI_HOUR filter.
x_train.head(n=5)

# Preprocessing

In [None]:
# Columns removed from both tables before modelling.
drop_columns = ['ATA', 'SHIPMANAGER', 'FLAG', 'ARI', 'U_WIND', 'V_WIND', 'AIR_TEMPERATURE', 'BN']

# Rebind the result instead of using inplace=True: the frames are never
# mutated behind a previously displayed view, and dropping from a filtered
# frame no longer goes through the in-place path. A missing column still
# raises KeyError, so a typo in the list is not silently ignored.
x_train = x_train.drop(columns=drop_columns)
x_test = x_test.drop(columns=drop_columns)

In [None]:
# Preview the first five training rows once the unused columns are gone.
x_train.head(n=5)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns rescaled to the [-1, 1] range.
scaling_columns = ['DEADWEIGHT', 'PORT_SIZE', 'DUBAI', 'BDI_ADJ', 'BREADTH', 'DEPTH', 'DRAUGHT']
scaler = MinMaxScaler(feature_range=(-1, 1))

# The scaler learns min/max from the training table only and reuses them for
# the test table, so test values outside the training range can land outside
# [-1, 1].
#
# Assign with frame[cols] = ... (whole-column replacement) rather than
# frame.loc[:, cols] = ...: the .loc form writes into the existing columns in
# place, which is a deprecated upcast path when a column is integer-typed and
# receives the float output of the scaler.
x_train[scaling_columns] = scaler.fit_transform(x_train[scaling_columns])
x_test[scaling_columns] = scaler.transform(x_test[scaling_columns])

# NOTE(review): the scaler is fitted before the train/validation split made in
# the "Data Preparation" section, so validation rows contribute to the learned
# min/max. Fit after the split if a leakage-free validation score is needed.

In [None]:
# Preview the first five training rows after min-max scaling.
x_train.head(n=5)

In [None]:
# Sanity check: the training table should have exactly one more column than
# the test table (presumably the CI_HOUR target, which is split off later).
# Fail loudly instead of only printing a boolean, so a mismatch cannot slip
# through to tensor construction and model input sizing.
feature_counts_match = (x_train.shape[1] - 1) == x_test.shape[1]
assert feature_counts_match, (
    f"column count mismatch: train has {x_train.shape[1]} columns "
    f"(expected test + 1), test has {x_test.shape[1]}"
)
print(feature_counts_match)

In [None]:
# Write the full processed test table to disk first, then keep only the rows
# with a non-zero DIST for the in-memory x_test used below.
x_test.to_csv('data/test_4dl.csv', index=False, encoding='UTF-8')
has_nonzero_dist = x_test['DIST'] != 0
x_test = x_test.loc[has_nonzero_dist, :]

# Data Preparation

In [None]:
# Regression target: log(1 + CI_HOUR). np.log1p is the numerically accurate
# form of np.log(x + 1) for small x.
y_train = np.log1p(x_train['CI_HOUR'])

# Feature table without the target. Rebinding (no inplace=True) leaves the
# source frame untouched until x_train is reassigned to a tensor below.
train_features = x_train.drop(columns=['CI_HOUR'])

# Convert to float32 tensors. to_numpy(dtype=np.float32) casts every column
# explicitly, so frames mixing bool / int / float columns still yield a numeric
# array; torch.tensor replaces the legacy torch.FloatTensor constructor.
x_train = torch.tensor(train_features.to_numpy(dtype=np.float32))
y_train = torch.tensor(y_train.to_numpy(dtype=np.float32))
x_test = torch.tensor(x_test.to_numpy(dtype=np.float32))

# NOTE(review): the targets are 1-D, shape (N,). If Baseline emits (N, 1),
# nn.MSELoss would broadcast the two against each other; confirm that Trainer
# aligns the shapes.

# Hold out 20% of the training rows for validation (fixed split seed).
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, random_state=42, test_size=0.2)

train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)
test_dataset = TensorDataset(x_test)  # features only; no labels for the test table

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1024, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

# Modeling

In [None]:
# Build the network and move it to the selected device.
# NOTE(review): input_dim=12 presumably matches the number of feature columns
# fed to the model; confirm against the prepared training tensor.
model = Baseline(input_dim=12)
model = model.to(device)

# Adam optimiser over all model parameters, trained against mean squared error.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [None]:
# Create the checkpoint directory up front so a long training run cannot fail
# at the moment the first checkpoint is written. This is harmless if Trainer
# already creates it (NOTE(review): Trainer's save logic is not visible here).
CHECKPOINT_PATH = 'checkpoints/best_model.pt'
Path(CHECKPOINT_PATH).parent.mkdir(parents=True, exist_ok=True)

# Train for up to 100 epochs with a patience of 7, passing both the training
# and the validation loader; the value returned by train() is kept as best_model.
trainer = Trainer(criterion, device, save_path=CHECKPOINT_PATH)
best_model = trainer.train(model, optimizer, train_loader, val_loader, patience=7, epochs=100)

In [None]:
# Run Trainer.test on the validation loader: the test dataset built above holds
# features only, so the validation split is the labelled hold-out data here.
# NOTE(review): which model weights Trainer.test evaluates is defined in
# RegressionTrainingTools; confirm it uses the best checkpoint.
trainer.test(val_loader)