In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import pathlib
import seaborn as sns
import pathlib
from torchmetrics.functional import r2_score
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
csv_path = pathlib.Path("./data/2022_Test_ML.csv")
df = pd.read_csv(csv_path)

In [4]:
df.head

<bound method NDFrame.head of      ind      s_mt      s_mq         d       h_p           QW           DP
0      0  1.409218  1.954108  2.867735  7.811623  1504.175621   501.563016
1      1  1.382565  2.006212  1.545090  8.124248  1240.530500   377.163158
2      2  1.622445  1.386172  1.192385  7.354709  1385.191155   457.404772
3      3  1.786172  1.591984  2.350701  4.324649  1602.841975  1429.795145
4      4  1.995591  2.024449  1.208417  6.993988  1326.290979   514.009143
..   ...       ...       ...       ...       ...          ...          ...
495  495  0.994188  0.898998  1.276553  7.450902  1372.713745   427.058560
496  496  2.067936  1.094389  2.198397  4.396794  1485.744424  1201.422759
497  497  1.847094  1.287174  2.134269  9.939880  1573.955147   344.570374
498  498  1.458717  1.057916  1.044088  9.110220  1090.437833   314.558218
499  499  0.811423  1.146493  2.106212  6.513026  1646.376404   671.620077

[500 rows x 7 columns]>

In [5]:
df.corr()

Unnamed: 0,ind,s_mt,s_mq,d,h_p,QW,DP
ind,1.0,-0.015201,-0.085577,0.030793,-0.033476,0.012205,0.032134
s_mt,-0.015201,1.0,0.002774,-0.002534,0.002058,-0.018162,-0.005937
s_mq,-0.085577,0.002774,1.0,0.003271,0.002259,-0.028739,-0.007786
d,0.030793,-0.002534,0.003271,1.0,-0.003678,0.449105,0.189363
h_p,-0.033476,0.002058,0.002259,-0.003678,1.0,-0.632678,-0.89803
QW,0.012205,-0.018162,-0.028739,0.449105,-0.632678,1.0,0.743292
DP,0.032134,-0.005937,-0.007786,0.189363,-0.89803,0.743292,1.0


In [6]:
a = [1,2,3,4,5,6]
print(a[:-2])
print(a[-2:])

[1, 2, 3, 4]
[5, 6]


In [7]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, data: pd.DataFrame, transfrom = None):
        self.data: pd.DataFrame = data
        self.transform = transfrom

    def __getitem__(self, index):
        x = np.asarray(self.data[[el for el in self.data.columns[:-2]]].iloc[index], dtype=np.float32)
        y = np.asarray(self.data[[el for el in self.data.columns[-2:]]].iloc[index], dtype=np.float32)
        return x,y
    def __len__(self):
        return len(self.data)
        

In [8]:
dataset = CustomDataset(df)
batch_size = 16
validation_split = .2
shuffle_dataset = True
random_seed= 42

In [9]:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

In [10]:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=valid_sampler)

In [11]:
model = nn.Sequential(
    nn.Linear(5, 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, 2)
)
model = model.to(device)
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [12]:
num_epochs = 10
for epoch in range(num_epochs):
    # Train:   
    for batch_index, (X_batch, y_batch) in enumerate(train_loader):
        y_pred = model(X_batch.to(device))
        loss = r2_score(y_pred, y_batch.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [13]:
x_test = [el for el in validation_loader]
print(x_test[0][0])

tensor([[490.0000,   1.4663,   2.0218,   2.7154,   6.4048],
        [278.0000,   2.2202,   1.3497,   2.3347,   6.0200],
        [204.0000,   1.9766,   1.7118,   1.9459,   4.5531],
        [333.0000,   2.6733,   1.4982,   1.7415,   7.6433],
        [ 79.0000,   1.3597,   1.4357,   2.6713,   6.3086],
        [316.0000,   1.8699,   2.0870,   2.6112,   5.6353],
        [238.0000,   1.8890,   1.0866,   2.9760,   5.0341],
        [154.0000,   1.6834,   1.7926,   1.5130,   8.8096],
        [394.0000,   2.6886,   1.0319,   1.8778,   7.2946],
        [352.0000,   1.2683,   1.1856,   1.4689,   5.1062],
        [381.0000,   2.5439,   0.9277,   2.5471,   5.9599],
        [414.0000,   2.5705,   1.6441,   1.6653,   6.3808],
        [ 77.0000,   1.1541,   1.6884,   1.5972,   4.1683],
        [477.0000,   2.0261,   1.9020,   2.0461,   5.6232],
        [104.0000,   1.4854,   0.8756,   1.2725,   8.0521],
        [304.0000,   2.6200,   1.2820,   2.6673,   5.6954]])


In [17]:
test = [el for el in validation_loader]
model.eval()
y_pred = model(test[0][0][1].to(device))
result_metric = r2_score(y_pred.to(device), test[0][1][1].to(device))
print(result_metric)


tensor(-3.8558, device='cuda:0', grad_fn=<MeanBackward0>)
