In [None]:
import pandas as pd
import numpy as np

import torch

import os

In [None]:
# Choose which cross-validation fold to train and which device to train on.

current_fold = 0

# A single CUDA check decides both the string form (handed to torchsummary
# further down) and the torch.device object used for tensors and the model.
if torch.cuda.is_available():
    device_str = "cuda"
    torch.cuda.set_device(0)  # make GPU 0 the current CUDA device
else:
    device_str = "cpu"
device = torch.device(device_str)

In [None]:
# Read the raw training table and its labels from the parent directory.

# Rows of the feature table are indexed by the 'id' column.
X_train_raw = pd.read_csv('../X_train.csv', index_col='id')

# The label file uses its first column as index; only column 'y' is kept,
# as a plain NumPy array so it can be indexed positionally later on.
labels_frame = pd.read_csv('../y_train.csv', index_col=0)
y_train_raw = labels_frame['y'].to_numpy()

In [None]:
# Expand all signals to 18000 length

from myutils import multi_features

# Limit OpenMP to a single thread before calling the helper.
# NOTE(review): presumably this avoids oversubscription when multi_features
# fans out over n_cores workers — confirm against myutils.
os.environ.update({'OMP_NUM_THREADS': "1"})

# NOTE(review): n_cores=128 is hard-coded; verify it suits the target machine.
train_expanded = multi_features(X_train_raw, n_cores=128)

In [None]:
import json

def load_file(name, index, path='../split/'):
    """Load one JSON split file and return its decoded content.

    The file that is read is ``<path>/<name><index>.json``.

    Args:
        name: File-name prefix (this notebook uses 'train' and 'val').
        index: Value appended to ``name``; formatted with ``str``.
        path: Directory that holds the split files. Defaults to
            '../split/', the location that used to be hard-coded.

    Returns:
        Whatever ``json.load`` decodes from the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # os.path.join copes with ``path`` given with or without a trailing
    # separator, unlike plain string concatenation.
    file_path = os.path.join(path, f"{name}{index}.json")
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
    
# Row positions belonging to the current fold, read from the split files
# (train first, then val).
train_indices, val_indices = (
    np.array(load_file(part, current_fold)) for part in ('train', 'val')
)

# Select the rows positionally and insert a new axis at position 1, which
# turns each 2-D selection into a 3-D NumPy array.
X_train, X_val = (
    np.expand_dims(train_expanded.iloc[rows], 1)
    for rows in (train_indices, val_indices)
)

# Labels are already a NumPy array, so plain fancy indexing is enough.
y_train, y_val = y_train_raw[train_indices], y_train_raw[val_indices]

print(X_train.shape, y_train.shape)

In [None]:
from deepnet import MyDataset
from torch.utils.data import DataLoader

batch_size = 32

# Wrap the fold's arrays in the project's dataset class.
dataset = MyDataset(X_train, y_train)
val_dataset = MyDataset(X_val, y_val)

# Reshuffle the training samples at every epoch so the mini-batches are not
# always composed of the same rows in the same order (DataLoader defaults to
# shuffle=False). Validation metrics do not depend on sample order, so the
# validation loader stays sequential.
# NOTE(review): shuffle=True needs a map-style dataset (__len__/__getitem__);
# confirm that deepnet.MyDataset is one.
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

In [None]:
from torchsummary import summary
from deepnet import DeepNet

# Build the network and move it to the selected device.
# NOTE(review): in_channels=18000 equals the padded signal length, while the
# axis added with expand_dims has size 1 — confirm what DeepNet expects.
model = DeepNet(in_channels=18000, n_classes=4)
model = model.to(device)

# Per-sample input shape: everything after the leading sample axis of X_train.
n_channels, signal_len = X_train.shape[1], X_train.shape[2]
print(n_channels, signal_len)
summary(model, (n_channels, signal_len), device=device_str)

In [None]:
import torch.optim as optim

# Turn off the model's `verbose` attribute.
# NOTE(review): presumably this disables debug printing inside DeepNet — confirm.
model.verbose = False

# Loss for the classification targets.
loss_func = torch.nn.CrossEntropyLoss()

# Adam without weight decay.
optimizer = optim.Adam(model.parameters(), weight_decay=0, lr=1e-3)

# Multiply the learning rate by 0.1 once the value handed to scheduler.step()
# has stopped decreasing ('min' mode) for `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=10,
    factor=0.1,
)

In [None]:
from sklearn.metrics import f1_score
from tqdm import tqdm

# Train for n_epoch epochs. After every epoch the model is evaluated on the
# validation fold, the LR scheduler is stepped on the validation loss, and the
# weights are checkpointed whenever the micro-averaged F1 improves.
n_epoch = 100
step = 50  # running count of optimizer updates (initialised to 50)
curr_best = 0   # best validation micro-F1 seen so far
curr_name = ""  # path of the checkpoint that holds curr_best ("" = none yet)
for epoch in tqdm(range(n_epoch), desc="epoch"):

    # train: one full pass over the training loader
    model.train()
    for batch in dataloader:
        input_x, input_y = tuple(t.to(device) for t in batch)
        pred = model(input_x)
        loss = loss_func(pred, input_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1

    # test: gather model outputs, labels and the summed loss on the validation fold
    model.eval()
    all_pred_prob = []
    true_pred = []
    val_loss_sum = 0.0
    n_val_samples = 0
    with torch.no_grad():
        for batch in val_dataloader:
            input_x, input_y = tuple(t.to(device) for t in batch)
            pred = model(input_x)
            # loss_func yields the batch mean; weight it by the batch size so
            # a smaller final batch is not over-represented in the average.
            n_in_batch = input_y.shape[0]
            val_loss_sum += loss_func(pred, input_y).item() * n_in_batch
            n_val_samples += n_in_batch
            all_pred_prob.append(pred.cpu().numpy())
            true_pred.append(input_y.cpu().numpy())
    all_pred_prob = np.concatenate(all_pred_prob)
    all_pred = np.argmax(all_pred_prob, axis=1)

    all_true_pred = np.concatenate(true_pred)

    # ReduceLROnPlateau must monitor a quality metric. Stepping it with the
    # epoch index (a value that only ever grows) would make a 'min'-mode
    # scheduler cut the learning rate on a fixed timer, regardless of how the
    # model is doing. Use the mean validation loss instead.
    val_loss = val_loss_sum / n_val_samples
    scheduler.step(val_loss)

    # NOTE: despite its name, micro_score holds the per-class F1 scores
    # (average=None returns one value per class).
    micro_score = f1_score(all_true_pred, all_pred, average=None)
    print(micro_score)

    score = f1_score(all_true_pred, all_pred, average='micro')
    print(score)

    if score > curr_best:
        # Best-effort removal of the previous best checkpoint: a failure is
        # reported but must not abort training.
        if curr_name:
            try:
                os.remove(curr_name)
            except OSError as err:
                print(f"could not remove old checkpoint {curr_name}: {err}")
        curr_best = score
        curr_name = f"../models/fold{current_fold}_fella_{score}.pth"
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(curr_name), exist_ok=True)
        torch.save(model.state_dict(), curr_name)