In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from data_utils import split_data_to_traj_and_control, mat2tracks
import wandb
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from pathlib import Path
from tqdm import tqdm

from train_utils import train_epoch, eval_epoch
from models import SplittedModel, SplittedModel2, DummyModel2


In [3]:
# Root folder of the experiment data. Every entry inside it is expected to be
# named "<N>_<suffix>" with an integer N (e.g. "10_traj"); any other entry makes
# the int() conversion in the sort key raise ValueError.
data_root = Path("./data")
# Sort by the integer prefix so that "100_traj" comes after "50_traj"
# (a plain string sort would put "100_traj" before "10_traj"'s successors).
folder_names = sorted(os.listdir(data_root), key=lambda name: int(name.split("_")[0]))
folder_paths = [data_root / name for name in folder_names]
folder_paths

[WindowsPath('data/10_traj'),
 WindowsPath('data/25_traj'),
 WindowsPath('data/50_traj'),
 WindowsPath('data/100_traj'),
 WindowsPath('data/200_traj'),
 WindowsPath('data/500_traj'),
 WindowsPath('data/750_traj'),
 WindowsPath('data/1000_traj')]

In [4]:
# Take the first entry of folder_paths for a quick look at the raw arrays.
cur_dir_path = folder_paths[0]

# The training and validation .mat files live side by side in the dataset folder.
train_mat_path, val_mat_path = (
    cur_dir_path / file_name for file_name in ("sdreDataset.mat", "sdreVal.mat")
)

# Each file is indexed by the name of the variable that holds the data.
train = scipy.io.loadmat(train_mat_path)["dataset"]
val = scipy.io.loadmat(val_mat_path)["sdreVal"]

In [5]:
# Quick sanity check: show the array shapes of the loaded training and validation data.
print(train.shape, val.shape)

(1010, 3, 3) (10100, 3, 3)


In [9]:
def prepare_dataloaders_from_track(train, val, reshape=True, batch_size=64):
    """Build the training and evaluation DataLoaders from raw track arrays.

    Both arrays are converted with ``mat2tracks``, the resulting tracks are
    stacked row-wise with ``np.vstack`` and then passed to
    ``split_data_to_traj_and_control`` to obtain the datasets.

    Args:
        train: Raw training array (as loaded from the ``.mat`` file).
        val: Raw validation array.
        reshape: Forwarded unchanged to ``mat2tracks``.
        batch_size: Batch size used for both loaders. Defaults to 64, the value
            that was previously hard-coded.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles and
        drops the last incomplete batch; the evaluation loader keeps the dataset
        order and every sample.
    """
    train_tracks = np.vstack(mat2tracks(train, reshape=reshape))
    val_tracks = np.vstack(mat2tracks(val, reshape=reshape))

    train_dataset = split_data_to_traj_and_control(train_tracks)
    test_dataset = split_data_to_traj_and_control(val_tracks)
    print(f"len(train) = {len(train_dataset)} len(test) = {len(test_dataset)}")

    # NOTE: with drop_last=True a training set smaller than batch_size yields
    # no batches at all, so keep batch_size <= len(train_dataset).
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True)

    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size)

    return train_loader, test_loader

In [15]:
def get_model_crit_opt(hidden_dim_1=64, 
                       hidden_dim_2=64, 
                       dropout_rate=0., 
                       type_model="monolit"):
    """Create a model together with its device, loss function and optimizer.

    Args:
        hidden_dim_1: Passed to the model constructor as ``hidden_dim_1``.
        hidden_dim_2: Passed to the model constructor as ``hidden_dim_2``.
        dropout_rate: Passed to ``DummyModel2`` only; it is ignored when
            ``type_model == "anfislike"`` because ``SplittedModel2`` is built
            without it.
        type_model: ``"monolit"`` builds ``DummyModel2``; ``"anfislike"`` builds
            ``SplittedModel2``.

    Returns:
        tuple: ``(model, device, criteria, optimizer)`` where ``device`` is the
        string ``"cuda"`` or ``"cpu"``, ``criteria`` is ``nn.MSELoss()`` and
        ``optimizer`` is Adam over the model parameters with default settings.

    Raises:
        ValueError: If ``type_model`` is not one of the supported names.
    """
    supported_types = ("monolit", "anfislike")
    # Fail early with a clear message; previously an unknown name left `model`
    # unassigned and crashed later with an UnboundLocalError.
    if type_model not in supported_types:
        raise ValueError(
            f"Unknown type_model {type_model!r}; expected one of {supported_types}"
        )

    device = "cuda" if torch.cuda.is_available() else "cpu"

    if type_model == "monolit":
        model = DummyModel2(hidden_dim_1=hidden_dim_1,
                            hidden_dim_2=hidden_dim_2,
                            dropout_rate=dropout_rate)
    else:  # "anfislike"
        model = SplittedModel2(hidden_dim_1=hidden_dim_1,
                               hidden_dim_2=hidden_dim_2)

    model.to(device)
    criteria = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())
    return model, device, criteria, optimizer

def train_one_model(train, val, 
                    n_epoch=60,
                    hidden_dim_1=64, 
                    hidden_dim_2=64, 
                    dropout_rate=0.,
                    wandb_loggging=False, 
                    save_weights=False, 
                    type_model="monolit"):
    """Train one model and return its best validation loss.

    Args:
        train: Raw training array, forwarded to ``prepare_dataloaders_from_track``.
        val: Raw validation array, forwarded to ``prepare_dataloaders_from_track``.
        n_epoch: Number of training epochs.
        hidden_dim_1: Forwarded to ``get_model_crit_opt``.
        hidden_dim_2: Forwarded to ``get_model_crit_opt``.
        dropout_rate: Forwarded to ``get_model_crit_opt``.
        wandb_loggging: If true, log ``val_loss`` and ``epoch`` to wandb after
            every epoch. (The spelling is kept for backward compatibility.)
            This function never initialises wandb itself, so the caller is
            presumably expected to have an active run.
        save_weights: If true, save the model ``state_dict`` every time the
            validation loss improves.
        type_model: Model kind, forwarded to ``get_model_crit_opt``.

    Returns:
        float: The lowest validation loss seen over all epochs, or
        ``float("inf")`` if no epoch produced a comparable loss
        (e.g. ``n_epoch == 0``).
    """
    train_loader, test_loader = prepare_dataloaders_from_track(train, val)

    model, device, criteria, optimizer = get_model_crit_opt(
        hidden_dim_1=hidden_dim_1,
        hidden_dim_2=hidden_dim_2,
        dropout_rate=dropout_rate,
        type_model=type_model,
    )

    # Start from +inf so that any finite loss counts as an improvement and the
    # return value is always a plain float.
    best_loss = float("inf")

    save_path = None
    if save_weights:
        # One checkpoint name per model kind, so the two kinds do not overwrite
        # each other's weights.
        if type_model == "monolit":
            save_path = f"MLP_3_{hidden_dim_1}_{hidden_dim_2}_6_best.pth"
        else:
            save_path = "splitted_model_best.pth"

    for epoch in tqdm(range(n_epoch)):
        train_epoch(model, device, train_loader, criteria, optimizer)
        # Convert once to a Python float so comparison, logging and the return
        # value do not depend on the concrete scalar type eval_epoch returns.
        val_loss = float(eval_epoch(model, device, test_loader, criteria))

        if val_loss < best_loss:
            best_loss = val_loss
            if save_weights:
                torch.save(model.state_dict(), save_path)

        if wandb_loggging:
            wandb.log({
                "val_loss": val_loss,
                "epoch": epoch
            })

    return best_loss

# Monolithic Model

In [16]:
# Train the "monolit" model once per dataset folder and record the best
# validation loss against the trajectory count taken from the folder name.
n_traj = []
losses = []

for cur_dir_path in folder_paths:
    train_mat_path = cur_dir_path / "sdreDataset.mat"
    val_mat_path = cur_dir_path / "sdreVal.mat"
    train = scipy.io.loadmat(train_mat_path)["dataset"]
    val = scipy.io.loadmat(val_mat_path)["sdreVal"]
    best_loss = train_one_model(train, val, type_model="monolit")

    losses.append(best_loss)
    # Path.name is the last path component on every OS; splitting str(path) on
    # "\\" only worked with Windows separators.
    n_traj.append(int(cur_dir_path.name.split("_")[0]))
    print(f"best_loss = {best_loss}")

len(train) = 1010 len(test) = 10100


100%|██████████| 60/60 [00:16<00:00,  3.65it/s]


best_loss = 0.017369501292705536
len(train) = 2525 len(test) = 10100


100%|██████████| 60/60 [00:19<00:00,  3.14it/s]


best_loss = 0.013264141045510769
len(train) = 5050 len(test) = 10100


100%|██████████| 60/60 [00:23<00:00,  2.51it/s]


best_loss = 0.007438871543854475
len(train) = 10100 len(test) = 10100


100%|██████████| 60/60 [00:36<00:00,  1.64it/s]


best_loss = 0.006379625294357538
len(train) = 20200 len(test) = 10100


100%|██████████| 60/60 [01:00<00:00,  1.00s/it]


best_loss = 0.00578113179653883
len(train) = 50500 len(test) = 10100


100%|██████████| 60/60 [02:06<00:00,  2.10s/it]


best_loss = 0.005083444528281689
len(train) = 75750 len(test) = 10100


 68%|██████▊   | 41/60 [02:48<02:54,  9.16s/it]

# ANFIS-like Model

In [None]:
# Train the "anfislike" model once per dataset folder and record the best
# validation loss against the trajectory count taken from the folder name.
n_traj_anfis = []
losses_anfis = []

for cur_dir_path in folder_paths:
    train_mat_path = cur_dir_path / "sdreDataset.mat"
    val_mat_path = cur_dir_path / "sdreVal.mat"
    train = scipy.io.loadmat(train_mat_path)["dataset"]
    val = scipy.io.loadmat(val_mat_path)["sdreVal"]
    best_loss = train_one_model(train, val, type_model="anfislike")

    losses_anfis.append(best_loss)
    # Path.name is the last path component on every OS; splitting str(path) on
    # "\\" only worked with Windows separators.
    n_traj_anfis.append(int(cur_dir_path.name.split("_")[0]))
    print(f"best_loss = {best_loss}")

# Visualization

In [None]:
# Best validation loss versus number of training trajectories, one curve per
# model kind.
plt.scatter(n_traj, losses)
plt.plot(n_traj, losses, "--", label="monolit")
# The original call had a stray comma and two label= keywords (a syntax error).
plt.plot(n_traj_anfis, losses_anfis, "--", label="anfislike")

plt.grid()
plt.legend()
# x label: "Number of tracks used to train the NN"
plt.xlabel("Число треков для обучения NN")
# y label: "NN MSE on validation"
plt.ylabel("MSE NN на валидации")

# Save Results

In [None]:
import pickle

# Store the monolit sweep (trajectory counts and best losses) in
# "<savename>.pickle" in the current working directory.
savename = "monolit_model"
save_data = dict(n_traj=n_traj, losses=losses)

with open(savename + ".pickle", "wb") as handle:
    pickle.dump(save_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import pickle

# Store the anfislike sweep (trajectory counts and best losses) in
# "<savename>.pickle" in the current working directory.
savename = "3_model_anfislike"
save_data = dict(n_traj=n_traj_anfis, losses=losses_anfis)

with open(savename + ".pickle", "wb") as handle:
    pickle.dump(save_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# cur_dir_path = folder_paths[0]

# train_mat_path = cur_dir_path / "sdreDataset.mat"
# val_mat_path = cur_dir_path / "sdreVal.mat"
# train = scipy.io.loadmat(train_mat_path)["dataset"]
# val = scipy.io.loadmat(val_mat_path)["sdreVal"]

# best_loss = train_one_model(train, val)

In [73]:
# for cur_dir_path in folder_paths:
#     train_mat_path = cur_dir_path / "sdreDataset.mat"
#     val_mat_path = cur_dir_path / "sdreVal.mat"

#     train = scipy.io.loadmat(train_mat_path)["dataset"]
#     val = scipy.io.loadmat(val_mat_path)["sdreVal"]
#     print(train.shape, val.shape)