In [None]:
!pip install pytorch_ranger

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import time
from scipy.signal import welch
from scipy.fft import fft
import polars as pl  
import torch
from torch import nn,Tensor
import torch.nn.functional as F
from torch.optim.lr_scheduler import ExponentialLR

from pytorch_ranger import Ranger
from torch.utils.data import DataLoader, TensorDataset
from scipy.special import softmax
import tqdm
from sklearn.model_selection import train_test_split

import pickle



In [None]:
# ---- Expressions applied to every frame that carries a `timestamp` column ----
# One parsing expression, reused for each derived calendar field.
parsed_timestamp = pl.col('timestamp').str.to_datetime()


def _calendar_field(name):
    """Return the UInt8 expression for the `name` component of the parsed timestamp."""
    return getattr(parsed_timestamp.dt, name)().cast(pl.UInt8).alias(name)


dt_transforms = [
    parsed_timestamp,  # overwrite the string column with real datetimes
    (parsed_timestamp.dt.year() - 2000).cast(pl.UInt8).alias('year'),  # years since 2000
    *[_calendar_field(part) for part in ('month', 'day', 'hour', 'minute', 'second')],
]

# ---- Expressions that shrink the sensor columns ----
anglez_as_int16 = pl.col('anglez').cast(pl.Int16)
enmo_as_uint16 = (pl.col('enmo') * 1000).cast(pl.UInt16)  # scaled by 1000 before the integer cast
data_transforms = [anglez_as_int16, enmo_as_uint16]

series_transforms = dt_transforms + data_transforms

# Training series (lazy scan)
train_series = pl.scan_parquet(
    '/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet'
).with_columns(series_transforms)

# Training events (eager read)
train_events = pl.read_csv(
    '/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv'
).with_columns(dt_transforms)

# Test series (lazy scan)
test_series = pl.scan_parquet(
    '/kaggle/input/child-mind-institute-detect-sleep-states/test_series.parquet'
).with_columns(series_transforms)

# Series ids in order of first appearance in the events table
series_ids = train_events.get_column('series_id').unique(maintain_order=True).to_list()


def _events_per_series(event_name):
    """Row count per series for one event type, ordered by series_id."""
    per_series = (
        train_events
        .filter(pl.col('event') == event_name)
        .group_by('series_id')
        .count()
        .sort('series_id')
    )
    return per_series['count']


# Series whose number of onset rows differs from the number of wakeup rows
onset_counts = _events_per_series('onset')
wakeup_counts = _events_per_series('wakeup')

counts = pl.DataFrame({
    'series_id': sorted(series_ids),
    'onset_counts': onset_counts,
    'wakeup_counts': wakeup_counts,
})
count_mismatches = counts.filter(pl.col('onset_counts') != pl.col('wakeup_counts'))

# Drop the mismatching series from both the signal and the event frames
not_mismatched = ~pl.col('series_id').is_in(count_mismatches['series_id'])
train_series = train_series.filter(not_mismatched)
train_events = train_events.filter(not_mismatched)

# Refresh the id list, keeping only series that still have at least one fully non-null event row
series_ids = (
    train_events
    .drop_nulls()
    .get_column('series_id')
    .unique(maintain_order=True)
    .to_list()
)



In [None]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def create_gaussian_array(n, centrs, sigma):
    """
    Build a length-``n`` array that is a sum of Gaussian bumps, min-max scaled to [0, 1].

    :param n: Length of the returned array.
    :param centrs: Iterable of bump centres, given as positions on the 0..n-1 axis.
        Each centre must satisfy ``0 <= int(centr) < n`` because the bump is
        rescaled by its value at index ``int(centr)``.
    :param sigma: Standard deviation (width) shared by all bumps.
    :return: ``numpy`` float array of length ``n``. When there are no centres, or
        the summed signal is constant, an all-zero array is returned (the previous
        implementation divided 0 by 0 there and returned NaNs).
    """
    ans = np.zeros(n)
    if n == 0:
        # Nothing to fill; np.min/np.max would raise on an empty array.
        return ans

    # The sample positions do not depend on the centre, so build them once.
    x = np.linspace(0, n - 1, n)
    for centr in centrs:
        gaussian = np.exp(-((x - centr) ** 2) / (2 * sigma ** 2))
        # Rescale so that the value at the (integer) centre index is exactly 1.
        gaussian /= gaussian[int(centr)]
        ans += gaussian

    lowest = np.min(ans)
    span = np.max(ans) - lowest
    if span == 0:
        # Flat signal (e.g. no centres): min-max scaling is undefined, use zeros.
        return np.zeros(n)

    return (ans - lowest) / span

# Example: two Gaussian bumps on a 17000-sample axis
n = 17000  # array length
centr = [2000, 7000]  # bump centres
sigma = 100  # standard deviation

gaussian_array = create_gaussian_array(n=n, centrs=centr, sigma=sigma)

# Show the numeric values
print(gaussian_array)

# Draw the resulting curve
fig, ax = plt.subplots()
ax.plot(gaussian_array)
ax.set_title("Гауссово распределение с центром в точке {}".format(centr))
plt.show()


In [None]:
class Dataset(nn.Module):
    """
    Windowed view over a single accelerometer series, selected by ``series_id``.

    The series is trimmed so that it starts at the first sample whose clock time
    is ``hour``:00:00 and is then cut into consecutive, non-overlapping windows of
    ``ITEMS_PER_WINDOW`` samples (called ``items_per_day`` elsewhere in the notebook).

    ``len(dataset)`` is the number of complete windows. ``dataset[i]`` returns the
    tuple ``(X1, X2, y)``:

    * ``X1`` - float32 tensor ``(ITEMS_PER_WINDOW, 2)`` with columns ``[enmo, anglez]``;
    * ``X2`` - float32 tensor ``(6,)`` with the window means of the cyclical
      month / hour / weekday encodings (see ``FEATURE_COLUMNS``);
    * ``y``  - float32 tensor ``(ITEMS_PER_WINDOW * 2,)``: the Gaussian-smoothed
      ``[is_wakeup, is_onset]`` targets flattened row by row.

    NOTE(review): because ``y`` is flattened row by row, wakeup and onset values
    alternate (w0, o0, w1, o1, ...). The models in this notebook concatenate a
    full wakeup head and a full onset head instead - confirm which layout is
    intended before comparing halves of ``y`` with halves of a prediction.

    Attributes:
        data_series: pandas frame with the trimmed signal and the cyclical features.
        data_events: pandas frame with the events of this series.
        target: pandas frame with the smoothed ``is_wakeup`` / ``is_onset`` columns,
            trimmed like ``data_series``.
    """

    # Samples per window.
    ITEMS_PER_WINDOW = 12 * 30
    # Columns averaged per window to build X2.
    FEATURE_COLUMNS = ['month_sin', 'month_cos', 'hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos']

    def __init__(self, data_series, data_events, series_id, sigma=90, hour=15):
        """
        :param data_series: polars LazyFrame with the signal of all series (it is
            filtered by ``series_id`` and collected here).
        :param data_events: polars frame with the events of all series.
        :param series_id: Identifier of the series to load.
        :param sigma: Width of the Gaussian placed on every event; the original
            author suggests roughly 70-100.
        :param hour: Clock hour (0-23) at which the first window starts.
        :raises IndexError: if the series has no sample at ``hour``:00:00.
        """
        # Initialise nn.Module bookkeeping so the module API (repr, .to, ...) works.
        super().__init__()

        data_series = data_series.filter(pl.col('series_id') == series_id).collect()
        data_series = pd.DataFrame(data_series, columns=data_series.columns)
        # Position of the first sample taken exactly at `hour`:00:00.
        index = data_series.query(f'hour == {hour} and minute == 0 and second == 0').index[0]
        self.data_series = data_series.iloc[index:].reset_index(drop=True)

        # Cyclical encodings of month, hour and weekday.
        self.data_series['month_sin'] = self.data_series['month'].apply(lambda x: np.sin((x / 12) * 2*np.pi ) )
        self.data_series['month_cos'] = self.data_series['month'].apply(lambda x: np.cos((x / 12) * 2*np.pi ) )

        self.data_series['hour_sin'] = self.data_series['hour'].apply(lambda x: np.sin((x / 24) * 2*np.pi ) )
        self.data_series['hour_cos'] = self.data_series['hour'].apply(lambda x: np.cos((x / 24) * 2*np.pi ) )

        self.data_series['weekday_sin'] = self.data_series['timestamp'].apply(lambda x: np.sin((x.weekday() / 7) * 2*np.pi ))
        self.data_series['weekday_cos'] = self.data_series['timestamp'].apply(lambda x: np.cos((x.weekday() / 7) * 2*np.pi ))

        data_events = data_events.filter(pl.col('series_id') == series_id)
        data_events = pd.DataFrame(data_events, columns=data_events.columns)
        self.data_events = data_events

        # Events with missing fields cannot be placed on the time axis.
        data_events = data_events.dropna(ignore_index=True)
        data_events = data_events.set_index('step')

        # Binary event markers, addressed by the event's `step` value.
        target = pd.DataFrame(data_series[['step']])
        target['is_wakeup'] = 0
        target.loc[data_events[data_events['event'] == 'wakeup'].index, 'is_wakeup'] = 1
        target['is_onset'] = 0
        target.loc[data_events[data_events['event'] == 'onset'].index, 'is_onset'] = 1

        # Replace every marker column by a sum of Gaussians centred on its events.
        for event in ['is_wakeup', 'is_onset']:
            ones_indices = target.index[target[event] == 1]
            target[event] = create_gaussian_array(len(target), ones_indices, sigma)

        self.target = target.iloc[index:].reset_index(drop=True)

    def __len__(self):
        """Number of complete windows available after trimming."""
        return len(self.data_series) // self.ITEMS_PER_WINDOW

    def __getitem__(self, index):
        """
        Return ``(X1, X2, y)`` for window ``index``.

        :raises IndexError: if ``index`` is outside ``0 .. len(self) - 1``.
        """
        if not 0 <= index < len(self):
            raise IndexError(f'window index {index} out of range for {len(self)} windows')

        items_per_window = self.ITEMS_PER_WINDOW
        rows = slice(index * items_per_window, (index + 1) * items_per_window)

        X1 = self.data_series[['enmo', 'anglez']].iloc[rows].values.astype(np.float32)
        X1 = torch.tensor(X1)

        # Convert through numpy with an explicit dtype, like X1 and y, instead of
        # handing a pandas Series to torch.tensor.
        X2 = self.data_series[self.FEATURE_COLUMNS].iloc[rows].mean()
        X2 = torch.tensor(X2.to_numpy(dtype=np.float32))

        y = self.target[['is_wakeup', 'is_onset']].iloc[rows].values.astype(np.float32)
        y = torch.tensor(y).reshape(items_per_window * 2)

        return X1, X2, y

    def get(self):
        """
        Stack all windows of the series.

        :return: ``(X1, X2, y)`` with shapes ``(n, W, 2)``, ``(n, 6)`` and
            ``(n, 2 * W)`` where ``n = len(self)`` and ``W = ITEMS_PER_WINDOW``.
            With ``n == 0`` correctly shaped empty tensors are returned.
        """
        n = len(self)
        items_per_window = self.ITEMS_PER_WINDOW
        if n == 0:
            return (
                torch.empty(0, items_per_window, 2),
                torch.empty(0, len(self.FEATURE_COLUMNS)),
                torch.empty(0, items_per_window * 2),
            )

        # Collect first and stack once instead of growing tensors inside the loop.
        X1, X2, y = zip(*(self[i] for i in range(n)))
        return torch.stack(X1), torch.stack(X2), torch.stack(y)

In [None]:
# Window the first remaining training series.
data = Dataset(data_series=train_series, data_events=train_events, series_id=series_ids[0])

In [None]:
# Stack every window of the series into three tensors: signals, window features, targets.
X1, X2, y = data.get()

In [None]:
# Shape of the per-window feature tensor returned by data.get().
X2.size()

In [None]:
# Flat target vector of window 17 (raises IndexError when there are fewer than 18 windows).
plt.plot(y[17, :])

In [None]:
# Channel 1 of window 17; Dataset.__getitem__ selects ['enmo', 'anglez'], so this is anglez.
plt.plot(X1[17, :, 1])

In [None]:
# Plot the smoothed targets of the first (up to) 10 windows.
for i in range(min(10, len(data))):
    # data[i] yields (signal, features, target); the target is flat, with the
    # wakeup and onset values alternating, so fold it back into two columns.
    X, _, y = data[i]
    y = y.reshape(-1, 2)
    steps = range(y.size()[0])

    # is_wakeup curve (blue)
    plt.plot(steps, y[:, 0], color='blue', label='is_wakeup')

    # is_onset curve (red)
    plt.plot(steps, y[:, 1], color='red', label='is_onset')

    # Axis labels and legend
    plt.xlabel('Индексы')
    plt.ylabel('Значения')
    plt.legend()

    # Show the figure
    plt.show()

In [None]:
class GRUModel(nn.Module):
    """
    GRU encoder followed by two Linear -> LayerNorm -> ReLU stages.

    The first stage keeps the GRU output width (``hidden_size`` times the number
    of directions); the second stage projects it down to ``hidden_size``.
    """

    def __init__(self, input_size, hidden_size, n_layers=1, bidir=True):
        """
        :param input_size: Number of features per time step.
        :param hidden_size: GRU hidden width and final output width.
        :param n_layers: Number of stacked GRU layers.
        :param bidir: Whether the GRU is bidirectional.
        """
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        # Width of the GRU output: doubled when both directions are used.
        gru_width = hidden_size * (2 if bidir else 1)

        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
            bidirectional=bidir,
        )
        self.fc1 = nn.Linear(gru_width, gru_width)
        self.ln1 = nn.LayerNorm(gru_width)
        self.fc2 = nn.Linear(gru_width, hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)

    def forward(self, x, h=None):
        """
        :param x: Input sequence handed to the GRU.
        :param h: Optional initial hidden state for the GRU.
        :return: ``(features, new_h)`` - the projected GRU outputs (last dimension
            ``hidden_size``) and the GRU's final hidden state.
        """
        features, new_h = self.gru(x, h)

        # Two identical post-processing stages: dense -> layer norm -> ReLU.
        for dense, norm in ((self.fc1, self.ln1), (self.fc2, self.ln2)):
            features = torch.relu(norm(dense(features)))

        return features, new_h

In [None]:
class ResidualBiGRU(nn.Module):
    """
    GRU block with a residual connection.

    The GRU output is widened (x2), projected back to ``hidden_size`` through two
    Linear -> LayerNorm -> ReLU stages and then added to the block input, so the
    input's last dimension must equal ``hidden_size``.
    """

    def __init__(self, hidden_size, n_layers=1, bidir=True):
        """
        :param hidden_size: Width of both the input and the output of the block.
        :param n_layers: Number of stacked GRU layers inside the block.
        :param bidir: Whether the GRU is bidirectional.
        """
        super().__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.gru = nn.GRU(
            hidden_size,
            hidden_size,
            n_layers,
            batch_first=True,
            bidirectional=bidir,
        )
        dir_factor = 2 if bidir else 1
        self.fc1 = nn.Linear(
            hidden_size * dir_factor, hidden_size * dir_factor * 2
        )
        self.ln1 = nn.LayerNorm(hidden_size * dir_factor * 2)
        self.fc2 = nn.Linear(hidden_size * dir_factor * 2, hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)

    def forward(self, x, h=None):
        """
        :param x: Input whose last dimension is ``hidden_size``.
        :param h: Optional initial hidden state for the GRU.
        :return: ``(res, new_h)`` - block output (same shape as ``x``) and the
            GRU's final hidden state.
        """
        res, new_h = self.gru(x, h)
        # res last dimension = hidden_size * number of directions

        res = self.fc1(res)
        res = self.ln1(res)
        res = nn.functional.relu(res)

        res = self.fc2(res)
        res = self.ln2(res)
        res = nn.functional.relu(res)

        # skip connection
        res = res + x

        return res, new_h


class MultiResidualBiGRU(nn.Module):
    """
    One stack of ``ResidualBiGRU`` blocks per input channel, merged by dense
    layers into two output heads (wakeup and onset).

    Fixes relative to the previous version:

    * the output heads take ``fc_out_size * 4`` features, which is what ``fc1``
      produces; they were still sized for two transposed convolutions that had
      been commented out, so ``forward`` always failed with a shape mismatch;
    * one stack is created per input channel (``n_inputs``) instead of always two;
    * the channel slice is no longer squeezed, so a batch of one works;
    * no dependence on a notebook-level ``device`` variable.
    """

    def __init__(self, input_size, hidden_size, n_layers, n_inputs, fc_out_size, out_size, bidir=True):
        """
        :param input_size: Length of each channel's sequence (size of ``X`` along dim 1).
        :param hidden_size: Width used inside the residual GRU stacks.
        :param n_layers: Number of ``ResidualBiGRU`` blocks per channel.
        :param n_inputs: Number of channels (size of ``X`` along dim 2).
        :param fc_out_size: Base width of the dense layers after the stacks.
        :param out_size: Output width of each head.
        :param bidir: Whether the GRUs are bidirectional.
        """
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.out_size = out_size
        self.n_layers = n_layers
        self.n_inputs = n_inputs

        # Dense embedding of a whole channel sequence, shared by all channels.
        self.fc_in = nn.Linear(input_size, hidden_size)
        self.ln = nn.LayerNorm(hidden_size)

        # res_bigrus[i][j] is block j of the stack that processes channel i.
        self.res_bigrus = nn.ModuleList(
            nn.ModuleList(
                ResidualBiGRU(hidden_size, n_layers=1, bidir=bidir)
                for _ in range(n_layers)
            )
            for _ in range(n_inputs)
        )
        self.fc = nn.Linear(hidden_size * n_inputs, fc_out_size * 2)
        self.fc1 = nn.Linear(fc_out_size * 2, fc_out_size * 4)

        # Both heads read the fc1 output directly.
        self.fc_out_1 = nn.Linear(fc_out_size * 4, out_size)
        self.fc_out_2 = nn.Linear(fc_out_size * 4, out_size)

    def forward(self, X, h=None):
        """
        :param X: Tensor of shape ``(batch, input_size, n_inputs)``.
        :param h: Optional list with one initial hidden state per block position;
            entry ``j`` is given to block ``j`` of every channel stack.
        :return: Tensor ``(batch, 2 * out_size)``: wakeup head followed by onset
            head, passed through a final ReLU.
        """
        if h is None:
            # no hidden state supplied: let every GRU start from zeros
            h = [None] * self.n_layers

        # NOTE(review): each stack receives a 2-D (batch, hidden_size) tensor; per
        # the nn.GRU documentation a 2-D input is one unbatched sequence, so the
        # batch axis acts as the time axis here - confirm this is intended.
        per_channel = []
        for i in range(self.n_inputs - 1, -1, -1):  # channels are visited last to first
            hidden = self.ln(self.fc_in(X[:, :, i]))  # (batch, hidden_size)
            for j, res_bigru in enumerate(self.res_bigrus[i]):
                hidden, _ = res_bigru(hidden, h[j])
            per_channel.append(hidden)

        x = self.fc(torch.cat(per_channel, 1))  # (batch, fc_out_size * 2)
        x = F.relu(x)
        x = self.fc1(x)                         # (batch, fc_out_size * 4)
        x = torch.relu(x)

        # Separate heads for wakeup and onset.
        y_is_wakeup = self.fc_out_1(x)
        y_is_onset = self.fc_out_2(x)

        y = torch.cat((y_is_wakeup, y_is_onset), 1)
        y = torch.relu(y)

        return y

In [None]:
class Allmodel(nn.Module):
    """
    Per-channel convolution + GRU encoder, merged with window-level features and
    decoded by two heads (wakeup and onset).

    Pipeline for every channel: two 1-D convolutions (kernel 3, then 5) ->
    ``GRUModel``. The channel encodings are concatenated with ``x_features``,
    passed through two dense layers, and each head then applies two transposed
    convolutions (kernel 3) followed by a linear layer. The heads are
    concatenated and squashed with a sigmoid.

    Fixes relative to the previous version:

    * the per-channel ``GRUModel`` instances live in an ``nn.ModuleList``; they
      were kept in a plain list, so their parameters were never registered and
      therefore never optimised, moved by ``.to()`` or stored in a state dict;
    * ``conv1``, ``conv1_out``, ``conv2`` and ``conv2_out`` are defined again;
      ``forward`` used them although their definitions were commented out;
    * the hard-coded sizes 354 and 404 are derived from ``input_size`` and
      ``fc_out_size`` (identical values for ``input_size=360``, ``fc_out_size=200``);
    * no dependence on a notebook-level ``device`` variable.
    """

    def __init__(self, input_size, input_size_features, hidden_size, n_inputs, fc_out_size, out_size):
        """
        :param input_size: Length of each channel's sequence (size of ``x`` along dim 1).
        :param input_size_features: Number of window-level features in ``x_features``.
        :param hidden_size: Width of each channel encoding.
        :param n_inputs: Number of channels (size of ``x`` along dim 2).
        :param fc_out_size: Width of the first dense layer after merging.
        :param out_size: Output width of each head.
        :raises ValueError: if ``input_size`` is too short for the two input convolutions.
        """
        super(Allmodel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.input_size_features = input_size_features
        self.n_inputs = n_inputs
        self.out_size = out_size

        # Not used by forward(); kept so that these attributes stay available.
        self.fc_in = nn.Linear(input_size, hidden_size*6)
        self.ln = nn.LayerNorm(hidden_size*6)

        self.conv_in = nn.Conv1d(1, 1, kernel_size=3, stride=1)
        self.conv_in_1 = nn.Conv1d(1, 1, kernel_size=5, stride=1)
        # Unpadded stride-1 convolutions shorten the sequence by kernel_size - 1 each.
        conv_out_len = input_size - (3 - 1) - (5 - 1)
        if conv_out_len <= 0:
            raise ValueError(f'input_size must be at least 7, got {input_size}')

        self.models = nn.ModuleList(
            GRUModel(conv_out_len, hidden_size)
            for _ in range(n_inputs)
        )
        self.fc = nn.Linear(hidden_size*n_inputs + input_size_features, fc_out_size)
        self.fc1 = nn.Linear(fc_out_size, fc_out_size*2)

        self.conv1 = nn.ConvTranspose1d(1, 1, kernel_size=3, stride=1)
        self.conv1_out = nn.ConvTranspose1d(1, 1, kernel_size=3, stride=1)

        self.conv2 = nn.ConvTranspose1d(1, 1, kernel_size=3, stride=1)
        self.conv2_out = nn.ConvTranspose1d(1, 1, kernel_size=3, stride=1)

        # Each stride-1 transposed convolution lengthens the sequence by kernel_size - 1.
        head_in = fc_out_size*2 + (3 - 1) + (3 - 1)
        self.fc_out_1 = nn.Linear(head_in, out_size)
        self.fc_out_2 = nn.Linear(head_in, out_size)

    def _decode_head(self, x, upsample, upsample_out, projection):
        """Run one output head: two transposed convolutions with ReLU, then a linear layer."""
        z = x.unsqueeze(1)  # (batch, 1, width): add the channel axis the convolutions expect
        z = torch.relu(upsample(z))
        z = torch.relu(upsample_out(z))
        return projection(z.squeeze(1))

    def forward(self, x, x_features, h=None):
        """
        :param x: Tensor ``(batch, input_size, n_inputs)`` on the same device as the model.
        :param x_features: Tensor ``(batch, input_size_features)``; it is moved to
            ``x``'s device and the merged representation is cast to float32.
        :param h: Optional initial hidden state for the first channel's GRU. The
            hidden state returned for one channel is passed on to the next one.
        :return: Tensor ``(batch, 2 * out_size)`` with values in (0, 1): wakeup
            head followed by onset head.
        """
        new_h = h
        encodings = []
        for i, model in enumerate(self.models):
            signal = x[:, :, i].unsqueeze(1)  # (batch, 1, input_size)

            out = torch.relu(self.conv_in(signal))
            out = torch.relu(self.conv_in_1(out))
            out = out.squeeze(1)  # (batch, input_size - 6)
            # NOTE(review): the GRU receives a 2-D tensor here; per the nn.GRU
            # documentation that is one unbatched sequence, so the batch axis
            # acts as the time axis - confirm this is intended.
            out, new_h = model(out, new_h)
            encodings.append(out)

        outs = torch.cat(encodings + [x_features.to(x.device)], 1).to(torch.float32)
        x = self.fc(outs)
        x = torch.relu(x)
        x = self.fc1(x)
        x = torch.relu(x)

        y_is_wakeup = self._decode_head(x, self.conv1, self.conv1_out, self.fc_out_1)
        y_is_onset = self._decode_head(x, self.conv2, self.conv2_out, self.fc_out_2)

        y = torch.cat((y_is_wakeup, y_is_onset), 1)
        y = torch.sigmoid(y)

        return y
    
        
        


In [None]:
# Scratch check: append the window features to the flat target of window 0 and
# confirm the size of the leading 12*30*2 entries (the target part).
X1, X2, y = data[0]
y_f = torch.cat((y, X2), 0)
y_f[:12*30*2].size()

In [None]:
# Smoke test: push the single window loaded in the previous cell through a fresh model.
# The old call used a 5-argument signature (no input_size_features) and passed no
# features to forward(); sizes now follow the 12*30-sample windows Dataset produces.
model = Allmodel(12*30, 6, 256, 2, 256, 12*30).to(device)
y = model(X1.reshape(1, 12*30, 2).to(device), X2.reshape(1, 6))

In [None]:
# Size of the model output after dropping a trailing singleton dimension, if there is one.
y.squeeze(-1).size()

In [None]:
def train(model, optimizer, scheduler, criterion, metric, data, test_size, batch_size, epochs=5):
    """
    Train ``model`` on a random train/test split of ``data`` and record per-epoch statistics.

    The model is moved to the notebook-level ``device``. Features travel through
    the data loaders as their own tensor; previously they were appended to the
    targets and cut off again at a hard-coded position (``12*30*2``).

    :param model: Module called as ``model(X, X_features)``.
    :param optimizer: Optimizer already bound to ``model``'s parameters.
    :param scheduler: Learning-rate scheduler, stepped once per epoch.
    :param criterion: Loss used for optimisation, called as ``criterion(preds, y)``.
    :param metric: Metric recorded for monitoring, called as ``metric(preds, y)``.
    :param data: Tuple ``(X, X_features, y)`` of tensors with equal first dimension.
    :param test_size: Test share, forwarded to ``train_test_split``.
    :param batch_size: Batch size of the training loader (the test loader keeps
        the ``DataLoader`` default batch size).
    :param epochs: Number of passes over the training data.
    :return: ``(history, y, preds)`` where ``history[epoch]`` is
        ``(train_losses, train_metrics, test_metrics)`` as lists with one value per
        batch, and ``y`` / ``preds`` are the targets and predictions of the last
        processed batch (``None`` when no batch was processed).
    """
    history = {}
    model = model.to(device)
    X, X_features, y = data

    # train_test_split applies the same shuffled split to every array it is given.
    X_train, X_test, F_train, F_test, y_train, y_test = train_test_split(
        X, X_features, y, test_size=test_size, shuffle=True
    )
    train_dataloader = DataLoader(TensorDataset(X_train, F_train, y_train), batch_size=batch_size)
    test_dataloader = DataLoader(TensorDataset(X_test, F_test, y_test))

    y_batch = preds = None
    for epoch in tqdm.tqdm(range(1, epochs + 1)):
        losses = []
        metrics = []
        model.train()
        for X_batch, F_batch, y_batch in train_dataloader:
            optimizer.zero_grad()
            X_batch = X_batch.to(device)
            F_batch = F_batch.to(device)
            y_batch = y_batch.to(device)

            preds = model(X_batch, F_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

            # The metric is for monitoring only, so keep it out of autograd.
            with torch.no_grad():
                metrics.append(metric(preds, y_batch).item())
            losses.append(loss.item())

        metrics_test = []
        model.eval()
        with torch.no_grad():
            for X_batch, F_batch, y_batch in test_dataloader:
                X_batch = X_batch.to(device)
                F_batch = F_batch.to(device)
                y_batch = y_batch.to(device)
                preds = model(X_batch, F_batch)
                metrics_test.append(metric(preds, y_batch).item())

        history[epoch] = (losses, metrics, metrics_test)
        # Mean training loss of the epoch; NaN when the training loader was empty.
        print(sum(losses) / len(losses) if losses else float('nan'))
        scheduler.step()

    return history, y_batch, preds

            

In [None]:
class CustomLoss(nn.Module):
    """
    Squared error weighted by how close the target is to 0.6.

    Each element's squared error is multiplied by
    ``1 / (c * (|y_true - 0.6| + bias))`` and the weighted errors are averaged.
    """

    def __init__(self, c=0.5, bias=0.2):
        """
        :param c: Scale factor in the weight's denominator.
        :param bias: Offset added to ``|y_true - 0.6|`` in the denominator.
        """
        super().__init__()
        self.c = c
        self.bias = bias

    def forward(self, y_output, y_true):
        """
        :param y_output: Predictions.
        :param y_true: Targets, broadcastable against ``y_output``.
        :return: Scalar tensor with the mean weighted squared error.
        """
        distance = torch.abs(y_true - 0.6) + self.bias
        weight = 1 / (self.c * distance)
        squared_error = (y_true - y_output) ** 2
        return (weight * squared_error).mean()

In [None]:
# Stack all windows of `data` again; this overwrites the X1, X2 and y set by the scratch cells.
X1, X2, y = data.get()

In [None]:
# Flat target vector of the first window.
plt.plot(y[0])

In [None]:
# Window length shared by the model input and by each output head.
items_per_day = 12*30

model_config = dict(
    input_size=items_per_day,
    input_size_features=6,
    hidden_size=350,
    n_inputs=2,
    fc_out_size=200,
    out_size=items_per_day,
)
model = Allmodel(**model_config)

# train() expects the tensors as one (signals, features, targets) tuple.
data = (X1, X2, y)

optimizer = Ranger(model.parameters(), lr=1e-3)
scheduler = ExponentialLR(optimizer, gamma=0.9)
criterion = CustomLoss().to(device)
metric = nn.MSELoss().to(device)

history, y_last, pred = train(
    model,
    optimizer,
    scheduler,
    criterion,
    metric,
    data,
    test_size=1/5,
    batch_size=256,
    epochs=20,
)

In [None]:
# Per-batch training losses, training metrics and test metrics that train() stored for epoch 5.
losses, metrics, metrics_test = history[5]

In [None]:
# Mean test metric per epoch; the range assumes the 20 epochs used in the training call.
met_list = []
for i in range(1, 20 + 1):
    # NOTE: this rebinds the notebook-level losses/metrics/metrics_test on every
    # pass, so after the loop they hold the values of epoch 20.
    losses, metrics, metrics_test = history[i]
    met_list.append(sum(metrics_test)/len(metrics_test))
    
plt.plot(met_list)

In [None]:
# First half of prediction row 1 against the first half of target row 1.
# NOTE(review): `pred` is the last batch train() processed, and train() builds its
# test loader with the DataLoader default batch size, so `pred` may hold a single
# row (making pred[1] out of range); `y` is the full target tensor, so the two rows
# need not belong to the same sample - confirm.
plt.plot(pred[1][:len(pred[1])//2].to('cpu'))
plt.plot(y[1][:len(pred[0])//2].to('cpu'))

In [None]:
# Overlay prediction and target for the first (up to) 30 windows.
# Inference only, so autograd is switched off; the window is moved to `device`
# because train() left the model there.
with torch.no_grad():
    for i in range(min(30, len(X1))):
        preds = model(X1[i].reshape(1, 360, 2).to(device), X2[i].reshape(1, 6)).cpu()
        plt.plot(preds[0].detach().numpy())
        plt.plot(y[i].cpu().numpy())
        plt.show()

In [None]:
# Shape check: one window reshaped into a batch of one.
X1[0].reshape(1, 360, 2).shape

In [None]:
# Second half of the first returned prediction row against the second half of the
# last target row, the latter scaled by 0.1 for display.
# NOTE(review): pred[0] and y[-1] are not guaranteed to be the same sample - confirm.
plt.plot(pred[0][len(pred[0])//2:].to('cpu'))
plt.plot(y[-1][len(pred[0])//2:].to('cpu')*0.1)

In [None]:
# Raw values of the first returned prediction row.
pred[0]

In [None]:
# Both halves of the first row of the targets returned by train(), scaled by 1e6
# so they are visible next to the last row of X.
# NOTE(review): X is not assigned in this cell; it is whatever an earlier cell
# left in the kernel - confirm.
plt.plot(y_last[0][len(y_last[0])//2:].to('cpu')*1000000, color='blue')
plt.plot(y_last[0][:len(y_last[0])//2].to('cpu')*1000000, color='green')
plt.plot(X[-1])

In [None]:
# Last row of X on its own.
# NOTE(review): X relies on state left behind by an earlier cell - confirm.
plt.plot(X[-1])

In [None]:
# Mean test metric over the batches of the most recently unpacked epoch.
sum(metrics_test)/len(metrics_test)

In [None]:
# Load the previously saved signal tensor from the attached Kaggle dataset.
# NOTE(review): torch.load unpickles the file; only load artefacts from a trusted source.
X1 = torch.load('/kaggle/input/new-dataset/x1_train')

In [None]:
# Load the previously saved feature tensor (same trust caveat as for any torch.load call).
X2 = torch.load('/kaggle/input/new-dataset/x2_train')

In [None]:
# Load the previously saved target tensor; note the file name carries a " (1)" suffix.
y = torch.load('/kaggle/input/new-dataset/y_train (1)')

In [None]:
# NOTE(review): the load cells set X1, X2 and y, not X, so this inspects whatever
# an earlier cell left in X - confirm.
X.size()

In [None]:
# Number of series an 80 % share of series_ids corresponds to (rounded down).
int(len(series_ids)*0.8)

In [None]:
# Scratch demo: concatenating two (3, 1) tensors along dim 1 gives a (3, 2) tensor.
t = torch.tensor([[1], [2], [3]])
q = torch.tensor([[4], [5], [6]])

r = torch.cat((t, q), 1)
r

In [None]:
# Build the training tensors from every series.
# The windows are collected in lists and concatenated once at the end; growing
# the tensors inside the loop copied all previous data on every iteration.
X1_parts, X2_parts, y_parts = [], [], []
for series_id in tqdm.tqdm(series_ids):   #train_dataset
    data = Dataset(train_series, train_events, series_id)
    if len(data) == 0:
        # Series too short to yield a single complete window.
        continue
    X1_i, X2_i, y_i = data.get()
    X1_parts.append(X1_i)
    X2_parts.append(X2_i)
    y_parts.append(y_i)

# Fall back to empty tensors when no series produced a window.
X1 = (torch.cat(X1_parts, 0) if X1_parts else torch.tensor([])).to(device)
X2 = (torch.cat(X2_parts, 0) if X2_parts else torch.tensor([])).to(device)
y = (torch.cat(y_parts, 0) if y_parts else torch.tensor([])).to(device)



In [None]:
# Keep extra names for the assembled tensors.
# NOTE: plain assignment does not copy, so in-place edits of X1/X2/y are visible
# through these names as well.
X1_save = X1
X2_save = X2
y_save = y

In [None]:
# Shape of the assembled target tensor.
y.size()

In [None]:
# Persist the signal tensor under a path relative to the working directory.
torch.save(X1, 'x1_train')

In [None]:
# Persist the feature tensor under a path relative to the working directory.
torch.save(X2, 'x2_train')

In [None]:
# Persist the target tensor under a path relative to the working directory.
torch.save(y, 'y_train')

In [None]:
# Scratch arithmetic: a third of the number of whole 12*60*24 (= 17280) element chunks in len(y).
len(y)//(12*60*24)/3

In [None]:
# Unshuffled split: the last third of the rows is held out.
# NOTE(review): X is not assigned by the cells just above (they build X1/X2/y);
# this relies on state left by an earlier cell - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, shuffle=False)
# `TorchDataset` is not defined anywhere in the notebook; the imported class is TensorDataset.
data_train = TensorDataset(X_train, y_train)

In [None]:
# Scratch tensor with 12 elements, used to try out a reshape.
tn = torch.tensor([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6])
tn

In [None]:
# Regroup the 12 elements into blocks of two rows with three values each.
y_reshaped = tn.view(-1, 2, 3)

# y_reshaped now has shape (n, 2, 3) - here torch.Size([2, 2, 3]); consecutive
# elements stay together inside each row of three.
print(y_reshaped.size())

In [None]:
# Regroup X into blocks of 12*60*24 samples with 2 channels each.
# NOTE(review): requires X from an earlier cell and an element count divisible by
# 12*60*24*2; the Dataset class above produces 12*30-sample windows - confirm.
X = X.view(-1, 12*60*24, 2)

In [None]:
# Shape of X after regrouping.
X.size()

In [None]:
# Regroup y into rows of 12*60*24*2 values.
# NOTE(review): requires the element count of y to be divisible by 12*60*24*2 - confirm.
y = y.view(-1, 12*60*24*2)

In [None]:
# Rescale every target row in place so that it sums to 1.
# The 1e-20 offset presumably keeps an all-zero row from dividing by zero - confirm.
for i in range(len(y)):
    y[i] = (y[i] + 1e-20) / ((y[i] + 1e-20).sum())

In [None]:
# Last target row after normalisation.
plt.plot(y[-1])

In [None]:
# Shape of the normalised target tensor.
y.size()

In [None]:
# Standardise each X[i] on its own: the scaler is re-fitted on every pass, so the
# statistics come from that single block only and nothing is shared across blocks.
# X[i] is overwritten in place with the scaled values.
from sklearn.preprocessing import StandardScaler

st_scale = StandardScaler()
for i in range(X.shape[0]):
    X[i] = torch.tensor(st_scale.fit_transform(X[i]))

In [None]:
# Unshuffled split: the last third of the rows is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, shuffle=False)
data_train = TensorDataset(X_train, y_train)
data_test = TensorDataset(X_test, y_test)
# Only the training loader is created in this cell.
train_dataloader = DataLoader(data_train, batch_size=8)

In [None]:
# Pull the first batch out of the loader for inspection; x2 and y2 stay bound after the break.
for x2, y2 in train_dataloader:
    break

In [None]:
# Shape of the input batch taken from train_dataloader.
x2.size()