In [None]:
!pip install -q scikit-learn

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [None]:
# Фиксация сида
import random

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)

set_seed(42)

In [None]:
# Загрузка файла
from google.colab import files
uploaded = files.upload()

# Чтение
df = pd.read_csv(list(uploaded.keys())[0])
df["date"] = pd.to_datetime(df["date"])
df.set_index("date", inplace=True)

# Параметры
REGION = "Barents Sea"
SEQ_LEN = 56
PRED_LEN = 7
SEG_LEN = 7
BATCH_SIZE = 32

# Нормализация
series = df[[REGION]].values
scaler = StandardScaler()
series_scaled = scaler.fit_transform(series)

# Датасет
class SeaIceDataset(Dataset):
    def __init__(self, data, seq_len, pred_len):
        self.data = data
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.X, self.Y = self.create_sequences()

    def create_sequences(self):
        X, Y = [], []
        for i in range(len(self.data) - self.seq_len - self.pred_len + 1):
            x_i = self.data[i:i + self.seq_len]
            y_i = self.data[i + self.seq_len:i + self.seq_len + self.pred_len]
            X.append(x_i)
            Y.append(y_i)
        return np.array(X), np.array(Y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.Y[idx], dtype=torch.float32)

# Тест и трейн
split = int(len(series_scaled) * 0.7)
train_data = series_scaled[:split]
test_data = series_scaled[split - SEQ_LEN - PRED_LEN:]

train_dataset = SeaIceDataset(train_data, SEQ_LEN, PRED_LEN)
test_dataset = SeaIceDataset(test_data, SEQ_LEN, PRED_LEN)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

Saving sea_ice_concentration_by_region.csv to sea_ice_concentration_by_region.csv


# 1. Модель SegRNN

In [None]:
# Модель

class segRNN(nn.Module):
    def __init__(self, seq_len, pred_len, seg_len, enc_in=1, d_model=64, dropout=0.1):
        super(segRNN, self).__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.seg_len = seg_len
        self.enc_in = enc_in
        self.d_model = d_model
        self.dropout = dropout

        self.seg_num_x = seq_len // seg_len
        self.seg_num_y = pred_len // seg_len

        self.valueEmbedding = nn.Sequential(
            nn.Linear(seg_len, d_model),
            nn.ReLU()
        )
        self.rnn = nn.GRU(d_model, d_model, batch_first=True)
        self.pos_emb = nn.Parameter(torch.randn(self.seg_num_y, d_model // 2))
        self.channel_emb = nn.Parameter(torch.randn(enc_in, d_model // 2))

        self.predict = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(d_model, seg_len)
        )

    def encoder(self, x):
        B, L, C = x.shape
        seq_last = x[:, -1:, :].detach()
        x = (x - seq_last).permute(0, 2, 1)  # [B,C,L]
        x = x.reshape(-1, self.seg_num_x, self.seg_len)
        x = self.valueEmbedding(x)
        _, hn = self.rnn(x)

        pos = torch.cat([
            self.pos_emb.unsqueeze(0).repeat(self.enc_in, 1, 1),
            self.channel_emb.unsqueeze(1).repeat(1, self.seg_num_y, 1)
        ], dim=-1).view(-1, 1, self.d_model).repeat(B, 1, 1)

        _, hy = self.rnn(pos, hn.repeat(1, 1, self.seg_num_y).view(1, -1, self.d_model))
        y = self.predict(hy).view(-1, self.enc_in, self.pred_len).permute(0, 2, 1)
        return y + seq_last

    def forward(self, x):
        return self.encoder(x)

In [None]:
# Обучение модели

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = segRNN(seq_len=SEQ_LEN, pred_len=PRED_LEN, seg_len=SEG_LEN).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

EPOCHS = 10
model.train()
for epoch in range(EPOCHS):
    losses = []
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {np.mean(losses):.4f}")

Epoch 1/10, Loss: 0.0467
Epoch 2/10, Loss: 0.0422
Epoch 3/10, Loss: 0.0413
Epoch 4/10, Loss: 0.0406
Epoch 5/10, Loss: 0.0399
Epoch 6/10, Loss: 0.0400
Epoch 7/10, Loss: 0.0398
Epoch 8/10, Loss: 0.0390
Epoch 9/10, Loss: 0.0387
Epoch 10/10, Loss: 0.0386


In [None]:
# Прогноз и метрики

model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        output = model(x)
        y_true.append(y.squeeze().cpu().numpy())
        y_pred.append(output.squeeze().cpu().numpy())

# Обратная трансформация
y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

# Метрики
def mase(y_true, y_pred, y_train, m=1):
    # MASE = MAE(model) / MAE(naive)
    naive_forecast = y_train[m:] - y_train[:-m]
    mae_naive = np.mean(np.abs(naive_forecast))
    mae_model = mean_absolute_error(y_true, y_pred)
    return mae_model / mae_naive

def wape(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / np.sum(np.abs(y_true)) * 100

# MAE, RMSE, MASE, WAPE
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mase_val = mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_data).flatten())
wape_val = wape(y_true, y_pred)

# Таблица
metrics_df = pd.DataFrame([{
    "Region": REGION,
    "Model": "segRNN",
    "MAE": mae,
    "RMSE": rmse,
    "MASE": mase_val,
    "WAPE (%)": wape_val
}])

display(metrics_df)

Unnamed: 0,Region,Model,MAE,RMSE,MASE,WAPE (%)
0,Barents Sea,segRNN,1.655951,2.716178,2.469986,12.499788


In [None]:
from tqdm import tqdm

FOLDS = 15
VAL_SIZE = 7  # дней в прогнозе
fold_metrics = []

series_full = scaler.fit_transform(series)

for i in range(FOLDS):
    split_point = split + i * VAL_SIZE

    train_part = series_full[:split_point]
    val_part = series_full[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

    train_dataset = SeaIceDataset(train_part, SEQ_LEN, PRED_LEN)
    val_dataset = SeaIceDataset(val_part, SEQ_LEN, PRED_LEN)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

    # заново инициализируем модель и оптимизатор
    model = segRNN(seq_len=SEQ_LEN, pred_len=PRED_LEN, seg_len=SEG_LEN).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # обучение
    model.train()
    for epoch in range(10):  # укороченное обучение на фолде
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    # валидация
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for x, y in val_loader:
            out = model(x.to(device)).cpu().numpy()
            y_true.append(y.numpy().squeeze())
            y_pred.append(out.squeeze())

    y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
    y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

    fold_metrics.append({
        "Fold": i + 1,
        "MAE": mean_absolute_error(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_part).flatten()),
        "WAPE (%)": wape(y_true, y_pred)
    })

# Таблица
cv_df = pd.DataFrame(fold_metrics)
display(cv_df)
print("\nСредние метрики по фолдам:")
display(cv_df.mean(numeric_only=True).to_frame().T)

Unnamed: 0,Fold,MAE,RMSE,MASE,WAPE (%)
0,1,4.837312,5.739775,7.215248,19.496557
1,2,3.524592,4.279084,5.239032,13.82157
2,3,1.955841,2.367095,2.900144,7.308134
3,4,2.556754,3.147986,3.791313,11.100388
4,5,1.714522,2.035601,2.538854,8.211056
5,6,1.65476,1.960458,2.45108,9.246192
6,7,1.426988,1.65685,2.114177,10.561779
7,8,0.792792,1.069898,1.174958,8.112891
8,9,0.780286,0.960428,1.158368,10.907716
9,10,1.003445,1.1861,1.49146,17.09304



Средние метрики по фолдам:


Unnamed: 0,Fold,MAE,RMSE,MASE,WAPE (%)
0,8.0,1.602147,1.936875,2.381976,14.347402


In [None]:
# Сбор метрик по всем регионам

REGIONS = ["Barents Sea", "Kara Sea", "Chukchi Sea", "Laptev Sea", "East Siberian Sea"]
FOLDS = 15
VAL_SIZE = 7
results = []

for region in REGIONS:
    print(f"\n Регион: {region}")
    series = df[[region]].values
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    for i in range(FOLDS):
        split_point = int(len(series_scaled) * 0.7) + i * VAL_SIZE

        train_part = series_scaled[:split_point]
        val_part = series_scaled[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

        train_dataset = SeaIceDataset(train_part, SEQ_LEN, PRED_LEN)
        val_dataset = SeaIceDataset(val_part, SEQ_LEN, PRED_LEN)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

        model = segRNN(seq_len=SEQ_LEN, pred_len=PRED_LEN, seg_len=SEG_LEN).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        model.train()
        for epoch in range(5):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                loss = criterion(model(x), y)
                loss.backward()
                optimizer.step()

        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x, y in val_loader:
                out = model(x.to(device)).cpu().numpy()
                y_true.append(y.numpy().squeeze())
                y_pred.append(out.squeeze())

        y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
        y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

        results.append({
            "Region": region,
            "Fold": i + 1,
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
            "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_part).flatten()),
            "WAPE (%)": wape(y_true, y_pred)
        })

# Таблица метрик по фолдам
cv_all = pd.DataFrame(results)
display(cv_all)
cv_all.to_csv("segrnn_cv_all.csv", index=False)

# Среднее по регионам и по всем фолдам
avg_per_region_segrnn = cv_all.groupby("Region").mean(numeric_only=True).reset_index()
avg_per_region_segrnn['Model'] = 'segRNN'
avg_total_segrnn = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_segrnn["MAE"].mean(),
    "RMSE": avg_per_region_segrnn["RMSE"].mean(),
    "MASE": avg_per_region_segrnn["MASE"].mean(),
    "WAPE (%)": avg_per_region_segrnn["WAPE (%)"].mean()
}])

# Таблица метрик по регионам
display(pd.concat([avg_per_region_segrnn, avg_total_segrnn], ignore_index=True))


 Регион: Barents Sea

 Регион: Kara Sea

 Регион: Chukchi Sea

 Регион: Laptev Sea

 Регион: East Siberian Sea


Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%)
0,Barents Sea,1,4.593381,5.310888,6.851402,18.513399
1,Barents Sea,2,3.763193,4.532328,5.593693,14.757236
2,Barents Sea,3,1.900677,2.395366,2.818346,7.102009
3,Barents Sea,4,2.761995,3.445646,4.095657,11.991461
4,Barents Sea,5,1.612618,1.902059,2.387956,7.723026
...,...,...,...,...,...,...
70,East Siberian Sea,11,2.221030,2.702115,2.486604,4.092013
71,East Siberian Sea,12,5.161695,6.152032,5.758839,11.786422
72,East Siberian Sea,13,2.507174,3.027072,2.789536,7.888322
73,East Siberian Sea,14,2.392269,2.842912,2.654464,10.966201


Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,8.0,1.638826,1.972179,2.436607,15.420807,segRNN
1,Chukchi Sea,8.0,2.396958,2.913194,2.425272,6.629925,segRNN
2,East Siberian Sea,8.0,2.500395,3.109217,2.806606,4.750016,segRNN
3,Kara Sea,8.0,2.512649,3.112004,2.040258,4.426573,segRNN
4,Laptev Sea,8.0,2.832704,3.52952,2.567139,9.289341,segRNN
5,All regions avg,,2.376306,2.927223,2.455177,8.103333,


In [None]:
# Сбор метрик по всем регионам для 10 эпох

REGIONS = ["Barents Sea", "Kara Sea", "Chukchi Sea", "Laptev Sea", "East Siberian Sea"]
FOLDS = 15
VAL_SIZE = 7
results_10 = []

for region in REGIONS:
    print(f"\n Регион: {region}")
    series = df[[region]].values
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    for i in range(FOLDS):
        split_point = int(len(series_scaled) * 0.7) + i * VAL_SIZE

        train_part = series_scaled[:split_point]
        val_part = series_scaled[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

        train_dataset = SeaIceDataset(train_part, SEQ_LEN, PRED_LEN)
        val_dataset = SeaIceDataset(val_part, SEQ_LEN, PRED_LEN)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

        model = segRNN(seq_len=SEQ_LEN, pred_len=PRED_LEN, seg_len=SEG_LEN).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        model.train()
        for epoch in range(10):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                loss = criterion(model(x), y)
                loss.backward()
                optimizer.step()

        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x, y in val_loader:
                out = model(x.to(device)).cpu().numpy()
                y_true.append(y.numpy().squeeze())
                y_pred.append(out.squeeze())

        y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
        y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

        results_10.append({
            "Region": region,
            "Fold": i + 1,
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
            "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_part).flatten()),
            "WAPE (%)": wape(y_true, y_pred)
        })

# Таблица метрик по фолдам
cv_all_10 = pd.DataFrame(results_10)
display(cv_all_10)
cv_all_10.to_csv("segrnn_cv_all_10.csv", index=False)

# Среднее по регионам и по всем фолдам
avg_per_region_segrnn = cv_all_10.groupby("Region").mean(numeric_only=True).reset_index()
avg_per_region_segrnn['Model'] = 'segRNN'
avg_total_segrnn = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_segrnn["MAE"].mean(),
    "RMSE": avg_per_region_segrnn["RMSE"].mean(),
    "MASE": avg_per_region_segrnn["MASE"].mean(),
    "WAPE (%)": avg_per_region_segrnn["WAPE (%)"].mean()
}])

# Таблица метрик по регионам
display(pd.concat([avg_per_region_segrnn, avg_total_segrnn], ignore_index=True))


 Регион: Barents Sea

 Регион: Kara Sea

 Регион: Chukchi Sea

 Регион: Laptev Sea

 Регион: East Siberian Sea


Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%)
0,Barents Sea,1,4.981828,6.022028,7.430803,20.079018
1,Barents Sea,2,3.753499,4.548601,5.579283,14.719218
2,Barents Sea,3,1.928410,2.323126,2.859468,7.205634
3,Barents Sea,4,2.602330,3.247944,3.858897,11.298262
4,Barents Sea,5,1.517712,1.822190,2.247419,7.268507
...,...,...,...,...,...,...
70,East Siberian Sea,11,2.010479,2.414192,2.250877,3.704095
71,East Siberian Sea,12,4.714066,5.596522,5.259426,10.764292
72,East Siberian Sea,13,2.469305,2.968369,2.747403,7.769177
73,East Siberian Sea,14,2.517511,3.112117,2.793432,11.540310


Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,8.0,1.634595,1.988195,2.430566,15.052551,segRNN
1,Chukchi Sea,8.0,2.359473,2.887511,2.387248,7.916465,segRNN
2,East Siberian Sea,8.0,2.536693,3.139368,2.848728,4.741198,segRNN
3,Kara Sea,8.0,2.404105,2.917161,1.952364,4.183853,segRNN
4,Laptev Sea,8.0,2.682072,3.36077,2.430849,8.784705,segRNN
5,All regions avg,,2.323387,2.858601,2.409951,8.135755,


In [None]:
display(avg_total_segrnn)

Unnamed: 0,Region,MAE,RMSE,MASE,WAPE (%)
0,All regions avg,2.345065,2.876339,2.438465,8.174762


# 2. Модель LSTM

In [None]:
# Модель

class LSTMForecaster(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=1, output_size=7):
        super(LSTMForecaster, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: [B, seq_len, 1]
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # берём последний временной шаг
        out = self.linear(out)  # [B, 7]
        return out.unsqueeze(-1)  # [B, 7, 1] — как y_true

In [None]:
# Кросс-валидация временных рядов по регионам

results_lstm = []

for region in REGIONS:
    print(f"\n=== Регион: {region} ===")
    series = df[[region]].values
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    for i in range(FOLDS):
        split_point = int(len(series_scaled) * 0.7) + i * VAL_SIZE

        train_part = series_scaled[:split_point]
        val_part = series_scaled[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

        train_dataset = SeaIceDataset(train_part, SEQ_LEN, PRED_LEN)
        val_dataset = SeaIceDataset(val_part, SEQ_LEN, PRED_LEN)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

        model = LSTMForecaster().to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        criterion = nn.MSELoss()

        # обучение
        model.train()
        for epoch in range(5):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                output = model(x)
                loss = criterion(output, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # предсказание
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x, y in val_loader:
                out = model(x.to(device)).cpu().numpy()
                y_true.append(y.numpy().squeeze())
                y_pred.append(out.squeeze())

        y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
        y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

        results_lstm.append({
            "Region": region,
            "Fold": i + 1,
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
            "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_part).flatten()),
            "WAPE (%)": wape(y_true, y_pred)
        })


=== Регион: Barents Sea ===

=== Регион: Kara Sea ===

=== Регион: Chukchi Sea ===

=== Регион: Laptev Sea ===

=== Регион: East Siberian Sea ===


In [None]:
# Вывод таблицы результатов

cv_lstm = pd.DataFrame(results_lstm)

avg_per_region_lstm = cv_lstm.groupby("Region").mean(numeric_only=True).reset_index()
avg_total_lstm = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_lstm["MAE"].mean(),
    "RMSE": avg_per_region_lstm["RMSE"].mean(),
    "MASE": avg_per_region_lstm["MASE"].mean(),
    "WAPE (%)": avg_per_region_lstm["WAPE (%)"].mean()
}])

avg_per_region_lstm['Model'] = 'LSTM'
final_LSTM = pd.concat([avg_per_region_lstm, avg_total_lstm], ignore_index=True)
display(final_LSTM)

Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,8.0,2.223921,2.603406,3.305786,23.173231,LSTM
1,Chukchi Sea,8.0,3.082224,3.520482,3.12085,7.550424,LSTM
2,East Siberian Sea,8.0,4.16706,4.70992,4.680398,8.147806,LSTM
3,Kara Sea,8.0,3.605788,4.025306,2.929843,7.01021,LSTM
4,Laptev Sea,8.0,4.028553,4.659424,3.64892,19.659431,LSTM
5,All regions avg,,3.421509,3.903708,3.537159,13.10822,


# 3. Модель LSTM+Attention

In [None]:
# Модель

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_out):
        # lstm_out: [B, T, H]
        scores = self.attn(lstm_out)  # [B, T, 1]
        weights = torch.softmax(scores, dim=1)  # [B, T, 1]
        context = torch.sum(weights * lstm_out, dim=1)  # [B, H]
        return context, weights

class LSTMWithAttention(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, output_size=7):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.attn = Attention(hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: [B, T, 1]
        lstm_out, _ = self.lstm(x)  # [B, T, H]
        context, _ = self.attn(lstm_out)  # [B, H]
        out = self.fc(context)  # [B, 7]
        return out.unsqueeze(-1)  # [B, 7, 1]

In [None]:
# Кросс-валидация по регионам

results_lstm_attn = []

for region in REGIONS:
    print(f"\n=== Регион: {region} ===")
    series = df[[region]].values
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    for i in range(FOLDS):
        split_point = int(len(series_scaled) * 0.7) + i * VAL_SIZE

        train_part = series_scaled[:split_point]
        val_part = series_scaled[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

        train_dataset = SeaIceDataset(train_part, SEQ_LEN, PRED_LEN)
        val_dataset = SeaIceDataset(val_part, SEQ_LEN, PRED_LEN)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

        model = LSTMWithAttention().to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        criterion = nn.MSELoss()

        model.train()
        for epoch in range(5):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                output = model(x)
                loss = criterion(output, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x, y in val_loader:
                out = model(x.to(device)).cpu().numpy()
                y_true.append(y.numpy().squeeze())
                y_pred.append(out.squeeze())

        y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
        y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

        results_lstm_attn.append({
            "Region": region,
            "Fold": i + 1,
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
            "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_part).flatten()),
            "WAPE (%)": wape(y_true, y_pred)
        })


=== Регион: Barents Sea ===

=== Регион: Kara Sea ===

=== Регион: Chukchi Sea ===

=== Регион: Laptev Sea ===

=== Регион: East Siberian Sea ===


In [None]:
# Вывод таблицы результатов

cv_attn = pd.DataFrame(results_lstm_attn)

avg_per_region_attn = cv_attn.groupby("Region").mean(numeric_only=True).reset_index()
avg_total_attn = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_attn["MAE"].mean(),
    "RMSE": avg_per_region_attn["RMSE"].mean(),
    "MASE": avg_per_region_attn["MASE"].mean(),
    "WAPE (%)": avg_per_region_attn["WAPE (%)"].mean()
}])

avg_per_region_attn['Model'] = 'LSTM+Attention'
final_attn = pd.concat([avg_per_region_attn, avg_total_attn], ignore_index=True)
display(final_attn)

Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,8.0,2.791372,3.081518,4.14684,33.435627,LSTM+Attention
1,Chukchi Sea,8.0,4.063886,4.437553,4.116,10.425588,LSTM+Attention
2,East Siberian Sea,8.0,7.473508,7.916821,8.397225,13.72753,LSTM+Attention
3,Kara Sea,8.0,4.448809,4.906785,3.615643,8.989025,LSTM+Attention
4,Laptev Sea,8.0,5.915817,6.482315,5.360355,25.642275,LSTM+Attention
5,All regions avg,,4.938678,5.364998,5.127212,18.44401,


4. SARIMA

In [None]:
!pip install -q statsmodels

from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# SARIMA по всем регионам

FOLDS_SARIMA = 15
results_sarima = []

for region in REGIONS:
    print(f"\n=== Регион: {region} ===")
    series = df[[region]].dropna().values.flatten()
    series_len = len(series)
    fold_size = 7
    start = int(series_len * 0.5)  # начало фолдов — с середины временного ряда

    for i in range(FOLDS_SARIMA):
        train_end = start + i * fold_size
        test_start = train_end
        test_end = train_end + fold_size

        if test_end > series_len:
            break  # если не хватает данных для фолда

        train_series = series[:train_end]
        test_series = series[test_start:test_end]

        try:
            model = SARIMAX(train_series, order=(1, 1, 1), seasonal_order=(1, 1, 0, 12),
                            enforce_stationarity=False, enforce_invertibility=False)
            model_fit = model.fit(disp=False)
            pred = model_fit.forecast(steps=fold_size)

            results_sarima.append({
                "Region": region,
                "Fold": i + 1,
                "MAE": mean_absolute_error(test_series, pred),
                "RMSE": np.sqrt(mean_squared_error(test_series, pred)),
                "MASE": mase(test_series, pred, train_series),
                "WAPE (%)": wape(test_series, pred)
            })
        except Exception as e:
            print(f"SARIMA failed for {region}, fold {i + 1}: {e}")


=== Регион: Barents Sea ===

=== Регион: Kara Sea ===

=== Регион: Chukchi Sea ===

=== Регион: Laptev Sea ===

=== Регион: East Siberian Sea ===


In [None]:
# Таблица результатов

cv_sarima = pd.DataFrame(results_sarima)
display(cv_sarima)
cv_sarima.to_csv("sarima_cv_all.csv", index=False)

avg_per_region_sarima = cv_sarima.groupby("Region").mean(numeric_only=True).reset_index()
avg_total_sarima = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_sarima["MAE"].mean(),
    "RMSE": avg_per_region_sarima["RMSE"].mean(),
    "MASE": avg_per_region_sarima["MASE"].mean(),
    "WAPE (%)": avg_per_region_sarima["WAPE (%)"].mean()
}])

avg_per_region_sarima['Model'] = 'SARIMA'
final_sarima = pd.concat([avg_per_region_sarima, avg_total_sarima], ignore_index=True)
display(final_sarima)

Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%)
0,Barents Sea,1,0.589215,0.726463,0.914263,6.794710
1,Barents Sea,2,0.554052,0.641442,0.860541,7.842053
2,Barents Sea,3,0.581660,0.757464,0.906320,11.437865
3,Barents Sea,4,1.435535,1.489921,2.237545,39.782369
4,Barents Sea,5,0.745024,0.819535,1.164826,24.339923
...,...,...,...,...,...,...
70,East Siberian Sea,11,0.221528,0.295363,0.247257,28.938940
71,East Siberian Sea,12,1.550075,1.551413,1.735761,237.913515
72,East Siberian Sea,13,0.129295,0.163427,0.145277,16.991141
73,East Siberian Sea,14,0.266364,0.300564,0.300439,25.049716


Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,8.0,0.474732,0.531292,0.746083,32.445346,SARIMA
1,Chukchi Sea,8.0,0.986951,1.095152,0.976639,101.479127,SARIMA
2,East Siberian Sea,8.0,2.829377,3.190985,3.146043,127.951069,SARIMA
3,Kara Sea,8.0,1.705871,1.937123,1.351717,34.256793,SARIMA
4,Laptev Sea,8.0,2.074208,2.350635,1.793991,36.943685,SARIMA
5,All regions avg,,1.614228,1.821038,1.602894,66.615204,


# 5. Модель IndRNN

In [None]:
# Модель

class IndRNNCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(IndRNNCell, self).__init__()
        self.input_weight = nn.Linear(input_size, hidden_size)
        self.recurrent_weight = nn.Parameter(torch.Tensor(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))
        nn.init.uniform_(self.recurrent_weight, -0.5, 0.5)

    def forward(self, input, hidden):
        return torch.relu(self.input_weight(input) + self.recurrent_weight * hidden + self.bias)

class IndRNN(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, output_size=7):
        super(IndRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn_cell = IndRNNCell(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: [batch, seq_len, 1]
        batch_size, seq_len, _ = x.size()
        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
        for t in range(seq_len):
            h = self.rnn_cell(x[:, t, :], h)
        out = self.fc(h)
        return out.unsqueeze(-1)  # [batch, 7, 1]

In [None]:
# Кросс-валидация по регионам

results_indrnn = []

for region in REGIONS:
    print(f"\n=== Регион: {region} ===")
    series = df[[region]].dropna().values
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    for fold in range(20):
        split_point = int(len(series_scaled) * 0.5) + fold * 7
        train_data = series_scaled[:split_point]
        val_data = series_scaled[split_point - SEQ_LEN - PRED_LEN: split_point + PRED_LEN]

        train_dataset = SeaIceDataset(train_data, SEQ_LEN, PRED_LEN)
        val_dataset = SeaIceDataset(val_data, SEQ_LEN, PRED_LEN)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

        model = IndRNN(input_size=1, hidden_size=64, output_size=PRED_LEN).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        criterion = nn.MSELoss()

        model.train()
        for epoch in range(10):
            for x_batch, y_batch in train_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                loss = criterion(model(x_batch), y_batch)
                loss.backward()
                optimizer.step()

        # Валидация
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x_val, y_val in val_loader:
                pred = model(x_val.to(device)).cpu().numpy()
                y_true.append(y_val.numpy().squeeze())
                y_pred.append(pred.squeeze())

        y_true = scaler.inverse_transform(np.array(y_true).reshape(-1, 1)).reshape(-1, PRED_LEN)
        y_pred = scaler.inverse_transform(np.array(y_pred).reshape(-1, 1)).reshape(-1, PRED_LEN)

        results_indrnn.append({
            "Region": region,
            "Fold": fold + 1,
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
            "MASE": mase(y_true.flatten(), y_pred.flatten(), scaler.inverse_transform(train_data).flatten()),
            "WAPE (%)": wape(y_true, y_pred)
        })


=== Регион: Barents Sea ===

=== Регион: Kara Sea ===

=== Регион: Chukchi Sea ===

=== Регион: Laptev Sea ===

=== Регион: East Siberian Sea ===


In [None]:
# Результаты

cv_indrnn = pd.DataFrame(results_indrnn)

avg_per_region_indrnn = cv_indrnn.groupby("Region").mean(numeric_only=True).reset_index()
avg_total_indrnn = pd.DataFrame([{
    "Region": "All regions avg",
    "MAE": avg_per_region_indrnn["MAE"].mean(),
    "RMSE": avg_per_region_indrnn["RMSE"].mean(),
    "MASE": avg_per_region_indrnn["MASE"].mean(),
    "WAPE (%)": avg_per_region_indrnn["WAPE (%)"].mean()
}])


avg_per_region_indrnn["Model"] = "IndRNN"
avg_total_indrnn["Model"] = "IndRNN"
final_indrnn = pd.concat([avg_per_region_indrnn, avg_total_indrnn], ignore_index=True))
display(final_indrnn)

Unnamed: 0,Region,Fold,MAE,RMSE,MASE,WAPE (%),Model
0,Barents Sea,10.5,0.801376,0.961845,1.276599,30.434208,IndRNN
1,Chukchi Sea,10.5,1.0005,1.217871,1.003961,62.480095,IndRNN
2,East Siberian Sea,10.5,5.159553,6.126504,5.708067,58.473877,IndRNN
3,Kara Sea,10.5,3.574283,4.264732,2.858872,26.436975,IndRNN
4,Laptev Sea,10.5,4.675313,5.521394,4.049885,36.062271,IndRNN
5,All regions avg,,3.042205,3.618469,2.979477,42.777489,IndRNN


# Сравнительгая таблица реззультатов

In [None]:
# Объединяем
comparison_df = pd.concat([
    avg_per_region_segrnn,
    avg_per_region_lstm,
    avg_per_region_attn,
    avg_per_region_sarima,
    avg_per_region_indrnn
], ignore_index=True)

# Колонка Model первой
cols = ['Region', 'Model', 'MAE', 'RMSE', 'MASE', 'WAPE (%)']
comparison_df = comparison_df[cols]

# Сводка по всем регионам для каждой модели
overall_df = comparison_df.groupby("Model").mean(numeric_only=True).reset_index()
overall_df.insert(0, "Region", "All regions avg")

# Финальная таблица
final_df = pd.concat([comparison_df, overall_df], ignore_index=True)

# Вывод
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

display(final_df.sort_values(by=["Region", "Model"]))

Unnamed: 0,Region,Model,MAE,RMSE,MASE,WAPE (%)
20,All regions avg,LSTM,3.421509,3.903708,3.537159,13.108221
21,All regions avg,LSTM+Attention,4.938678,5.364998,5.127212,18.444009
22,All regions avg,SARIMA,1.614228,1.821038,1.602894,66.615204
23,All regions avg,segRNN,2.345065,2.876339,2.438465,8.174762
5,Barents Sea,LSTM,2.223921,2.603406,3.305786,23.173231
10,Barents Sea,LSTM+Attention,2.791372,3.081518,4.14684,33.435627
15,Barents Sea,SARIMA,0.474732,0.531292,0.746083,32.445346
0,Barents Sea,segRNN,1.701706,2.036142,2.529474,15.711973
6,Chukchi Sea,LSTM,3.082224,3.520482,3.12085,7.550424
11,Chukchi Sea,LSTM+Attention,4.063886,4.437553,4.116,10.425588


In [None]:
final_df.sort_values(by=["Region", "Model"]).to_csv("results.csv", index=False)