In [3]:
# Install required packages.
# %%capture

# !pip install torch-geometric
# !pip install sentence_transformers

In [15]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.datasets import MovieLens
from torch_geometric.nn import GCNConv, SAGEConv, to_hetero
import torch_geometric.transforms as T
import numpy as np

In [5]:
dataset_path = '/tmp/'
dataset = MovieLens(root=dataset_path)

Downloading https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Extracting /tmp/raw/ml-latest-small.zip
Processing...


.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Batches:   0%|          | 0/305 [00:00<?, ?it/s]

Done!


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

data = dataset[0].to(device)

# Add user node features for message passing:
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
del data['user'].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)


weight = torch.bincount(train_data['user', 'movie'].edge_label)
weight = weight.max() / weight

In [64]:
def weighted_mse_loss(pred, target, weight=None):
    weight = 1. if weight is None else weight[target].to(pred.dtype)
    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()


class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)



def train(model, optimizer):
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['user', 'movie'].edge_label_index)
    target = train_data['user', 'movie'].edge_label
    loss = weighted_mse_loss(pred, target, weight)
    loss.backward()
    optimizer.step()
    return float(loss)


@torch.no_grad()
def test(data, model):
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['user', 'movie'].edge_label_index)
    pred = pred.clamp(min=0, max=5)
    target = data['user', 'movie'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [65]:
from tqdm.notebook import tqdm

def start_train(model, optimizer, max_epochs = 300, print_each = 100):
    for epoch in tqdm(range(1, max_epochs)):
        loss = train(model, optimizer)
        train_rmse = test(train_data, model)
        val_rmse = test(val_data, model)
        test_rmse = test(test_data, model)
        if epoch % print_each == 0 or (epoch + 1) == max_epochs:
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
                  f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')

In [66]:
model = Model(hidden_channels=32).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [67]:
start_train(model, optimizer, max_epochs = 1000)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 3.0476, Train: 1.1063, Val: 1.1588, Test: 1.1545
Epoch: 200, Loss: 2.7512, Train: 1.0798, Val: 1.1511, Test: 1.1397
Epoch: 300, Loss: 2.4549, Train: 1.0207, Val: 1.1275, Test: 1.1134
Epoch: 400, Loss: 2.2115, Train: 0.9774, Val: 1.1168, Test: 1.0958
Epoch: 500, Loss: 2.9173, Train: 1.0834, Val: 1.2190, Test: 1.1995
Epoch: 600, Loss: 1.9695, Train: 0.9530, Val: 1.1202, Test: 1.1014
Epoch: 700, Loss: 2.3099, Train: 0.9664, Val: 1.1058, Test: 1.0944
Epoch: 800, Loss: 1.7179, Train: 0.9092, Val: 1.1086, Test: 1.0901
Epoch: 900, Loss: 1.6334, Train: 0.8843, Val: 1.0958, Test: 1.0806
Epoch: 999, Loss: 1.6002, Train: 0.8882, Val: 1.1057, Test: 1.0939


# Вообще наблюдаем, что test loss не прям стабильно понижается, но иногда преодолевает порог в 1.1
# Попробуем улучшить модель

## Задание

## 1) Подберите оптимальные параметры для сети из примера выше(2 балла)


In [272]:
class GNNEncoderV2(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_layers=1):
        super().__init__()
        self.conv_in = SAGEConv((-1, -1), hidden_channels)
        self.conv_layers = torch.nn.ModuleList([
            SAGEConv((-1, -1), hidden_channels // 2) for _ in range(num_layers)
        ])
        self.conv_out = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv_in(x, edge_index).relu()
        
        for conv_layer in self.conv_layers:
            x = conv_layer(x, edge_index).relu()
    
        x = self.conv_out(x, edge_index)
        return x

    
# Первый слой - Linear
class EdgeDecoderV2(torch.nn.Module):
    def __init__(self, hidden_channels, num_lin_layers=1, num_conv_layers=1):
        super().__init__()
        self.lin_in = Linear(2 * hidden_channels, hidden_channels)
        
        self.conv_layers = torch.nn.ModuleList([
            GCNConv(hidden_channels, hidden_channels) for _ in range(num_conv_layers)
        ])
        
        self.lin_layers = torch.nn.ModuleList([
            Linear(hidden_channels, hidden_channels) for _ in range(num_lin_layers)
        ])
        
        self.lin_out = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.lin_in(z).relu()
        
        for conv_layer in self.conv_layers:
            z = conv_layer(z, edge_index=edge_label_index).relu()
        
        for lin_layer in self.lin_layers:
            z = lin_layer(z).relu()
            
        z = self.lin_out(z)
        return z.view(-1)

# Первый слой - GCN
class EdgeDecoderV3(torch.nn.Module):
    def __init__(self, hidden_channels, num_lin_layers=1, num_conv_layers=2):
        super().__init__()
        self.graph_conv = GCNConv(2 * hidden_channels, hidden_channels)
        
        self.conv_layers = torch.nn.ModuleList([
            GCNConv(hidden_channels, hidden_channels) for _ in range(num_conv_layers)
        ])
        
        self.lin_layers = torch.nn.ModuleList([
            Linear(hidden_channels, hidden_channels) for _ in range(num_lin_layers)
        ])
        
        self.lin_out = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.graph_conv(z, edge_index=edge_label_index).relu()

        for conv_layer in self.conv_layers:
            z = conv_layer(z, edge_index=edge_label_index).relu()
        
        for lin_layer in self.lin_layers:
            z = lin_layer(z).relu()
            
        z = self.lin_out(z)
        return z.view(-1)

    
class EdgeDecoderV4(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin_in = Linear(2 * hidden_channels, hidden_channels)
        
        self.lin_layers = torch.nn.ModuleList([
            Linear(hidden_channels, hidden_channels // 2),
            Linear(hidden_channels // 2, hidden_channels // 4),
            Linear(hidden_channels // 4, hidden_channels // 8),
        ])
        
        self.lin_out = Linear(hidden_channels // 8, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.lin_in(z).relu()
        
        for lin_layer in self.lin_layers:
            z = lin_layer(z).relu()
            
        z = self.lin_out(z)
        return z.view(-1)


class ModelV2(torch.nn.Module):
    def __init__(self, hidden_channels, encoder_layers=1, decoder_ll=1, decoder_cl=1):
        super().__init__()
        self.encoder = GNNEncoderV2(hidden_channels, hidden_channels, num_layers=encoder_layers)

        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')

#         self.decoder = EdgeDecoderV2(hidden_channels, num_lin_layers=decoder_ll, num_conv_layers=decoder_cl)
#         self.decoder = EdgeDecoderV3(hidden_channels, num_lin_layers=decoder_ll, num_conv_layers=decoder_cl)
        self.decoder = EdgeDecoderV4(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)


In [241]:
model_v2 = ModelV2(hidden_channels=128, encoder_layers=0, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.005)

In [236]:
model_v2

ModelV2(
  (encoder): GraphModule(
    (conv_in): ModuleDict(
      (user__rates__movie): SAGEConv((-1, -1), 128, aggr=mean)
      (movie__rev_rates__user): SAGEConv((-1, -1), 128, aggr=mean)
    )
    (conv_layers): ModuleList(
      (0): ModuleDict(
        (user__rates__movie): SAGEConv((-1, -1), 128, aggr=mean)
        (movie__rev_rates__user): SAGEConv((-1, -1), 128, aggr=mean)
      )
    )
    (conv_out): ModuleDict(
      (user__rates__movie): SAGEConv((-1, -1), 128, aggr=mean)
      (movie__rev_rates__user): SAGEConv((-1, -1), 128, aggr=mean)
    )
  )
  (decoder): EdgeDecoderV4(
    (lin_in): Linear(in_features=256, out_features=128, bias=True)
    (lin_layers): ModuleList(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): Linear(in_features=64, out_features=32, bias=True)
    )
    (lin_out): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [229]:
# input 64
start_train(model_v2, optimizerV2, max_epochs = 1000, print_each = 100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 2.9585, Train: 1.1052, Val: 1.1633, Test: 1.1580
Epoch: 200, Loss: 2.4466, Train: 1.0120, Val: 1.1259, Test: 1.1070
Epoch: 300, Loss: 2.0791, Train: 0.9673, Val: 1.1099, Test: 1.0993
Epoch: 400, Loss: 1.8877, Train: 0.9285, Val: 1.1028, Test: 1.0890
Epoch: 500, Loss: 1.7203, Train: 0.8969, Val: 1.0955, Test: 1.0801
Epoch: 600, Loss: 1.6496, Train: 0.8796, Val: 1.1062, Test: 1.0887
Epoch: 700, Loss: 1.3921, Train: 0.8452, Val: 1.1122, Test: 1.0979
Epoch: 800, Loss: 1.3222, Train: 0.8148, Val: 1.1070, Test: 1.0948
Epoch: 900, Loss: 1.2976, Train: 0.7918, Val: 1.1124, Test: 1.1013
Epoch: 999, Loss: 1.1384, Train: 0.8109, Val: 1.1280, Test: 1.1184


In [232]:
# input 128
start_train(model_v2, optimizerV2, max_epochs = 1000, print_each = 100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 2.7383, Train: 1.0955, Val: 1.1787, Test: 1.1664
Epoch: 200, Loss: 2.2472, Train: 1.0176, Val: 1.1483, Test: 1.1381
Epoch: 300, Loss: 2.0004, Train: 0.9223, Val: 1.0848, Test: 1.0726
Epoch: 400, Loss: 1.8706, Train: 0.9106, Val: 1.0954, Test: 1.0964
Epoch: 500, Loss: 1.6910, Train: 0.8569, Val: 1.0745, Test: 1.0805
Epoch: 600, Loss: 1.4754, Train: 0.8412, Val: 1.0892, Test: 1.0841
Epoch: 700, Loss: 1.3358, Train: 0.8325, Val: 1.1008, Test: 1.0961
Epoch: 800, Loss: 1.3336, Train: 0.7987, Val: 1.0738, Test: 1.0732
Epoch: 900, Loss: 1.1824, Train: 0.7801, Val: 1.1000, Test: 1.0989
Epoch: 999, Loss: 1.0763, Train: 0.7543, Val: 1.0985, Test: 1.1007


In [242]:
# input 128 encoder layers 0
model_v2 = ModelV2(hidden_channels=128, encoder_layers=0, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.005)
start_train(model_v2, optimizerV2, max_epochs = 1000, print_each = 100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 3.1194, Train: 1.1066, Val: 1.1512, Test: 1.1466
Epoch: 200, Loss: 2.3580, Train: 1.0061, Val: 1.1300, Test: 1.1120
Epoch: 300, Loss: 1.9621, Train: 0.9381, Val: 1.0894, Test: 1.0628
Epoch: 400, Loss: 1.7678, Train: 0.9079, Val: 1.0839, Test: 1.0584
Epoch: 500, Loss: 1.6540, Train: 0.8733, Val: 1.0672, Test: 1.0491
Epoch: 600, Loss: 1.4914, Train: 0.8514, Val: 1.0881, Test: 1.0764
Epoch: 700, Loss: 1.4342, Train: 0.8292, Val: 1.0962, Test: 1.0783
Epoch: 800, Loss: 1.3005, Train: 0.8085, Val: 1.0985, Test: 1.0769
Epoch: 900, Loss: 1.1718, Train: 0.7882, Val: 1.1130, Test: 1.0945
Epoch: 999, Loss: 1.0787, Train: 0.7554, Val: 1.1131, Test: 1.0956


### На 1000 эпохах лучший результат показала: 

In [247]:
# input 96 encoder layers 0 decoder layers 2
model_v2 = ModelV2(hidden_channels=96, encoder_layers=0, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.005)
start_train(model_v2, optimizerV2, max_epochs = 1000, print_each = 100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 3.0342, Train: 1.1059, Val: 1.1634, Test: 1.1591
Epoch: 200, Loss: 2.3098, Train: 0.9891, Val: 1.1019, Test: 1.0912
Epoch: 300, Loss: 2.0523, Train: 0.9406, Val: 1.0930, Test: 1.0796
Epoch: 400, Loss: 1.8718, Train: 0.8983, Val: 1.0595, Test: 1.0470
Epoch: 500, Loss: 1.7456, Train: 0.9007, Val: 1.0814, Test: 1.0700
Epoch: 600, Loss: 1.6082, Train: 0.8687, Val: 1.0770, Test: 1.0638
Epoch: 700, Loss: 1.4837, Train: 0.8425, Val: 1.0765, Test: 1.0682
Epoch: 800, Loss: 1.5021, Train: 0.9385, Val: 1.1580, Test: 1.1443
Epoch: 900, Loss: 1.3001, Train: 0.8083, Val: 1.0882, Test: 1.0733
Epoch: 999, Loss: 1.2390, Train: 0.7851, Val: 1.0990, Test: 1.0774


### Далее я еще немного попробовал большее число эпох, но основной score уже побит

In [267]:
# input 80 encoder layers 1 decoder layers 3
model_v2 = ModelV2(hidden_channels=80, encoder_layers=1, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.001)
start_train(model_v2, optimizerV2, max_epochs = 4000, print_each = 100)

  0%|          | 0/3999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.2165, Train: 1.2565, Val: 1.2872, Test: 1.2819
Epoch: 200, Loss: 2.9737, Train: 1.0959, Val: 1.1553, Test: 1.1493
Epoch: 300, Loss: 2.4490, Train: 1.0397, Val: 1.1444, Test: 1.1282
Epoch: 400, Loss: 2.2429, Train: 0.9686, Val: 1.0985, Test: 1.0844
Epoch: 500, Loss: 2.0847, Train: 0.9877, Val: 1.1295, Test: 1.1181
Epoch: 600, Loss: 2.0025, Train: 0.9405, Val: 1.1012, Test: 1.0900
Epoch: 700, Loss: 1.8244, Train: 0.8856, Val: 1.0615, Test: 1.0543
Epoch: 800, Loss: 1.8191, Train: 0.9253, Val: 1.1158, Test: 1.1022
Epoch: 900, Loss: 1.7026, Train: 0.8794, Val: 1.0812, Test: 1.0651
Epoch: 1000, Loss: 1.6591, Train: 0.8868, Val: 1.0838, Test: 1.0641
Epoch: 1100, Loss: 1.6056, Train: 0.8863, Val: 1.1033, Test: 1.0763
Epoch: 1200, Loss: 1.5557, Train: 0.8462, Val: 1.0659, Test: 1.0438
Epoch: 1300, Loss: 1.5322, Train: 0.8518, Val: 1.0821, Test: 1.0583
Epoch: 1400, Loss: 1.5066, Train: 0.8420, Val: 1.0745, Test: 1.0515
Epoch: 1500, Loss: 1.5244, Train: 0.8568, Val: 1.0904, Te

In [273]:
# input 128 encoder layers 0 decoder layers 3
model_v2 = ModelV2(hidden_channels=128, encoder_layers=2, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.001)
start_train(model_v2, optimizerV2, max_epochs = 6000, print_each = 100)

  0%|          | 0/5999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.2856, Train: 1.2512, Val: 1.2874, Test: 1.2817
Epoch: 200, Loss: 2.7913, Train: 1.1063, Val: 1.1897, Test: 1.1809
Epoch: 300, Loss: 2.2810, Train: 0.9841, Val: 1.1187, Test: 1.0972
Epoch: 400, Loss: 2.1297, Train: 1.0140, Val: 1.1727, Test: 1.1529
Epoch: 500, Loss: 2.0481, Train: 0.9475, Val: 1.1168, Test: 1.0983
Epoch: 600, Loss: 2.0402, Train: 0.9638, Val: 1.1342, Test: 1.1172
Epoch: 700, Loss: 1.9801, Train: 0.9332, Val: 1.1184, Test: 1.0972
Epoch: 800, Loss: 1.8620, Train: 0.9364, Val: 1.1285, Test: 1.1074
Epoch: 900, Loss: 1.8157, Train: 0.9198, Val: 1.1187, Test: 1.0950
Epoch: 1000, Loss: 1.8248, Train: 0.9170, Val: 1.1291, Test: 1.1011
Epoch: 1100, Loss: 1.8329, Train: 0.8997, Val: 1.1065, Test: 1.0740
Epoch: 1200, Loss: 1.6901, Train: 0.8921, Val: 1.1260, Test: 1.0854
Epoch: 1300, Loss: 1.7004, Train: 0.8947, Val: 1.1258, Test: 1.0923
Epoch: 1400, Loss: 1.6393, Train: 0.8807, Val: 1.1236, Test: 1.0920
Epoch: 1500, Loss: 1.6022, Train: 0.8874, Val: 1.1412, Te

In [372]:
# input 128 encoder layers 0 decoder layers 3
model_v2 = ModelV2(hidden_channels=156, encoder_layers=1, decoder_ll=0, decoder_cl=0).to(device)
optimizerV2 = torch.optim.Adam(model_v2.parameters(), lr=0.001)
start_train(model_v2, optimizerV2, max_epochs = 3000, print_each = 100)

  0%|          | 0/2999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.6173, Train: 1.1889, Val: 1.2254, Test: 1.2233
Epoch: 200, Loss: 2.6606, Train: 1.0513, Val: 1.1366, Test: 1.1231
Epoch: 300, Loss: 2.2676, Train: 0.9748, Val: 1.1092, Test: 1.0849
Epoch: 400, Loss: 2.1035, Train: 0.9746, Val: 1.1239, Test: 1.1039
Epoch: 500, Loss: 2.2131, Train: 0.9250, Val: 1.0824, Test: 1.0552
Epoch: 600, Loss: 1.8998, Train: 0.9305, Val: 1.1061, Test: 1.0853
Epoch: 700, Loss: 1.8720, Train: 0.9208, Val: 1.1114, Test: 1.0849
Epoch: 800, Loss: 1.7413, Train: 0.8852, Val: 1.0941, Test: 1.0668
Epoch: 900, Loss: 1.6860, Train: 0.9041, Val: 1.1226, Test: 1.0980
Epoch: 1000, Loss: 1.5804, Train: 0.8757, Val: 1.1070, Test: 1.0840
Epoch: 1100, Loss: 1.5403, Train: 0.8560, Val: 1.0951, Test: 1.0734
Epoch: 1200, Loss: 1.5593, Train: 0.8651, Val: 1.0956, Test: 1.0772
Epoch: 1300, Loss: 1.4749, Train: 0.8422, Val: 1.0979, Test: 1.0762
Epoch: 1400, Loss: 1.4325, Train: 0.8362, Val: 1.1013, Test: 1.0773
Epoch: 1500, Loss: 1.4031, Train: 0.8272, Val: 1.0958, Te

### По последнему заданию давайте считать, что задача найти архитектуру, для которой RMSE на тесте лучше 1.1
### Задача была выполнена, показал разные модели

## 2) Попробуйте вместо GraphSage модуль Graph Attention и также подберите оптимальные параметры  (2 балла)

In [454]:
import torch
from torch_geometric.nn import GATConv, global_add_pool
from torch.nn import Linear, Dropout

class GNNEncoderGAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, heads=2, num_layers=1, dropout=0.5):
        super().__init__()
#         self.conv_in = GATConv((-1, -1), 16, heads=8, dropout=0.6, add_self_loops=False)
#         self.conv_out = GATConv(16 * 8, out_channels, heads=1, concat=False, dropout=0.6, add_self_loops=False)
        
        self.conv_in = GATConv((-1, -1), hidden_channels, heads=heads, dropout=dropout, add_self_loops=False)
        
        self.gat_layers = torch.nn.ModuleList([
            GATConv(hidden_channels * heads, hidden_channels, heads=heads, dropout=dropout, add_self_loops=False) for _ in range(num_layers)
        ])
        
        self.conv_out = GATConv(hidden_channels * heads, out_channels, heads=1, dropout=dropout, add_self_loops=False)

    def forward(self, x, edge_index):
        
        x = F.elu(self.conv_in(x, edge_index))
        
        for gat_layer in self.gat_layers:
            x = F.elu(gat_layer(x, edge_index))

        x = self.conv_out(x, edge_index)
        return x


class EdgeDecoderGAT(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin_in = Linear(2 * hidden_channels, hidden_channels)
        
        self.lin_layers = torch.nn.ModuleList([
            Linear(hidden_channels, hidden_channels // 2),
            Linear(hidden_channels // 2, hidden_channels // 4),
            Linear(hidden_channels // 4, hidden_channels // 8),
        ])
        
        self.lin_out = Linear(hidden_channels // 8, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.lin_in(z).relu()
        
        for lin_layer in self.lin_layers:
            z = lin_layer(z).relu()
            
        z = self.lin_out(z)
        return z.view(-1)


class ModelGAT(torch.nn.Module):
    def __init__(self, hidden_channels, encoder_layers=1, heads=1):
        super().__init__()
        self.encoder = GNNEncoderGAT(hidden_channels, hidden_channels, num_layers=encoder_layers, heads=heads)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoderGAT(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

In [446]:
model_gat

ModelGAT(
  (encoder): GraphModule(
    (conv_in): ModuleDict(
      (user__rates__movie): GATConv((-1, -1), 16, heads=8)
      (movie__rev_rates__user): GATConv((-1, -1), 16, heads=8)
    )
    (conv_out): ModuleDict(
      (user__rates__movie): GATConv(128, 64, heads=1)
      (movie__rev_rates__user): GATConv(128, 64, heads=1)
    )
  )
  (decoder): EdgeDecoderGAT(
    (lin_in): Linear(in_features=128, out_features=64, bias=True)
    (lin_layers): ModuleList(
      (0): Linear(in_features=64, out_features=32, bias=True)
      (1): Linear(in_features=32, out_features=16, bias=True)
      (2): Linear(in_features=16, out_features=8, bias=True)
    )
    (lin_out): Linear(in_features=8, out_features=1, bias=True)
  )
)

In [472]:
model_gat = ModelGAT(hidden_channels=32, encoder_layers=4, heads=8).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 13.0386, Train: 2.8690, Val: 2.8733, Test: 2.8580
Epoch: 200, Loss: 10.1961, Train: 2.4840, Val: 2.4888, Test: 2.4736
Epoch: 300, Loss: 8.3717, Train: 2.1677, Val: 2.1731, Test: 2.1581
Epoch: 400, Loss: 7.2810, Train: 1.9195, Val: 1.9255, Test: 1.9108
Epoch: 500, Loss: 6.6799, Train: 1.7344, Val: 1.7409, Test: 1.7267
Epoch: 600, Loss: 6.3772, Train: 1.6035, Val: 1.6105, Test: 1.5966
Epoch: 700, Loss: 6.2390, Train: 1.5155, Val: 1.5229, Test: 1.5094
Epoch: 800, Loss: 6.1820, Train: 1.4591, Val: 1.4667, Test: 1.4535
Epoch: 900, Loss: 6.1609, Train: 1.4246, Val: 1.4323, Test: 1.4194
Epoch: 999, Loss: 6.1540, Train: 1.4046, Val: 1.4124, Test: 1.3996


In [458]:
model_gat = ModelGAT(hidden_channels=64, encoder_layers=0, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.001)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.7456, Train: 1.3447, Val: 1.3637, Test: 1.3542
Epoch: 200, Loss: 4.5827, Train: 1.1847, Val: 1.1941, Test: 1.1746
Epoch: 300, Loss: 4.2765, Train: 1.1664, Val: 1.1807, Test: 1.1486
Epoch: 400, Loss: 3.7082, Train: 1.1088, Val: 1.1315, Test: 1.1004
Epoch: 500, Loss: 3.5132, Train: 1.1210, Val: 1.1488, Test: 1.1216
Epoch: 600, Loss: 3.4676, Train: 1.0914, Val: 1.1254, Test: 1.0990
Epoch: 700, Loss: 3.3377, Train: 1.1338, Val: 1.1695, Test: 1.1417
Epoch: 800, Loss: 3.2313, Train: 1.0558, Val: 1.0931, Test: 1.0704
Epoch: 900, Loss: 3.1017, Train: 1.0960, Val: 1.1290, Test: 1.1030
Epoch: 999, Loss: 3.0239, Train: 1.0677, Val: 1.1080, Test: 1.0832


In [463]:
model_gat = ModelGAT(hidden_channels=64, encoder_layers=0, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.6188, Train: 1.1809, Val: 1.1932, Test: 1.1696
Epoch: 200, Loss: 3.7479, Train: 1.1332, Val: 1.1551, Test: 1.1239
Epoch: 300, Loss: 3.5496, Train: 1.1015, Val: 1.1305, Test: 1.1047
Epoch: 400, Loss: 3.5314, Train: 1.0580, Val: 1.0949, Test: 1.0757
Epoch: 500, Loss: 3.2825, Train: 1.0896, Val: 1.1280, Test: 1.1040
Epoch: 600, Loss: 3.2435, Train: 1.0639, Val: 1.1074, Test: 1.0831
Epoch: 700, Loss: 3.1874, Train: 1.0746, Val: 1.1120, Test: 1.0897
Epoch: 800, Loss: 3.1470, Train: 1.0530, Val: 1.0947, Test: 1.0715
Epoch: 900, Loss: 2.9948, Train: 1.0280, Val: 1.0727, Test: 1.0560
Epoch: 999, Loss: 2.9923, Train: 1.0214, Val: 1.0734, Test: 1.0552


In [464]:
model_gat = ModelGAT(hidden_channels=64, encoder_layers=0, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.01)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.0158, Train: 1.1042, Val: 1.1127, Test: 1.0937
Epoch: 200, Loss: 3.4911, Train: 1.0982, Val: 1.1213, Test: 1.0963
Epoch: 300, Loss: 3.1613, Train: 1.0865, Val: 1.1203, Test: 1.0979
Epoch: 400, Loss: 3.0196, Train: 1.0779, Val: 1.1189, Test: 1.0999
Epoch: 500, Loss: 2.9405, Train: 1.1282, Val: 1.1683, Test: 1.1566
Epoch: 600, Loss: 2.8763, Train: 1.0859, Val: 1.1397, Test: 1.1270
Epoch: 700, Loss: 2.7768, Train: 1.0863, Val: 1.1377, Test: 1.1314
Epoch: 800, Loss: 2.7122, Train: 1.0728, Val: 1.1313, Test: 1.1298
Epoch: 900, Loss: 2.7019, Train: 1.0642, Val: 1.1273, Test: 1.1277
Epoch: 999, Loss: 2.6808, Train: 1.0570, Val: 1.1252, Test: 1.1192


In [457]:
model_gat = ModelGAT(hidden_channels=64, encoder_layers=0, heads=8).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.001)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.3128, Train: 1.2289, Val: 1.2453, Test: 1.2321
Epoch: 200, Loss: 4.4575, Train: 1.1690, Val: 1.1807, Test: 1.1613
Epoch: 300, Loss: 4.1129, Train: 1.1360, Val: 1.1603, Test: 1.1279
Epoch: 400, Loss: 3.6627, Train: 1.1218, Val: 1.1474, Test: 1.1113
Epoch: 500, Loss: 3.4442, Train: 1.1607, Val: 1.1946, Test: 1.1556
Epoch: 600, Loss: 3.2731, Train: 1.0904, Val: 1.1302, Test: 1.0954
Epoch: 700, Loss: 3.1963, Train: 1.1024, Val: 1.1361, Test: 1.0992
Epoch: 800, Loss: 3.1096, Train: 1.1247, Val: 1.1595, Test: 1.1210
Epoch: 900, Loss: 3.0998, Train: 1.1170, Val: 1.1531, Test: 1.1170
Epoch: 999, Loss: 2.9952, Train: 1.1049, Val: 1.1459, Test: 1.1109


In [468]:
model_gat = ModelGAT(hidden_channels=64, encoder_layers=1, heads=2).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.1738, Train: 1.0620, Val: 1.0964, Test: 1.0849
Epoch: 200, Loss: 4.5964, Train: 1.0622, Val: 1.1061, Test: 1.0906
Epoch: 300, Loss: 4.4405, Train: 1.0371, Val: 1.0877, Test: 1.0657
Epoch: 400, Loss: 3.9395, Train: 1.4496, Val: 1.4728, Test: 1.4703
Epoch: 500, Loss: 3.8427, Train: 1.5670, Val: 1.5816, Test: 1.5883
Epoch: 600, Loss: 3.6192, Train: 1.2957, Val: 1.3194, Test: 1.3199
Epoch: 700, Loss: 3.5235, Train: 1.2418, Val: 1.2628, Test: 1.2617
Epoch: 800, Loss: 3.4134, Train: 1.5704, Val: 1.5805, Test: 1.5860
Epoch: 900, Loss: 3.3780, Train: 1.7339, Val: 1.7393, Test: 1.7533
Epoch: 999, Loss: 3.4880, Train: 1.5358, Val: 1.5429, Test: 1.5506


In [459]:
model_gat = ModelGAT(hidden_channels=80, encoder_layers=0, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.001)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.1797, Train: 1.2058, Val: 1.2197, Test: 1.2065
Epoch: 200, Loss: 4.5211, Train: 1.1683, Val: 1.1785, Test: 1.1560
Epoch: 300, Loss: 4.1476, Train: 1.1343, Val: 1.1593, Test: 1.1269
Epoch: 400, Loss: 3.6012, Train: 1.0584, Val: 1.0858, Test: 1.0609
Epoch: 500, Loss: 3.5085, Train: 1.0638, Val: 1.0947, Test: 1.0717
Epoch: 600, Loss: 3.4349, Train: 1.0979, Val: 1.1348, Test: 1.1088
Epoch: 700, Loss: 3.3463, Train: 1.0789, Val: 1.1141, Test: 1.0862
Epoch: 800, Loss: 3.1835, Train: 1.0861, Val: 1.1254, Test: 1.0971
Epoch: 900, Loss: 3.0586, Train: 1.0723, Val: 1.1168, Test: 1.0904
Epoch: 999, Loss: 2.9751, Train: 1.0532, Val: 1.0997, Test: 1.0789


In [466]:
model_gat = ModelGAT(hidden_channels=80, encoder_layers=0, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.2414, Train: 1.1875, Val: 1.2021, Test: 1.1793
Epoch: 200, Loss: 3.6659, Train: 1.1332, Val: 1.1674, Test: 1.1428
Epoch: 300, Loss: 3.5407, Train: 1.0976, Val: 1.1274, Test: 1.1072
Epoch: 400, Loss: 3.2267, Train: 1.0748, Val: 1.1115, Test: 1.0904
Epoch: 500, Loss: 3.3046, Train: 1.1486, Val: 1.1831, Test: 1.1667
Epoch: 600, Loss: 2.9951, Train: 1.0990, Val: 1.1380, Test: 1.1251
Epoch: 700, Loss: 2.9112, Train: 1.0462, Val: 1.0950, Test: 1.0813
Epoch: 800, Loss: 2.7637, Train: 1.0414, Val: 1.0971, Test: 1.0872
Epoch: 900, Loss: 2.7222, Train: 1.0849, Val: 1.1428, Test: 1.1400
Epoch: 999, Loss: 2.6722, Train: 1.0077, Val: 1.0807, Test: 1.0641


In [462]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=2, heads=5).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.001)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.2538, Train: 1.0965, Val: 1.1131, Test: 1.0990
Epoch: 200, Loss: 4.9420, Train: 1.1756, Val: 1.1894, Test: 1.1654
Epoch: 300, Loss: 4.7979, Train: 1.1778, Val: 1.1999, Test: 1.1739
Epoch: 400, Loss: 4.6481, Train: 1.1146, Val: 1.1405, Test: 1.1120
Epoch: 500, Loss: 4.2150, Train: 1.0698, Val: 1.0939, Test: 1.0712
Epoch: 600, Loss: 3.9422, Train: 1.0998, Val: 1.1254, Test: 1.1052
Epoch: 700, Loss: 3.9681, Train: 1.1418, Val: 1.1636, Test: 1.1447
Epoch: 800, Loss: 3.9500, Train: 1.1392, Val: 1.1722, Test: 1.1439
Epoch: 900, Loss: 3.7133, Train: 1.1358, Val: 1.1642, Test: 1.1406
Epoch: 999, Loss: 3.7397, Train: 1.2707, Val: 1.2884, Test: 1.2801


In [470]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=1, heads=4).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.2330, Train: 1.1799, Val: 1.2096, Test: 1.1972
Epoch: 200, Loss: 4.6729, Train: 1.0219, Val: 1.0702, Test: 1.0438
Epoch: 300, Loss: 4.2860, Train: 1.1576, Val: 1.1945, Test: 1.1463
Epoch: 400, Loss: 3.8653, Train: 1.1413, Val: 1.1783, Test: 1.1550
Epoch: 500, Loss: 3.7040, Train: 1.1263, Val: 1.1652, Test: 1.1364
Epoch: 600, Loss: 3.4120, Train: 1.0808, Val: 1.1179, Test: 1.0999
Epoch: 700, Loss: 3.3644, Train: 1.0965, Val: 1.1358, Test: 1.1095
Epoch: 800, Loss: 3.2905, Train: 1.0799, Val: 1.1203, Test: 1.1007
Epoch: 900, Loss: 3.2085, Train: 1.0728, Val: 1.1159, Test: 1.0971
Epoch: 999, Loss: 3.1055, Train: 1.0298, Val: 1.0751, Test: 1.0580


In [473]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=1, heads=5).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.2032, Train: 1.1524, Val: 1.1855, Test: 1.1651
Epoch: 200, Loss: 4.2145, Train: 1.0431, Val: 1.0778, Test: 1.0537
Epoch: 300, Loss: 4.9850, Train: 1.1767, Val: 1.2009, Test: 1.1812
Epoch: 400, Loss: 3.6671, Train: 1.1427, Val: 1.1783, Test: 1.1564
Epoch: 500, Loss: 3.5351, Train: 1.1141, Val: 1.1477, Test: 1.1269
Epoch: 600, Loss: 3.4521, Train: 1.0851, Val: 1.1207, Test: 1.1051
Epoch: 700, Loss: 3.4621, Train: 1.1068, Val: 1.1427, Test: 1.1259
Epoch: 800, Loss: 3.3379, Train: 1.1021, Val: 1.1333, Test: 1.1193
Epoch: 900, Loss: 3.2689, Train: 1.0679, Val: 1.1107, Test: 1.0939
Epoch: 999, Loss: 3.0926, Train: 1.0652, Val: 1.1079, Test: 1.0913


### Эта модель показала в один момент даже score 1.02, при этом не столкнулась с переобучением

In [474]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=1, heads=3).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.8322, Train: 1.0243, Val: 1.0683, Test: 1.0491
Epoch: 200, Loss: 4.4309, Train: 1.0236, Val: 1.0671, Test: 1.0441
Epoch: 300, Loss: 4.3275, Train: 1.0031, Val: 1.0479, Test: 1.0268
Epoch: 400, Loss: 4.1384, Train: 1.0978, Val: 1.1441, Test: 1.1160
Epoch: 500, Loss: 3.9613, Train: 1.2139, Val: 1.2576, Test: 1.2209
Epoch: 600, Loss: 3.6545, Train: 1.1620, Val: 1.2053, Test: 1.1814
Epoch: 700, Loss: 3.5236, Train: 1.1689, Val: 1.2067, Test: 1.1902
Epoch: 800, Loss: 3.4349, Train: 1.0692, Val: 1.1185, Test: 1.0978
Epoch: 900, Loss: 3.3673, Train: 1.1506, Val: 1.1912, Test: 1.1751
Epoch: 999, Loss: 3.4448, Train: 1.0461, Val: 1.0931, Test: 1.0735


In [475]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=2, heads=3).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 5.9921, Train: 1.6832, Val: 1.7039, Test: 1.6840
Epoch: 200, Loss: 4.9108, Train: 1.1342, Val: 1.1530, Test: 1.1295
Epoch: 300, Loss: 4.6210, Train: 1.0639, Val: 1.0857, Test: 1.0700
Epoch: 400, Loss: 4.2120, Train: 1.0370, Val: 1.0757, Test: 1.0646
Epoch: 500, Loss: 4.0769, Train: 1.0960, Val: 1.1197, Test: 1.1041
Epoch: 600, Loss: 3.8828, Train: 1.0905, Val: 1.1225, Test: 1.1139
Epoch: 700, Loss: 3.6750, Train: 1.1779, Val: 1.1985, Test: 1.1760
Epoch: 800, Loss: 3.7792, Train: 1.1612, Val: 1.1899, Test: 1.1729
Epoch: 900, Loss: 3.4844, Train: 1.2323, Val: 1.2566, Test: 1.2468
Epoch: 999, Loss: 3.4151, Train: 1.1113, Val: 1.1483, Test: 1.1359


In [476]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=2, heads=6).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.9518, Train: 1.1687, Val: 1.1937, Test: 1.1661
Epoch: 200, Loss: 9.9366, Train: 1.1914, Val: 1.2035, Test: 1.1755
Epoch: 300, Loss: 4.5712, Train: 1.1288, Val: 1.1347, Test: 1.1111
Epoch: 400, Loss: 4.1736, Train: 1.1713, Val: 1.1895, Test: 1.1731
Epoch: 500, Loss: 4.2004, Train: 1.7632, Val: 1.7836, Test: 1.7971
Epoch: 600, Loss: 3.9216, Train: 1.1625, Val: 1.1857, Test: 1.1640
Epoch: 700, Loss: 3.7148, Train: 1.1684, Val: 1.1892, Test: 1.1668
Epoch: 800, Loss: 3.5247, Train: 1.1866, Val: 1.2053, Test: 1.1796
Epoch: 900, Loss: 3.3742, Train: 1.1539, Val: 1.1692, Test: 1.1375
Epoch: 999, Loss: 3.3372, Train: 1.1851, Val: 1.2044, Test: 1.1756


In [477]:
model_gat = ModelGAT(hidden_channels=8 * 20, encoder_layers=1, heads=3).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=1000, print_each=100)

  0%|          | 0/999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.9088, Train: 1.1036, Val: 1.1391, Test: 1.1224
Epoch: 200, Loss: 4.5454, Train: 1.0614, Val: 1.1051, Test: 1.0811
Epoch: 300, Loss: 4.2345, Train: 1.0278, Val: 1.0730, Test: 1.0520
Epoch: 400, Loss: 3.7955, Train: 1.0934, Val: 1.1327, Test: 1.1111
Epoch: 500, Loss: 3.5925, Train: 1.1173, Val: 1.1589, Test: 1.1469
Epoch: 600, Loss: 3.5038, Train: 1.0987, Val: 1.1349, Test: 1.1224
Epoch: 700, Loss: 3.4772, Train: 1.1234, Val: 1.1564, Test: 1.1455
Epoch: 800, Loss: 3.3589, Train: 1.1573, Val: 1.1902, Test: 1.1804
Epoch: 900, Loss: 3.2840, Train: 1.0729, Val: 1.1086, Test: 1.0982
Epoch: 999, Loss: 3.2843, Train: 1.1630, Val: 1.1900, Test: 1.1943


In [478]:
model_gat = ModelGAT(hidden_channels=128, encoder_layers=1, heads=3).to(device)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.005)

start_train(model_gat, optimizer_gat, max_epochs=5000, print_each=100)

  0%|          | 0/4999 [00:00<?, ?it/s]

Epoch: 100, Loss: 4.9427, Train: 1.2372, Val: 1.2702, Test: 1.2494
Epoch: 200, Loss: 4.4675, Train: 1.2055, Val: 1.2343, Test: 1.2059
Epoch: 300, Loss: 3.6148, Train: 1.1348, Val: 1.1693, Test: 1.1547
Epoch: 400, Loss: 3.5126, Train: 1.1012, Val: 1.1375, Test: 1.1199
Epoch: 500, Loss: 3.3875, Train: 1.1335, Val: 1.1681, Test: 1.1488
Epoch: 600, Loss: 3.3160, Train: 1.0979, Val: 1.1431, Test: 1.1194
Epoch: 700, Loss: 3.3056, Train: 1.0973, Val: 1.1391, Test: 1.1193
Epoch: 800, Loss: 3.1608, Train: 1.1126, Val: 1.1568, Test: 1.1340
Epoch: 900, Loss: 3.1805, Train: 1.0718, Val: 1.1165, Test: 1.1043
Epoch: 1000, Loss: 3.1041, Train: 1.0858, Val: 1.1317, Test: 1.1203
Epoch: 1100, Loss: 3.0961, Train: 1.0275, Val: 1.0761, Test: 1.0624
Epoch: 1200, Loss: 2.9863, Train: 1.0497, Val: 1.1018, Test: 1.0852
Epoch: 1300, Loss: 2.9583, Train: 1.0692, Val: 1.1266, Test: 1.1101
Epoch: 1400, Loss: 2.9421, Train: 1.0573, Val: 1.1082, Test: 1.0934
Epoch: 1500, Loss: 2.8465, Train: 1.0506, Val: 1.0990, Te

## Получил стабильно обучающуюся модель на GATConv, rmse 1.0195, и она продолжает спокойно обучаться