Автор: Лапутин Фёдор

In [7]:
import optuna
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import SAGEConv, to_hetero
import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.data import HeteroData

## Задание

1) Подберите оптимальные параметры для сети из примера выше (2 балла)

In [8]:
# Load the MovieLens heterogeneous graph (stored under dataset_path) and move
# it to the GPU when one is available, otherwise keep it on the CPU.
dataset_path = "/tmp/"
dataset = MovieLens(root=dataset_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = dataset[0].to(device)

# Give every user an identity-matrix row as its feature vector so that user
# nodes have features to use during message passing.
data["user"].x = torch.eye(data["user"].num_nodes, device=device)
# The explicit node count is dropped once features exist
# (presumably it is then inferred from x -- TODO confirm).
del data["user"].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing
data = T.ToUndirected()(data)
del data["movie", "rev_rates", "user"].edge_label  # Remove "reverse" label

# Perform a link-level split into training, validation, and test edges:
# 10% of the ('user', 'rates', 'movie') edges go to validation and 10% to test.
# neg_sampling_ratio=0.0 presumably disables negative-edge sampling, since the
# model regresses ratings instead of classifying edge existence.
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[("user", "rates", "movie")],
    rev_edge_types=[("movie", "rev_rates", "user")],
)(data)

# Per-rating loss weights: count how often each rating value occurs among the
# training edge labels, then divide the largest count by every count. The most
# frequent rating gets weight 1 and rarer ratings get larger weights.
weight = torch.bincount(train_data["user", "movie"].edge_label)
weight = weight.max() / weight

def weighted_mse_loss(pred, target, weight=None):
    """Return the mean squared error, optionally re-weighted per target value.

    Args:
        pred: Tensor of predictions.
        target: Tensor of targets; it is cast to ``pred.dtype`` for the
            difference and, when ``weight`` is given, also used as an index
            into ``weight``.
        weight: Optional tensor looked up as ``weight[target]`` to scale each
            squared error. ``None`` means every sample has weight 1.

    Returns:
        A scalar tensor holding the mean of the (weighted) squared errors.
    """
    squared_error = (pred - target.to(pred.dtype)).pow(2)
    if weight is None:
        # Unweighted case: equivalent to multiplying every term by 1.0.
        return squared_error.mean()
    per_sample = weight[target].to(pred.dtype)
    return (per_sample * squared_error).mean()


class GNNEncoder(torch.nn.Module):
    """Two-layer GraphSAGE encoder producing node embeddings.

    Args:
        hidden_channels: Output size of the first convolution.
        out_channels: Output size of the second convolution.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        # (-1, -1) leaves the source/destination input sizes unspecified so
        # the layers can infer them lazily (see the PyG lazy-init docs).
        lazy_in = (-1, -1)
        self.conv1 = SAGEConv(lazy_in, hidden_channels)
        self.conv2 = SAGEConv(lazy_in, out_channels)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the resulting features."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)


class EdgeDecoder(torch.nn.Module):
    """MLP that maps a (user, movie) embedding pair to one scalar per edge.

    Args:
        hidden_channels: Size of each node embedding; the first linear layer
            consumes the concatenation of two such embeddings.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(in_features=2 * hidden_channels, out_features=hidden_channels)
        self.lin2 = Linear(in_features=hidden_channels, out_features=1)

    def forward(self, z_dict, edge_label_index):
        """Score every edge listed in ``edge_label_index``.

        Args:
            z_dict: Mapping with ``"user"`` and ``"movie"`` embedding tensors.
            edge_label_index: Pair of index tensors (user indices, movie
                indices), one entry per edge to score.

        Returns:
            A flat tensor with one prediction per edge.
        """
        user_idx, movie_idx = edge_label_index
        user_emb = z_dict["user"][user_idx]
        movie_emb = z_dict["movie"][movie_idx]
        pair = torch.cat([user_emb, movie_emb], dim=-1)

        hidden = self.lin1(pair).relu()
        return self.lin2(hidden).view(-1)


class Model(torch.nn.Module):
    """Rating predictor: heterogeneous GNN encoder followed by an edge decoder.

    The encoder is a ``GNNEncoder`` converted with ``to_hetero`` using the
    metadata of the module-level ``data`` graph, so ``data`` must already be
    defined when the model is constructed.

    Args:
        hidden_channels: Width used for both encoder layers and the decoder.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        homogeneous_encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(homogeneous_encoder, data.metadata(), aggr="sum")
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        """Encode all nodes, then decode a prediction for each labelled edge."""
        node_embeddings = self.encoder(x_dict, edge_index_dict)
        return self.decoder(node_embeddings, edge_label_index)

 
def objective(trial):
    """Optuna objective: train a GraphSAGE ``Model`` and return validation RMSE.

    Samples ``hidden_channels`` (int in [16, 64]) and ``lr`` (log-uniform in
    [1e-4, 1e-2]), trains full-batch for 100 epochs on the module-level
    ``train_data`` and reports the RMSE on ``val_data`` after every epoch so
    the study's pruner can stop unpromising trials early.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values.

    Returns:
        The validation RMSE measured after the final epoch, as a float.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner decides to stop the trial.
    """
    hidden_channels = trial.suggest_int('hidden_channels', 16, 64)
    # suggest_loguniform is deprecated (it emits a FutureWarning);
    # suggest_float(..., log=True) samples the same log-uniform range.
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)

    model = Model(hidden_channels=hidden_channels).to(device)

    # The encoder layers are created with lazy (-1, -1) input sizes. Run one
    # forward pass so their parameters are materialised before the optimizer
    # collects them.
    with torch.no_grad():
        model.encoder(train_data.x_dict, train_data.edge_index_dict)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def train():
        """Run one full-batch optimisation step and return the training loss."""
        model.train()
        optimizer.zero_grad()
        pred = model(
            train_data.x_dict,
            train_data.edge_index_dict,
            train_data["user", "movie"].edge_label_index,
        )
        target = train_data["user", "movie"].edge_label
        loss = weighted_mse_loss(pred, target, weight)
        loss.backward()
        optimizer.step()
        return float(loss)

    @torch.no_grad()
    def test(split):
        """Return the RMSE of clamped predictions on the given data split."""
        model.eval()
        pred = model(
            split.x_dict, split.edge_index_dict, split["user", "movie"].edge_label_index
        )
        # Keep predictions inside [0, 5] (presumably the valid rating range).
        pred = pred.clamp(min=0, max=5)
        target = split["user", "movie"].edge_label.float()
        rmse = F.mse_loss(pred, target).sqrt()
        return float(rmse)

    for epoch in range(1, 101):
        train()
        val_rmse = test(val_data)
        # Report the intermediate score so the pruner can act on it.
        trial.report(val_rmse, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return val_rmse


# Minimise the validation RMSE returned by `objective`; the search stops after
# 50 trials or when the 600 timeout is reached, whichever comes first.
study = optuna.create_study(direction='minimize')
study.optimize(
    objective,
    n_trials=50,
    timeout=600,
)

# Report the best objective value found and the hyperparameters behind it.
print(f"Best trial: {study.best_value}")
print(f"Best params: {study.best_params}")


[I 2024-06-08 18:25:07,082] A new study created in memory with name: no-name-c42aa0be-a08c-4a69-a22a-d2143c2661fc
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
[I 2024-06-08 18:25:07,798] Trial 0 finished with value: 1.4604647159576416 and parameters: {'hidden_channels': 33, 'lr': 0.0002586726568968002}. Best is trial 0 with value: 1.4604647159576416.
[I 2024-06-08 18:25:08,296] Trial 1 finished with value: 1.3458919525146484 and parameters: {'hidden_channels': 55, 'lr': 0.0002976558635336231}. Best is trial 1 with value: 1.3458919525146484.
[I 2024-06-08 18:25:08,786] Trial 2 finished with value: 1.3755671977996826 and parameters: {'hidden_channels': 47, 'lr': 0.00032178154801175917}. Best is trial 1 with value: 1.3458919525146484.
[I 2024-06-08 18:25:09,272] Trial 3 finished with value: 1.1463196277618408 and parameters: {'hidden_channels': 17, 'lr': 0.009159024850660927}. Best is trial 3 with value: 1.1463196277618408.
[I 2024-06-08 18:25:09,747] Trial 4 finished with value: 1.2

Best trial: 1.1326123476028442
Best params: {'hidden_channels': 45, 'lr': 0.007365712892874416}


## Задание

2) Попробуйте вместо GraphSAGE использовать модуль Graph Attention и также подберите оптимальные параметры (2 балла)

In [11]:
import optuna
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GATConv, to_hetero
import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens

# Load the MovieLens heterogeneous graph (stored under dataset_path) and move
# it to the GPU when one is available, otherwise keep it on the CPU.
dataset_path = "/tmp/"
dataset = MovieLens(root=dataset_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = dataset[0].to(device)

# Give every user an identity-matrix row as its feature vector so that user
# nodes have features to use during message passing.
data["user"].x = torch.eye(data["user"].num_nodes, device=device)
# The explicit node count is dropped once features exist
# (presumably it is then inferred from x -- TODO confirm).
del data["user"].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing
data = T.ToUndirected()(data)
del data["movie", "rev_rates", "user"].edge_label  # Remove "reverse" label

# Perform a link-level split into training, validation, and test edges:
# 10% of the ('user', 'rates', 'movie') edges go to validation and 10% to test.
# neg_sampling_ratio=0.0 presumably disables negative-edge sampling, since the
# model regresses ratings instead of classifying edge existence.
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[("user", "rates", "movie")],
    rev_edge_types=[("movie", "rev_rates", "user")],
)(data)


# Per-rating loss weights: count how often each rating value occurs among the
# training edge labels, then divide the largest count by every count. The most
# frequent rating gets weight 1 and rarer ratings get larger weights.
weight = torch.bincount(train_data["user", "movie"].edge_label)
weight = weight.max() / weight


def weighted_mse_loss(pred, target, weight=None):
    """Compute the mean of squared errors, each scaled by a per-target weight.

    Args:
        pred: Tensor of predictions.
        target: Tensor of targets; cast to ``pred.dtype`` before subtracting
            and used as an index into ``weight`` when ``weight`` is given.
        weight: Optional lookup tensor; ``weight[target]`` is the factor
            applied to each squared error. With ``None`` the factor is 1.0.

    Returns:
        A scalar tensor with the mean weighted squared error.
    """
    if weight is None:
        scale = 1.0
    else:
        # One weight per sample, selected by that sample's target value.
        scale = weight[target].to(pred.dtype)
    residual = pred - target.to(pred.dtype)
    return (scale * residual.pow(2)).mean()


class GNNEncoder(torch.nn.Module):
    """Two-layer graph-attention (GAT) encoder producing node embeddings.

    Args:
        hidden_channels: Per-head output size of the first attention layer.
        out_channels: Output size of the second attention layer.
        heads: Number of attention heads in the first layer; their outputs are
            concatenated (``concat=True``). The second layer uses one head.
    """

    def __init__(self, hidden_channels, out_channels, heads):
        super().__init__()
        # (-1, -1) leaves the source/destination input sizes unspecified so
        # the layers can infer them lazily (see the PyG lazy-init docs).
        lazy_in = (-1, -1)
        # Self-loops are disabled in both layers (presumably because source
        # and destination node types differ in this bipartite graph).
        self.conv1 = GATConv(
            lazy_in,
            hidden_channels,
            heads=heads,
            concat=True,
            add_self_loops=False,
        )
        self.conv2 = GATConv(
            lazy_in,
            out_channels,
            heads=1,
            concat=False,
            add_self_loops=False,
        )

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the resulting features."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)


class EdgeDecoder(torch.nn.Module):
    """Two-layer MLP turning a (user, movie) embedding pair into one score.

    Args:
        hidden_channels: Size of each node embedding; the first linear layer
            takes two concatenated embeddings as input.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        pair_width = 2 * hidden_channels
        self.lin1 = Linear(pair_width, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        """Return a flat tensor with one prediction per labelled edge.

        Args:
            z_dict: Mapping with ``"user"`` and ``"movie"`` embedding tensors.
            edge_label_index: Pair of index tensors (user indices, movie
                indices), one entry per edge to score.
        """
        src, dst = edge_label_index
        # Look up both endpoints of every edge and join them feature-wise.
        endpoints = (z_dict["user"][src], z_dict["movie"][dst])
        features = torch.cat(endpoints, dim=-1)

        activated = self.lin1(features).relu()
        scores = self.lin2(activated)
        return scores.view(-1)


class Model(torch.nn.Module):
    """Rating predictor: heterogeneous GAT encoder followed by an edge decoder.

    The encoder is a ``GNNEncoder`` converted with ``to_hetero`` using the
    metadata of the module-level ``data`` graph, so ``data`` must already be
    defined when the model is constructed.

    Args:
        hidden_channels: Width used for both encoder layers and the decoder.
        heads: Number of attention heads passed to the encoder.
    """

    def __init__(self, hidden_channels, heads):
        super().__init__()
        homogeneous_encoder = GNNEncoder(hidden_channels, hidden_channels, heads=heads)
        self.encoder = to_hetero(homogeneous_encoder, data.metadata(), aggr="sum")
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        """Encode all nodes, then decode a prediction for each labelled edge."""
        node_embeddings = self.encoder(x_dict, edge_index_dict)
        return self.decoder(node_embeddings, edge_label_index)

def objective(trial):
    """Optuna objective: train a GAT-based ``Model`` and return validation RMSE.

    Samples ``hidden_channels`` (int in [16, 64]), ``lr`` (log-uniform in
    [1e-4, 1e-2]) and ``heads`` (int in [1, 8]), trains full-batch for 100
    epochs on the module-level ``train_data`` and reports the RMSE on
    ``val_data`` after every epoch so the study's pruner can stop unpromising
    trials early.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values.

    Returns:
        The validation RMSE measured after the final epoch, as a float.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner decides to stop the trial.
    """
    hidden_channels = trial.suggest_int('hidden_channels', 16, 64)
    # suggest_loguniform is deprecated (it emits a FutureWarning);
    # suggest_float(..., log=True) samples the same log-uniform range.
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    heads = trial.suggest_int('heads', 1, 8)

    model = Model(hidden_channels=hidden_channels, heads=heads).to(device)

    # The encoder layers are created with lazy (-1, -1) input sizes. Run one
    # forward pass so their parameters are materialised before the optimizer
    # collects them.
    with torch.no_grad():
        model.encoder(train_data.x_dict, train_data.edge_index_dict)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def train():
        """Run one full-batch optimisation step and return the training loss."""
        model.train()
        optimizer.zero_grad()
        pred = model(
            train_data.x_dict,
            train_data.edge_index_dict,
            train_data["user", "movie"].edge_label_index,
        )
        target = train_data["user", "movie"].edge_label
        loss = weighted_mse_loss(pred, target, weight)
        loss.backward()
        optimizer.step()
        return float(loss)

    @torch.no_grad()
    def test(split):
        """Return the RMSE of clamped predictions on the given data split."""
        model.eval()
        pred = model(
            split.x_dict, split.edge_index_dict, split["user", "movie"].edge_label_index
        )
        # Keep predictions inside [0, 5] (presumably the valid rating range).
        pred = pred.clamp(min=0, max=5)
        target = split["user", "movie"].edge_label.float()
        rmse = F.mse_loss(pred, target).sqrt()
        return float(rmse)

    for epoch in range(1, 101):
        train()
        val_rmse = test(val_data)
        # Report the intermediate score so the pruner can act on it.
        trial.report(val_rmse, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return val_rmse

# Minimise the validation RMSE returned by `objective`; the search stops after
# 50 trials or when the 600 timeout is reached, whichever comes first.
study = optuna.create_study(direction='minimize')
study.optimize(
    objective,
    n_trials=50,
    timeout=600,
)

# Report the best objective value found and the hyperparameters behind it.
print(f"Best trial: {study.best_value}")
print(f"Best params: {study.best_params}")


[I 2024-06-08 18:33:54,171] A new study created in memory with name: no-name-4922da34-4207-49af-9f01-57cf320438ce
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
[I 2024-06-08 18:33:55,225] Trial 0 finished with value: 1.1064506769180298 and parameters: {'hidden_channels': 58, 'lr': 0.00498431866073796, 'heads': 2}. Best is trial 0 with value: 1.1064506769180298.
[I 2024-06-08 18:33:55,970] Trial 1 finished with value: 1.3360326290130615 and parameters: {'hidden_channels': 47, 'lr': 0.0015566838773157512, 'heads': 2}. Best is trial 0 with value: 1.1064506769180298.
[I 2024-06-08 18:33:56,701] Trial 2 finished with value: 1.4363337755203247 and parameters: {'hidden_channels': 17, 'lr': 0.0011710143886116708, 'heads': 6}. Best is trial 0 with value: 1.1064506769180298.
[I 2024-06-08 18:33:57,465] Trial 3 finished with value: 1.2740157842636108 and parameters: {'hidden_channels': 58, 'lr': 0.0020732149435204934, 'heads': 2}. Best is trial 0 with value: 1.1064506769180298.
[I 2024-06-08 

Best trial: 1.1064506769180298
Best params: {'hidden_channels': 58, 'lr': 0.00498431866073796, 'heads': 2}


Выводы:
- После подбора гиперпараметров (GraphSAGE) удалось добиться RMSE 1.13 на валидационной выборке.
- С Graph Attention получили ещё небольшой прирост: итоговое качество составило RMSE 1.10 на валидации.