# Работа с готовой моделью h5

In [None]:
!pip uninstall -y torch-scatter torch-sparse torch-geometric
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-geometric

[0mLooking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.6.0%2Bcu124/torch_scatter-2.1.2%2Bpt26cu124-cp311-cp311-linux_x86_64.whl (10.8 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/10.8 MB[0m [31m?[0m eta [36m-:--:--[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[0mLooking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
Collecting torch-sparse
[31mERROR: Operation cancelled by user[0m[31m
[0mCollecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully

In [None]:
import torch
import h5py
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [None]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

In [None]:
class TypeSafeGCN(torch.nn.Module):
    """Two-layer graph convolutional network built from ``GCNConv`` layers.

    The input is cast to ``float32`` before the first layer, so callers may
    pass integer or double tensors.

    Args:
        input_dim: Number of input features per node.
        hidden_dim: Width of the hidden layer.
        output_dim: Size of the embedding produced for each node.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Stored on the instance so the architecture can be read back later.
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Attribute names conv1/conv2 determine the state-dict key prefixes.
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index=None):
        """Compute node embeddings.

        Args:
            x: Node feature tensor; converted to ``float32``.
            edge_index: Long tensor of shape ``(2, num_edges)``. When omitted,
                an empty edge list is used instead of passing ``None`` on to
                ``GCNConv`` (which cannot handle it).
                NOTE(review): GCNConv adds self-loops by default, so with no
                edges each node should be transformed independently - confirm
                for the installed PyG version.

        Returns:
            Tensor with one ``output_dim``-sized embedding per node.
        """
        x = x.float()
        if edge_index is None:
            # Empty graph on the same device as the features.
            edge_index = torch.empty((2, 0), dtype=torch.long, device=x.device)
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

In [None]:
def load_full_model(filename, device='cpu'):
    """Restore the GCN model, feature scaler and feature list from an HDF5 file.

    File layout read by this function:
      * group ``model`` with attributes ``input_dim``, ``hidden_dim`` and
        ``output_dim``, and a sub-group ``weights`` holding the datasets
        ``conv1.lin.weight``, ``conv1.bias``, ``conv2.lin.weight``, ``conv2.bias``;
      * datasets ``scaler/mean_``, ``scaler/scale_`` and ``scaler/var_``;
      * dataset ``feature_columns`` with the feature names.

    Args:
        filename: Path to the HDF5 file.
        device: Device the model is moved to (default ``'cpu'``).

    Returns:
        Tuple ``(model, scaler, feature_columns)``: a ``TypeSafeGCN`` with the
        stored weights, a ``StandardScaler`` carrying the stored statistics,
        and the list of feature column names as ``str``.

    Raises:
        KeyError: If an expected group, dataset or attribute is missing.
        Other exceptions from ``h5py`` or ``load_state_dict`` (for example on a
        weight-shape mismatch) are propagated unchanged.
    """
    # State-dict keys used by the two GCNConv layers of TypeSafeGCN.
    weight_names = (
        'conv1.lin.weight',
        'conv1.bias',
        'conv2.lin.weight',
        'conv2.bias',
    )

    with h5py.File(filename, 'r') as f:
        model_grp = f['model']
        # HDF5 attributes are returned as NumPy scalars; layer sizes should be
        # plain Python ints.
        input_dim = int(model_grp.attrs['input_dim'])
        hidden_dim = int(model_grp.attrs['hidden_dim'])
        output_dim = int(model_grp.attrs['output_dim'])

        model = TypeSafeGCN(input_dim, hidden_dim, output_dim).to(device)

        weights_grp = model_grp['weights']
        state_dict = {
            name: torch.tensor(weights_grp[name][...]) for name in weight_names
        }
        # load_state_dict copies the values into the parameters already on `device`.
        model.load_state_dict(state_dict)

        # Rebuild the StandardScaler from its stored statistics.
        scaler = StandardScaler()
        scaler.mean_ = f['scaler/mean_'][...]
        scaler.scale_ = f['scaler/scale_'][...]
        scaler.var_ = f['scaler/var_'][...]
        # A fitted scaler also carries the expected input width; without it
        # transform() cannot validate the number of columns it is given.
        scaler.n_features_in_ = int(np.size(scaler.mean_))

        # Depending on how the names were written (and on the h5py version)
        # they come back as bytes or as str; accept both.
        feature_columns = [
            col.decode('utf-8') if isinstance(col, bytes) else str(col)
            for col in f['feature_columns'][...]
        ]

    return model, scaler, feature_columns

In [None]:
def get_recommendations(model, scaler, feature_columns, data, song_list, n=5):
    """Recommend songs whose embeddings are closest to the given seed songs.

    Each seed is looked up in ``data`` by exact (case-insensitive, trimmed)
    name and by a literal, case-insensitive substring match on the artists
    column. The seed embeddings are averaged into one query vector and the
    nearest rows of ``data`` by cosine distance are returned, excluding every
    row that shares a name with a seed.

    Args:
        model: Module called as ``model(features, edge_index)``.
        scaler: Object with a ``transform`` method applied to the raw features.
        feature_columns: Names of the feature columns in ``data``.
        data: DataFrame with ``name``, ``artists`` and the feature columns.
        song_list: List of dicts with keys ``'name'`` and ``'artists'``.
        n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``name`` and ``artists`` holding up to ``n``
        rows. It is empty when ``song_list`` is empty, ``n`` is not positive,
        any seed cannot be found, or the lookup / scaling step fails (a
        message is printed in the last three cases).
    """
    # Every "nothing to return" path yields the same schema.
    empty_result = pd.DataFrame(columns=['name', 'artists'])

    if not song_list or n <= 0:
        return empty_result

    input_features = []
    found_songs = []

    try:
        # Normalised columns are loop-invariant, so build them once.
        names_norm = data['name'].str.lower().str.strip()
        artists_norm = data['artists'].str.lower()

        for song in song_list:
            wanted_name = song['name'].lower().strip()
            wanted_artist = song['artists'].lower().strip()

            # regex=False: artist names are plain text, not patterns, so
            # characters such as "(" or "+" must be matched literally.
            # na=False: rows without an artist never match.
            mask = (names_norm == wanted_name) & artists_norm.str.contains(
                wanted_artist, regex=False, na=False
            )

            matches = data[mask]
            if matches.empty:
                print(f"Песня '{song['name']}' не найдена")
                return empty_result

            # Several rows may match; the first one is used as the seed.
            song_data = matches.iloc[0]
            input_features.append(song_data[feature_columns].values)
            found_songs.append(song_data['name'])
    except Exception as e:
        # Best-effort lookup: malformed input is reported, not raised.
        print(f"Ошибка обработки: {str(e)}")
        return empty_result

    # Scale the seed features.
    try:
        X_scaled = scaler.transform(np.stack(input_features))
    except ValueError as e:
        print(f"Ошибка нормализации: {str(e)}")
        return empty_result

    # The catalogue goes through the scaler as a plain array, the same way the
    # seed rows do, so both calls present the scaler with the same input type.
    all_features = data[feature_columns].to_numpy()

    model.eval()
    device = next(model.parameters()).device
    # Placeholder graph: a single 0 -> 0 self-loop, i.e. no edges between
    # different songs.
    dummy_edge_index = torch.zeros((2, 1), dtype=torch.long).to(device)

    with torch.no_grad():
        input_tensor = torch.tensor(X_scaled, dtype=torch.float32).to(device)
        query_embedding = model(input_tensor, dummy_edge_index).mean(dim=0)

        all_tensor = torch.tensor(
            scaler.transform(all_features), dtype=torch.float32
        ).to(device)
        all_embeddings = model(all_tensor, dummy_edge_index)

    # The name filter below can drop more rows than there are seeds (several
    # rows may share a seed's name), so reserve one extra neighbour per such
    # row; never ask for more neighbours than there are rows.
    n_excluded = int(data['name'].isin(found_songs).sum())
    n_neighbors = min(n + n_excluded, len(data))

    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')
    nbrs.fit(all_embeddings.cpu().numpy())
    _, indices = nbrs.kneighbors(query_embedding.cpu().numpy().reshape(1, -1))

    recommendations = data.iloc[indices[0]]
    recommendations = recommendations[~recommendations['name'].isin(found_songs)]

    return recommendations.head(n)[['name', 'artists']]

In [None]:
# Usage example
if __name__ == "__main__":
    # Load the song table; only the columns used for lookup and as features
    # are kept, and rows with missing values are dropped.
    data = pd.read_csv("data.csv")[['name', 'artists', 'valence', 'year',
                                  'acousticness', 'danceability', 'duration_ms',
                                  'energy', 'explicit', 'instrumentalness',
                                  'key', 'liveness', 'loudness', 'mode',
                                  'popularity', 'speechiness', 'tempo']].dropna()

    # Load the model onto the GPU when one is available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    try:
        model, scaler, feature_columns = load_full_model("fixed_model2.h5", device)
        print("Модель успешно загружена")
    except Exception as e:
        print(f"Ошибка загрузки модели: {str(e)}")
        # Re-raise instead of calling exit(): in a notebook exit() stops the
        # kernel, and in a script it would end with a success status even
        # though loading failed. Re-raising stops this cell and keeps the
        # traceback.
        raise

    # Smoke test: recommendations for a single seed song.
    recommendations = get_recommendations(
        model=model,
        scaler=scaler,
        feature_columns=feature_columns,
        data=data,
        song_list=[{'name': 'Bohemian Rhapsody', 'artists': 'Queen'}],
        n=10
    )

    if not recommendations.empty:
        print("\nТоп рекомендаций:")
        print(recommendations.to_string(index=False))
    else:
        print("\nРекомендации не найдены")

Модель успешно загружена





Топ рекомендаций:
                                                             name                                                 artists
                              Bohemian Rhapsody - Remastered 2011                                               ['Queen']
Rooster - Live at the Majestic Theatre, Brooklyn, NY - April 1996                                     ['Alice In Chains']
                                                       The Wolves                                          ['Ben Howard']
                                                    Aléjate de Mí                                              ['Camila']
                                                      Think Twice                                         ['Céline Dion']
                                         The Phantom Of the Opera ['Andrew Lloyd Webber', 'Gerard Butler', 'Emmy Rossum']
                                                     Pagan Poetry                                               ['Björk']
     