In [35]:
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
# Load both input tables in one pass. The order of the paths matches the
# order of the names on the left-hand side: track table first, then the
# user -> tracks table.
cleaned_tracks_dataset, user_tracks_cleaned = (
    pd.read_csv(csv_path)
    for csv_path in ('cleaned_tracks_dataset.csv', 'users_tracks_cleaned.csv')
)

In [37]:
# Columns that describe a track when tracks are compared with one another.
features = [
    'artistname', 'track_popularity', 'playlist_genre', 'playlist_subgenre',
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms',
]

# Feature table indexed by track_id, so a track's row can be fetched with .loc.
# NOTE(review): these columns are later averaged and fed to cosine similarity,
# which presumes every one of them (including artistname and the genre columns)
# is already numerically encoded in the cleaned CSV - confirm.
track_features = cleaned_tracks_dataset.set_index('track_id')[features]

In [38]:
# Undirected graph holding two kinds of nodes, told apart by the 'type' attribute.
G = nx.Graph()

user_ids = user_tracks_cleaned['user_id'].unique()
track_ids = cleaned_tracks_dataset['track_id'].unique()

# Bulk-register the nodes; the keyword attribute is applied to every node in the call.
# NOTE(review): users and tracks share a single node namespace, so a user id that
# equals a track id would end up as one node tagged 'track' - confirm the two id
# ranges cannot overlap.
G.add_nodes_from(user_ids, type='user')
G.add_nodes_from(track_ids, type='track')


In [39]:
# Connect every user to each of the tracks listed on that user's row.
for _, user_row in user_tracks_cleaned.iterrows():
    listener = user_row['user_id']
    # Everything after the first column is treated as a track id; empty cells
    # are dropped and the remaining ids are cast to int.
    # presumably 'user_id' is the first column of the CSV - verify.
    liked_track_ids = user_row.iloc[1:].dropna().astype(int).tolist()
    G.add_edges_from((listener, liked_id) for liked_id in liked_track_ids)


In [40]:
def recommend(user_id, G, track_features, top_n=3):
    """Recommend tracks whose features best match a user's listened tracks.

    The user's neighbours in ``G`` that are tagged ``type == 'track'`` are
    taken as the listening history. Their feature rows are averaged into a
    single profile vector, every row of ``track_features`` is scored against
    that profile with cosine similarity, and the highest-scoring tracks the
    user has not already listened to are returned.

    Parameters
    ----------
    user_id : hashable
        Node key of the user in ``G``.
    G : graph
        Object exposing ``neighbors(node)`` and ``nodes[node]`` (an attribute
        mapping), e.g. a ``networkx.Graph``.
    track_features : pandas.DataFrame
        Feature table indexed by track id; every column must be numeric.
    top_n : int, default 3
        Maximum number of tracks to return.

    Returns
    -------
    list
        Up to ``top_n`` distinct track ids, best match first. Empty when the
        user has no listened track present in ``track_features`` or when
        ``top_n`` is not positive.

    Notes
    -----
    Any error raised by ``G.neighbors`` for an unknown ``user_id`` is
    propagated unchanged.
    """
    # Nodes created implicitly by add_edge() carry no attributes, so read the
    # tag with .get() instead of indexing, which would raise KeyError.
    listened_tracks = [
        n for n in G.neighbors(user_id) if G.nodes[n].get('type') == 'track'
    ]

    # Only tracks that actually have a feature row can contribute to the
    # profile; .loc with a missing label would raise KeyError.
    profile_tracks = [t for t in listened_tracks if t in track_features.index]
    if not profile_tracks:
        # Nothing to average: avoids handing a NaN vector to cosine_similarity.
        return []

    listened_features = track_features.loc[profile_tracks].values
    avg_features = np.mean(listened_features, axis=0)

    similarity_scores = cosine_similarity([avg_features], track_features.values)

    # Row positions ordered from most to least similar.
    sorted_indices = np.argsort(similarity_scores[0])[::-1]

    # Set membership is O(1); it also tracks ids already recommended so that a
    # track id occurring on several rows of the table is returned only once.
    excluded = set(listened_tracks)
    recommended_tracks = []
    for idx in sorted_indices:
        # Check the limit first so that top_n <= 0 yields an empty list.
        if len(recommended_tracks) >= top_n:
            break
        track_id = track_features.index[idx]
        if track_id in excluded:
            continue
        recommended_tracks.append(track_id)
        excluded.add(track_id)

    return recommended_tracks


In [41]:
# Build the per-user recommendation table and export it.
# Rows are collected in a plain list and turned into a DataFrame once:
# concatenating onto a growing frame inside the loop copies the whole table on
# every iteration (quadratic) and relies on concatenating with an empty frame.
# Note: in the cells shown, the "GNN" suffix only names the output; the
# recommendations come from recommend() (graph lookup + cosine similarity).
rec_columns = ['rec_1', 'rec_2', 'rec_3']

recommendation_rows = []
for user_id in user_ids:
    recommendations = recommend(user_id, G, track_features)
    # Pad with None so users with fewer than three recommendations still
    # produce a full-width row.
    padded = (list(recommendations) + [None] * len(rec_columns))[:len(rec_columns)]
    recommendation_rows.append([user_id, *padded])

# dtype=object keeps integer ids from being upcast to float in columns that
# also contain None, so they are written to the CSV without a trailing ".0".
user_recommendations_GNN = pd.DataFrame(
    recommendation_rows,
    columns=['user', *rec_columns],
    dtype=object,
)

user_recommendations_GNN.to_csv('user_recommendations_GNN.csv', index=False)