In [73]:
import joblib
import pandas as pd
# Persisted model for the content-based lookups; it is passed as `knn` to
# knn_recommend_by_name, which calls its kneighbors() method.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files from a trusted source.
content_model = joblib.load("content_model.pkl")

In [74]:
import numpy as np
# Precomputed feature vectors; knn_recommend_by_name indexes this by row to build the query.
# NOTE(review): assumes row i describes row i of `df` - confirm against the notebook that saved it.
feature_matrix = np.load("feature_matrix.npy")

In [75]:
import os
from pathlib import Path

# Directory holding the two source CSVs. The default is the original author's
# location; set the MUSIC_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
DATA_DIR = Path(os.environ.get("MUSIC_DATA_DIR", "E:\\Projects\\Machine Learning\\Music Recommender System"))

# Song table; knn_recommend_by_name looks songs up through its "name" column.
df = pd.read_csv(DATA_DIR / "Music Info.csv")
# Listening-history table.
df2 = pd.read_csv(DATA_DIR / "User Listening History.csv")

In [76]:
import json

# Load mappings from saved JSON files
def load_json(filename):
    """Parse the JSON document stored at ``filename`` and return the resulting object."""
    with open(filename, "r") as source:
        raw_text = source.read()
    return json.loads(raw_text)

# Lookup tables read from JSON files in the working directory.
# JSON object keys are always strings, so index_to_track is keyed by the
# string form of each index rather than by an int.
user_to_index = load_json("user_to_index.json")    # user id -> integer index
track_to_index = load_json("track_to_index.json")  # track id -> integer index
index_to_track = load_json("index_to_track.json")  # str(index) -> track id
track_to_name = load_json("track_to_name.json")    # track id -> song name

print("Mappings loaded successfully.")


Mappings loaded successfully.


In [77]:
import torch
import torch.nn as nn
class NCF(nn.Module):
    """Neural collaborative filtering scorer for (user, item) pairs.

    Each user and item index is embedded, the two embeddings are concatenated
    and passed through a two-layer MLP that emits one score per pair.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Width of each embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_dim=32):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys);
        # do not rename them.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # Fully connected layers
        self.fc1 = nn.Linear(embedding_dim * 2, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, user_ids, item_ids):
        """Score each (user, item) pair.

        Args:
            user_ids: Integer tensor of user indices.
            item_ids: Integer tensor of item indices, same shape as ``user_ids``.

        Returns:
            Tensor of scores with the same shape as ``user_ids``.
        """
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)
        # Concatenate along the embedding axis.
        x = torch.cat([user_embeds, item_embeds], dim=-1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        # Drop only the trailing size-1 output axis. A bare squeeze() would also
        # collapse a batch of one into a 0-d scalar, which breaks torch.topk and
        # loss functions that expect a batch dimension.
        return x.squeeze(-1)

num_users = len(user_to_index)
num_items = len(track_to_index)

collaborative_model = NCF(num_users, num_items)
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source (or pass weights_only=True on PyTorch versions that support it).
state_dict = torch.load("collaborative_model.pth", map_location=torch.device("cpu"))

old_num_items = state_dict['item_embedding.weight'].shape[0]
new_num_items = num_items

if old_num_items < new_num_items:
    # The checkpoint holds fewer item rows than the current track mapping.
    # Keep the saved rows and leave the model's freshly initialised rows in
    # place for the extra tracks; their scores are therefore untrained.
    expanded_weight = collaborative_model.item_embedding.weight.detach().clone()
    expanded_weight[:old_num_items] = state_dict['item_embedding.weight']
    state_dict['item_embedding.weight'] = expanded_weight

# strict=False tolerates missing/unexpected keys (shape mismatches still raise),
# so report what was skipped instead of silently leaving layers untrained.
load_result = collaborative_model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "Warning: checkpoint does not fully match the model. "
        f"Missing keys: {load_result.missing_keys}; "
        f"unexpected keys: {load_result.unexpected_keys}"
    )
collaborative_model.eval()


NCF(
  (user_embedding): Embedding(962037, 32)
  (item_embedding): Embedding(50683, 32)
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
)

In [78]:
def knn_recommend_by_name(song_name, df, feature_matrix, knn):
    """Return the names of the songs closest to ``song_name`` in feature space.

    Args:
        song_name: Exact value to look up in ``df["name"]``; the first matching
            row is used as the query.
        df: DataFrame with a ``name`` column whose row order matches
            ``feature_matrix``.
        feature_matrix: Row-indexable collection of feature vectors, one per
            row of ``df``.
        knn: Object exposing ``kneighbors(X)`` that returns
            ``(distances, indices)``, where ``indices`` holds row positions.

    Returns:
        A list of song names in the order ``knn.kneighbors`` returned them
        (nearest first for scikit-learn estimators), with the query row
        removed. ``None`` if the song is not found or an error occurs; a
        message is printed in either case.
    """
    try:
        matches = (df["name"] == song_name).to_numpy()
        if not matches.any():
            print(f"Song '{song_name}' not found in the dataset.")
            return None
        # Use the row *position*, not the index label: feature_matrix and the
        # kneighbors() output are both positional.
        query_pos = int(matches.argmax())

        _, indices = knn.kneighbors([feature_matrix[query_pos]])
        neighbour_positions = [int(pos) for pos in indices[0]]
        # The query is usually its own nearest neighbour, but ties can place it
        # elsewhere, so remove it explicitly. Cap the result at one fewer than
        # the number of neighbours returned, as before.
        keep = max(len(neighbour_positions) - 1, 0)
        ranked_positions = [pos for pos in neighbour_positions if pos != query_pos][:keep]
        # Read names straight from the ranked rows so the similarity order
        # survives (an isin() filter would return them in DataFrame order).
        return df.iloc[ranked_positions]["name"].tolist()

    except KeyError:
        print("Required column 'name' not found in DataFrame")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [79]:
def recommend_songs_hybrid(user_id, song_name, top_n=10):
    """Combine collaborative and content-based recommendations for one user.

    Args:
        user_id: Key into ``user_to_index``. Users that are unknown, or whose
            index has no row in the model's user embedding, get content-based
            results only.
        song_name: Seed song passed to ``knn_recommend_by_name``.
        top_n: Maximum number of collaborative picks; capped at the number of
            known tracks.

    Returns:
        For a known user: a de-duplicated list of song names, collaborative
        picks first (highest score first) followed by the content-based ones.
        For the fallback paths: whatever ``knn_recommend_by_name`` returns,
        which may be ``None``.
    """
    if user_id not in user_to_index:
        print(f"User ID {user_id} not found, using content-based filtering.")
        return knn_recommend_by_name(song_name, df, feature_matrix, content_model)

    user_idx = user_to_index[user_id]

    # The mapping may list users the loaded model has no embedding row for.
    if not 0 <= user_idx < collaborative_model.user_embedding.num_embeddings:
        print(f"User ID {user_id} out of range, using content-based filtering.")
        return knn_recommend_by_name(song_name, df, feature_matrix, content_model)

    # Position p in these two sequences refers to the same track.
    track_ids = list(track_to_index.keys())
    track_tensor = torch.tensor(list(track_to_index.values()), dtype=torch.long)

    # Clip indices if out of range
    track_tensor = torch.clamp(track_tensor, max=collaborative_model.item_embedding.num_embeddings - 1)
    user_tensor = torch.full_like(track_tensor, user_idx)

    collab_recommended_songs = []
    k = min(top_n, len(track_ids))  # topk raises if k exceeds the number of scores
    if k > 0:
        with torch.no_grad():
            # reshape(-1) keeps a 1-D score vector even for a single track.
            predictions = collaborative_model(user_tensor, track_tensor).reshape(-1)

        _, top_positions = torch.topk(predictions, k)
        # topk yields positions within `predictions`, so translate them through
        # track_ids rather than treating them as track indices.
        collab_recommended_songs = [
            track_to_name.get(track_ids[pos], "Unknown Song")
            for pos in top_positions.tolist()
        ]

    # Content-based filtering recommendations (None when the song is unknown)
    content_recommended_songs = knn_recommend_by_name(song_name, df, feature_matrix, content_model) or []

    # Hybrid Approach: Combine Results
    # dict.fromkeys removes duplicates while keeping a stable, ranked order.
    hybrid_recommendations = list(dict.fromkeys(collab_recommended_songs + content_recommended_songs))

    return hybrid_recommendations

In [80]:
# Sanity check of both mappings: the first line samples user_to_index (so it is
# labelled as users), the second samples the tail of track_to_index.
print("First few user indices:", list(user_to_index.items())[:10])
print("Last few track indices:", list(track_to_index.items())[-10:])

First few track indices: [('b80344d063b5ccb3212f76538f3d9e43d87dca9e', 0), ('85c1f87fea955d09b4bec2e36aee110927aedf9a', 1), ('bd4c6e843f00bd476847fb75c47b4fb430a06856', 2), ('969cc6fb74e076a68e36a04409cb9d3765757508', 3), ('4bd88bfb25263a75bbdd467e74018f4ae570e5df', 4), ('e006b1a48f466bf59feefed32bec6494495a4436', 5), ('9d6f0ead607ac2a6c2460e4d14fb439a146b7dec', 6), ('9bb911319fbc04f01755814cb5edb21df3d1a336', 7), ('b64cdd1a0bd907e5e00b39e345194768e330d652', 8), ('17aa9f6dbdf753831da8f38c71b66b64373de613', 9)]
Last few track indices: [('TROIHJK12903CECC08', 50673), ('TRMEHFD128F92E4557', 50674), ('TRXWSIN128F9339A11', 50675), ('TRONQMR12903CF533E', 50676), ('TRPIGDW12903CDEB2D', 50677), ('TRQYCFV128F9322F50', 50678), ('TRHQCSH128F42724B7', 50679), ('TRZRODK128F92D68D7', 50680), ('TRGLMEM128F9322F63', 50681), ('TRIPFKO128F42383FE', 50682)]


In [81]:
# Both lines report on user_to_index, so they are labelled as users rather than tracks.
print(f"Total Users Indexed: {len(user_to_index)}")
print(f"Example Mapped User IDs: {list(user_to_index.keys())[:5]}")

Total Tracks Indexed: 962037
Example Mapped Track IDs: ['b80344d063b5ccb3212f76538f3d9e43d87dca9e', '85c1f87fea955d09b4bec2e36aee110927aedf9a', 'bd4c6e843f00bd476847fb75c47b4fb430a06856', '969cc6fb74e076a68e36a04409cb9d3765757508', '4bd88bfb25263a75bbdd467e74018f4ae570e5df']


In [82]:
# Example query: a user id and the seed song handed to the content-based lookup.
user_id = 'b80344d063b5ccb3212f76538f3d9e43d87dca9e'
song_name = "Nothing From Nothing"

# top_n is left at its default of 10.
recommendations = recommend_songs_hybrid(user_id, song_name)
print("Hybrid Recommendations:", recommendations)

Hybrid Recommendations: ["Somebody Else's Guy", 'Consider Us Dead', 'Pattern Recognition', "Somebody's Me", 'Den Svarta Fanan', 'The Thespian', 'These Are The Days', 'My Little Brother', 'Juicebox', 'Relieved Beyond Repair', 'We the People', 'Another one goes by', 'Living Life', 'Like a Prayer']
