In [1]:
#mount the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#install tensorflow
!pip install tensorflow tensorflow-recommenders

#import necessary libraries
import tensorflow as tf
import tensorflow_recommenders as tfrs
import pandas as pd

Collecting tensorflow-recommenders
  Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl.metadata (4.6 kB)
Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl (96 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-recommenders
Successfully installed tensorflow-recommenders-0.7.3


In [3]:
# ✅ 1. Load the necessary datasets
ratings = pd.read_csv("drive/MyDrive/MSBA AI Final Project/popular_ratings_dataset.csv") #user rating data
songs = pd.read_csv("drive/MyDrive/MSBA AI Final Project/popular_songs_dataset.csv") #song data

# ✅ 2. Create numeric ID mappings
user_id_mapping = {id_: idx for idx, id_ in enumerate(ratings["user_id"].unique(), start=1)}  # Start user ID mapping from 1
song_id_mapping = {id_: idx for idx, id_ in enumerate(ratings["song_id"].unique(), start=1)}  # Start song ID mapping from 1

# ✅ 3. Apply mappings
ratings["user_id_num"] = ratings["user_id"].map(user_id_mapping)  # Apply numeric user_id mapping to DataFrame
ratings["song_id_num"] = ratings["song_id"].map(song_id_mapping)  # Apply numeric song_id mapping to DataFrame

# ✅ 4. Convert to tf.data.Dataset with numeric IDs
tf_ratings = tf.data.Dataset.from_tensor_slices({  # Create a TensorFlow dataset from the numeric user and song IDs
    "user_id": ratings["user_id_num"].values,      # Use the array of numeric user IDs
    "song_id": ratings["song_id_num"].values       # Use the array of numeric song IDs
})

# ✅ 5. Get unique numeric values
unique_user_ids = ratings["user_id_num"].unique()  # Get array of unique numeric user IDs
unique_song_ids = ratings["song_id_num"].unique()  # Get array of unique numeric song IDs

# ✅ 6. Define the model
class MusicRecommenderModel(tfrs.models.Model):  # Define a custom TFRS model for music recommendation
    def __init__(self, num_users, num_songs):    # Constructor accepts total number of users and songs
        super().__init__()                       # Call the base class constructor
        embedding_dim = 32                       # Set the size of the embedding vectors

        self.user_model = tf.keras.Sequential([   # Define the user embedding model
            tf.keras.layers.Embedding(input_dim=num_users + 2, output_dim=embedding_dim)  # Embedding layer for users (+2 to account for 1-based indexing and edge cases)
        ])

        self.song_model = tf.keras.Sequential([   # Define the song embedding model
            tf.keras.layers.Embedding(input_dim=num_songs + 2, output_dim=embedding_dim)  # Embedding layer for songs (+2 for same reason)
        ])

        self.task = tfrs.tasks.Retrieval()  # Define the retrieval task for training the model

    def compute_loss(self, features, training=False):  # Compute loss using embeddings and retrieval task
        user_embeddings = self.user_model(features["user_id"])  # Get embedding vector for user
        song_embeddings = self.song_model(features["song_id"])  # Get embedding vector for song
        return self.task(user_embeddings, song_embeddings)      # Return loss from TFRS retrieval task

# ✅ 7. Shuffle, split, train
tf_ratings = tf_ratings.shuffle(1000, seed=42, reshuffle_each_iteration=False)  # Shuffle the dataset with fixed seed
train = tf_ratings.take(800)  # Take first 800 samples for training
test = tf_ratings.skip(800)   # Use the remaining samples for testing

model = MusicRecommenderModel(len(user_id_mapping), len(song_id_mapping))  # Instantiate model with number of users and songs
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))  # Compile with Adagrad optimizer
model.fit(train.batch(100), epochs=3, verbose=0)  # Train the model on the training set in batches of 100 for 3 epochs

# ✅ 8. Manual Top-K recommendation
sample_user_id = ratings["user_id_num"].iloc[20]  # Select a sample user's numeric ID from the dataset

user_embedding = model.user_model(tf.constant([sample_user_id]))  # Get user embedding for the selected user
all_song_embeddings = model.song_model(tf.constant(unique_song_ids))  # Get embeddings for all songs

scores = tf.linalg.matmul(user_embedding, all_song_embeddings, transpose_b=True)  # Compute similarity scores between user and all songs

top_k = tf.math.top_k(scores, k=5)  # Get indices of top 5 highest scoring songs
recommended_indices = top_k.indices[0].numpy()  # Convert Tensor to NumPy array
recommended_song_ids = [int(unique_song_ids[i]) for i in recommended_indices]  # Retrieve corresponding song IDs

# ✅ 9. Convert numeric song IDs back to original labels
reverse_song_map = {v: k for k, v in song_id_mapping.items()}  # Reverse mapping: numeric ID -> original song ID
recommended_labels = [reverse_song_map[song_id] for song_id in recommended_song_ids]  # Map numeric IDs to original song IDs

# ✅ 10. Display recommendations with metadata
print(f"\n🎧 Top 5 recommendations for user {sample_user_id}:")  # Print user ID
for i, song_id in enumerate(recommended_labels):  # Loop through recommended songs
    # Try to get metadata from user-specific interactions
    song_row = ratings[(ratings["song_id"] == song_id) & (ratings["user_id_num"] == sample_user_id)]  # Match both song and user

    if not song_row.empty:  # If metadata is available for this user-song pair
        title = song_row["song_title"].values[0]  # Get song title
        genre = song_row["song_genres"].values[0]  # Get song genre
        in_library = "Yes" if song_row["in_library"].values[0] else "No"  # Check if in library
        is_favorited = "Yes" if song_row["is_favorited"].values[0] else "No"  # Check if favorited
    else:  # If no user-specific metadata, fall back to general song info
        song_row = songs[songs["song_id"] == song_id]  # Look up song in general song info DataFrame
        title = song_row["song_title"].values[0] if not song_row.empty else song_id  # Use song title or ID if missing
        genre = song_row["song_genres"].values[0] if not song_row.empty else "Unknown"  # Use genre or "Unknown"
        in_library = "Unknown"  # Accounts for no info available
        is_favorited = "Unknown"  # Accounts for no info available

    # Print formatted song recommendation with metadata
    print(f"{i + 1}. {title} | Genre: {genre} | In Library: {in_library} | Favorited: {is_favorited}")



🎧 Top 5 recommendations for user 2:
1. Good 4 U by Olivia Rodrigo | Genre: Pop Rock | In Library: Yes | Favorited: No
2. Skate by Silk Sonic | Genre: R&B | In Library: Yes | Favorited: No
3. Deja Vu by Olivia Rodrigo | Genre: Pop | In Library: Yes | Favorited: Yes
4. You Right by Doja Cat & The Weeknd | Genre: R&B | In Library: No | Favorited: Yes
5. Ghost by Justin Bieber | Genre: Pop | In Library: Yes | Favorited: No
