In [1]:
# Install the third-party packages this notebook imports. No versions are
# pinned here, so each run picks up whatever pip resolves at that moment.
%pip install gradio
# pydantic is explicitly upgraded rather than just installed.
%pip install --upgrade pydantic
%pip install torch

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.



In [47]:
import gradio as gr
import torch
import torch.nn as nn
from joblib import load
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Read the exported tag table and index it by user: each userID maps to the
# list of values found in that user's 'tags' column, in file order.
top_tags = pd.read_csv('data/top_tags.csv')
_tags_grouped_by_user = top_tags.groupby('userID')['tags']
user_tags_dict = _tags_grouped_by_user.apply(list).to_dict()


class ImprovedSongRecommender(nn.Module):
    """Feed-forward network that emits one raw score per song title.

    The input has two features. It passes through three hidden stages of
    widths 128, 256 and 128; each stage is Linear -> BatchNorm1d -> ReLU ->
    Dropout(0.5). A final linear layer maps the last hidden state to
    ``num_titles`` outputs. No softmax is applied here.

    The attribute names (fc1, bn1, ..., output) are part of the checkpoint
    format: they become the state_dict keys, so they must not be renamed.
    """

    def __init__(self, num_titles):
        """Create the layers.

        Args:
            num_titles: width of the output layer (number of scored titles).
        """
        super().__init__()
        # Stage 1: 2 -> 128
        self.fc1 = nn.Linear(2, 128)
        self.bn1 = nn.BatchNorm1d(128)
        # Stage 2: 128 -> 256
        self.fc2 = nn.Linear(128, 256)
        self.bn2 = nn.BatchNorm1d(256)
        # Stage 3: 256 -> 128
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        # Scoring head and the single dropout module shared by all stages.
        self.output = nn.Linear(128, num_titles)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run ``x`` through the three hidden stages and return the title scores."""
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
        )
        for linear, norm in hidden_stages:
            x = self.dropout(torch.relu(norm(linear(x))))
        return self.output(x)


# ---- Network weights -------------------------------------------------------
model_path = "improved_model.pth"
# Output width of the network; it has to match the checkpoint being loaded.
num_unique_titles = 4855

model = ImprovedSongRecommender(num_titles=num_unique_titles)
# map_location forces every tensor in the checkpoint onto the CPU.
_checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
model.load_state_dict(_checkpoint)
# Evaluation mode: dropout is disabled and batch norm uses its running stats.
model.eval()

# ---- Fitted preprocessing objects ------------------------------------------
label_encoders_path = "data/new_label_encoders.joblib"
scaler_path = "data/new_scaler.joblib"

# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
label_encoders = load(label_encoders_path)
# NOTE(review): `scaler` is loaded but nothing else in this cell references it;
# confirm whether model inputs are supposed to be scaled before inference.
scaler = load(scaler_path)

# Position in the title encoder's classes_ array -> song title.
index_to_song_title = dict(enumerate(label_encoders['title'].classes_))


def _encode_label(encoder_key, value):
    """Encode ``value`` with ``label_encoders[encoder_key]``; unseen labels map to 'unknown'."""
    encoder = label_encoders[encoder_key]
    try:
        return encoder.transform([value])[0]
    except ValueError:
        # transform() rejected the label, so fall back to the 'unknown' class.
        # If the encoder has no such class this second call raises ValueError.
        return encoder.transform(['unknown'])[0]


def encode_input(tags, artist_name):
    """Turn a tag string and an artist name into the model's feature list.

    Only the first tag is used: the network takes two input features, one tag
    id and one artist id. Tags after the first are ignored without being
    encoded.

    Args:
        tags: comma-separated tag names, e.g. ``"rock, pop"``. ``None`` or an
            empty string is encoded as the 'unknown' tag.
        artist_name: artist name; surrounding whitespace and newlines are
            removed. ``None`` or an empty string is encoded as 'unknown'.

    Returns:
        A two-element list ``[encoded_tag, encoded_artist]``.

    Raises:
        ValueError: if a label is unseen and the corresponding encoder has no
            'unknown' class either.
    """
    num_tag_slots = 1  # number of tag features the model expects

    tag_names = [tag.strip() for tag in (tags or '').split(',')]
    artist_name = (artist_name or '').strip().replace('\n', '')

    # Encode only the tags that are actually kept.
    encoded_tags = [_encode_label('tags', tag)
                    for tag in tag_names[:num_tag_slots]]
    # Pad with 'unknown' if fewer tags than slots were supplied.
    while len(encoded_tags) < num_tag_slots:
        encoded_tags.append(_encode_label('tags', 'unknown'))

    encoded_artist = _encode_label(
        'artist_name', artist_name if artist_name else 'unknown')

    return encoded_tags + [encoded_artist]


def recommend_songs(tags, user_id=None):
    """Return the five highest-scoring song titles for a tag string or a user.

    Args:
        tags: comma-separated tag names. Ignored when ``user_id`` is given.
        user_id: optional key into ``user_tags_dict``. When it is not None,
            that user's stored tags (joined with ", ") replace ``tags``; an
            id missing from the dict yields an empty tag string.

    Returns:
        On success, a list of five strings "Recommendation N: <title>";
        a predicted index without a known title is shown as "Unknown song".
        On any exception, the exception's message as a plain string.
    """
    try:
        if user_id is not None:
            stored_tags = user_tags_dict.get(int(user_id), [])
            tags = ', '.join(stored_tags)

        # The artist is always passed as an empty string.
        features = encode_input(tags, "")
        batch = torch.tensor(features).float().unsqueeze(0)  # add batch dim

        with torch.no_grad():
            scores = model(batch)

        top_indices = torch.topk(scores, 5).indices.squeeze().tolist()

        formatted_output = []
        for i, idx in enumerate(top_indices):
            rec = index_to_song_title.get(idx, "Unknown song")
            formatted_output.append(f"Recommendation {i+1}: {rec}")
        return formatted_output
    except Exception as err:
        # Best-effort for the UI: report the failure as text instead of raising.
        return str(err)


# Build the widgets first so the Interface call itself stays short.
# Input 1: free-text tags. Input 2: optional user picker listing every
# userID present in user_tags_dict.
tags_box = gr.Textbox(lines=1, placeholder="Enter Tags (e.g., rock)")
user_dropdown = gr.Dropdown(
    choices=list(user_tags_dict.keys()),
    label="User ID (optional)",
)
recommendations_box = gr.Textbox(label="Recommendations")

# Widget order matches recommend_songs' positional parameters (tags, user_id).
interface = gr.Interface(
    fn=recommend_songs,
    inputs=[tags_box, user_dropdown],
    outputs=recommendations_box,
    title="Music Recommendation System",
    description="Enter tags and (optionally) select a user ID to get music recommendations.",
)

# Start the local web server for the app.
interface.launch()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Running on local URL:  http://127.0.0.1:7879

To create a public link, set `share=True` in `launch()`.




In [13]:
import pandas as pd
# Load the user-preferences CSV. Later cells keep only its 'userID' and
# 'tags' columns. Note the 'db/data/' prefix: other paths in this notebook
# use 'data/'.
df = pd.read_csv('db/data/user_preferences.csv')

In [16]:
# Keep the two columns used downstream and drop rows where either is missing.
df = df[['userID', 'tags']].dropna()

In [22]:
# Build a long-format table holding each user's 20 most frequent tags.
# Expects `df` to have a 'userID' column and a comma-separated 'tags' column.

# One row per (user, tag): split the tag string, explode it, trim whitespace.
df_tags = df.assign(tags=df['tags'].str.split(',')).explode('tags')
df_tags['tags'] = df_tags['tags'].str.strip()

# Count tags within each user and keep the 20 most frequent.
top_tags = (
    df_tags.groupby('userID')['tags']
    .apply(lambda x: x.value_counts().head(20))
    # Name the index levels and the value column explicitly. A bare
    # reset_index() depends on how value_counts() names its result, which
    # changed in pandas 2.0 and makes reset_index() fail there with a
    # duplicate 'tags' column.
    .rename_axis(['userID', 'tag'])
    .reset_index(name='count')
)

# Keep only the (userID, tag) pairs; the counts were needed just for ranking.
top_tags = top_tags[['userID', 'tag']]

In [23]:
# Show the result: one row per (userID, tag) pair.
top_tags

Unnamed: 0,userID,tag
0,0,rock
1,0,pop
2,0,classic rock
3,0,female vocalists
4,0,80s
...,...,...
195,9,60s
196,9,Love
197,9,heavy metal
198,9,oldies


In [24]:
# Write the computed per-user top tags, not the raw `df`, to the file the
# app loads. The app groups on a column named 'tags', so the 'tag' column is
# renamed on the way out.
top_tags.rename(columns={'tag': 'tags'}).to_csv('data/top_tags.csv', index=False)