# In [24]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import OneHotEncoder

# Define a simple Song class to hold song details
class Song:
    """Plain record describing one song that can be recommended.

    Attributes are set directly from the constructor arguments;
    ``similarity_score`` starts at 0 and is meant to be overwritten later.
    """

    def __init__(self, song_id, description, genre, year):
        # A freshly created song has not been scored against any input yet.
        self.similarity_score = 0

        # Identity and free-text description.
        self.song_id, self.description = song_id, description

        # Categorical metadata.
        self.genre, self.year = genre, year

# Define the K-armed bandit agent
class KArmedBandit:
    """Epsilon-greedy song recommender in which every song is one arm.

    On each ``recommend`` call all songs are scored by cosine similarity
    between the embedded user input and the embedded song description. The
    agent then either explores (uniformly random song) or exploits (song with
    the best similarity plus accumulated reward).

    The song list is treated as fixed after construction: ``num_songs`` and
    ``song_rewards`` are sized once in ``__init__``.
    """

    def __init__(self, songs, embedding_model=None, input_embedder=None, epsilon=0.2):
        """Store the songs and models and reset all reward estimates.

        Args:
            songs: List of song objects exposing ``song_id``, ``description``
                and a writable ``similarity_score`` attribute.
            embedding_model: Object whose ``encode(list_of_str)`` returns one
                vector per string (e.g. a Sentence-BERT model).
            input_embedder: Callable mapping the user-input tensor to a torch
                tensor of the same width as the song embeddings.
            epsilon: Probability of picking a random song instead of the
                best-scoring one. Defaults to the previous fixed value, 0.2.

        Raises:
            ValueError: If ``epsilon`` is outside ``[0, 1]``.
        """
        if not 0.0 <= epsilon <= 1.0:
            raise ValueError(f"epsilon must be within [0, 1], got {epsilon!r}")

        self.songs = songs  # List of Song objects
        self.num_songs = len(songs)
        self.previous_songs = []  # IDs of songs already suggested, in order
        self.embedding_model = embedding_model  # Embeds song descriptions
        self.input_embedder = input_embedder  # Embeds the user input
        self.epsilon = epsilon  # Exploration probability
        self.song_rewards = np.zeros(self.num_songs)  # Cumulative reward per song

    def encode_input(self, mood, style, year):
        """One-hot encode the three user-input values into a flat vector.

        The encoder is fitted on just these three values, treated as a single
        categorical column, so the result has ``3 * (number of distinct
        values)`` entries.

        NOTE(review): the length is 9 only when all three values differ; two
        equal values (e.g. mood == style) shrink it to 6, which no longer fits
        an embedder built with ``input_dim=9``. A fixed vocabulary per field
        would make the encoding stable -- confirm the intended design.

        Returns:
            1-D numpy array with the flattened one-hot rows.
        """
        input_features = np.array([mood, style, year])
        encoder = OneHotEncoder(sparse_output=False)
        encoded_input = encoder.fit_transform(input_features.reshape(-1, 1))
        return encoded_input.flatten()

    def _embed_user_input(self, user_input):
        """Run the input embedder and return its output as a flat float array."""
        embedded = self.input_embedder(user_input)
        # detach(): drop the autograd graph; cpu(): numpy() only works on CPU tensors.
        return np.asarray(embedded.detach().cpu().numpy(), dtype=np.float64).ravel()

    @staticmethod
    def _cosine_to_rows(vector, matrix):
        """Return the cosine similarity between ``vector`` and each row of ``matrix``.

        Rows (or a query vector) with zero norm get similarity 0 instead of NaN.
        """
        rows = np.atleast_2d(np.asarray(matrix, dtype=np.float64))
        dots = rows @ vector
        norms = np.linalg.norm(rows, axis=1) * np.linalg.norm(vector)
        similarities = np.zeros_like(dots)
        np.divide(dots, norms, out=similarities, where=norms > 0)
        return similarities

    def get_similarity(self, user_input, song):
        """Return the cosine similarity between the user input and one song.

        Args:
            user_input: Tensor accepted by ``input_embedder``.
            song: Object with a ``description`` string.

        Returns:
            The similarity as a Python float.
        """
        user_vector = self._embed_user_input(user_input)
        song_vector = self.embedding_model.encode([song.description])[0]
        return float(self._cosine_to_rows(user_vector, song_vector)[0])

    def recommend(self, user_input):
        """Pick one song for ``user_input`` and update its reward estimate.

        Side effects: refreshes ``similarity_score`` on every song, appends the
        chosen song's ID to ``previous_songs`` on first recommendation, and adds
        the reward to ``song_rewards``.

        Args:
            user_input: Tensor accepted by ``input_embedder``.

        Returns:
            Tuple ``(song, reward)``. ``reward`` is the song's similarity on
            its first recommendation and -1 when it has been suggested before.

        Raises:
            ValueError: If the agent was built with no songs.
        """
        if self.num_songs == 0:
            raise ValueError("cannot recommend from an empty song list")

        # Embed the user input once and all descriptions in a single batch,
        # instead of re-running both models for every song.
        user_vector = self._embed_user_input(user_input)
        song_matrix = self.embedding_model.encode(
            [song.description for song in self.songs]
        )
        similarities = self._cosine_to_rows(user_vector, song_matrix)
        for song, score in zip(self.songs, similarities):
            song.similarity_score = float(score)

        if np.random.rand() < self.epsilon:
            # Exploration: uniformly random song.
            song_idx = int(np.random.choice(self.num_songs))
        else:
            # Exploitation: rank by similarity to this input plus what was
            # learned from earlier rewards. Ranking by rewards alone ignores
            # the input entirely and always starts with song 0.
            song_idx = int(np.argmax(similarities + self.song_rewards))

        chosen = self.songs[song_idx]
        if chosen.song_id in self.previous_songs:
            reward = -1  # Penalize repeating a song
        else:
            reward = chosen.similarity_score
            self.previous_songs.append(chosen.song_id)

        self.song_rewards[song_idx] += reward

        return chosen, reward


# Simple embedding model for user input (1 layer NN)
class InputEmbedder(nn.Module):
    """Single linear layer that projects an input vector to ``output_dim`` values."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable component: an affine map input_dim -> output_dim.
        self.fc = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear projection to ``x`` and return the result."""
        projected = self.fc(x)
        return projected

# Example dataset
songs = [
    Song(1, "Upbeat jazz song with piano and saxophone.", "jazz", "old"),
    Song(2, "Calm melody with piano and strings.", "melody", "mid"),
    Song(3, "Instrumental rock with electric guitar.", "instrumental", "recent"),
    Song(4, "Energetic pop song with strong beats.", "pop", "trending"),
    # Add more songs...
]

# Load the pre-trained Sentence-BERT model used to embed song descriptions
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# The user-input embedder must project into the same space as the song
# embeddings, so take the output width from the model instead of hard-coding
# it (384 for this model); swapping the model then keeps both sides aligned.
input_embedder = InputEmbedder(
    input_dim=9,  # 3 input fields x 3 distinct values after one-hot encoding
    output_dim=embedding_model.get_sentence_embedding_dimension(),
)
bandit_agent = KArmedBandit(songs, embedding_model, input_embedder)

# Example user input (one-hot encoded mood/style/year)
user_input = bandit_agent.encode_input(mood="happy", style="jazz", year="old")

# encode_input returns 3 * (number of distinct values) entries, so two equal
# values would yield a shorter vector; fail here with a clear message rather
# than with a shape error inside the linear layer.
if user_input.shape[0] != input_embedder.fc.in_features:
    raise ValueError(
        f"Encoded input has {user_input.shape[0]} features but the input "
        f"embedder expects {input_embedder.fc.in_features}; mood, style and "
        "year must be three distinct values."
    )

# Convert to torch tensor
user_input_tensor = torch.tensor(user_input, dtype=torch.float32)

# Get song recommendation
recommended_song, reward = bandit_agent.recommend(user_input_tensor)

print(f"Recommended Song: {recommended_song.song_id}, Reward: {reward}")




# Output: Recommended Song: 1, Reward: 0.055349960923194885
