In [2]:
import pandas as pd
import random
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate, GridSearchCV
from surprise import accuracy

# Load the dataset
df = pd.read_csv('MyData.csv')

# Select relevant columns and rename them for clarity
df = df[['track_uri', 'track_name', 'artist_name', 'album_name', 'ms_played']]
df.columns = ['track_uri', 'title', 'artist', 'album', 'ms_played']

# Display the first few rows of the dataframe
df.head()

# Generate dummy user_id assuming all users have interacted with all songs
df['user_id'] = 1

# Normalize ms_played
df['ms_played'] = (df['ms_played'] - df['ms_played'].mean()) / df['ms_played'].std()

# Create a reader object for the dataset
reader = Reader(rating_scale=(df['ms_played'].min(), df['ms_played'].max()))

# Load the dataset into the surprise Dataset format
data = Dataset.load_from_df(df[['user_id', 'track_uri', 'ms_played']], reader)

# Perform grid search for hyperparameter tuning
param_grid = {'n_factors': [20, 50, 100],
              'n_epochs': [10, 20, 30],
              'lr_all': [0.002, 0.005, 0.01],
              'reg_all': [0.02, 0.05, 0.1]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)
gs.fit(data)

# Best SVD model
algo = gs.best_estimator['rmse']

# Perform cross-validation
cross_validate(algo, data, measures=['RMSE'], cv=5, verbose=True)

# Train the algorithm on the entire dataset
trainset = data.build_full_trainset()
algo.fit(trainset)

# Function to get recommendations by artist
def get_recommendations_by_artist1(artist_name, n=5):
    # Filter the dataframe for the given artist's songs
    artist_songs = df[df['artist'].str.contains(artist_name, case=False, na=False)]

    # If no songs found for the artist, return an empty list
    if artist_songs.empty:
        return []

    # Get a list of song IDs by the artist
    artist_song_ids = artist_songs['track_uri'].unique()

    # Predict ratings for all songs
    all_song_ids = df['track_uri'].unique()
    predictions = [algo.predict(1, track_uri) for track_uri in all_song_ids]

    # Sort the predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    # Filter out the artist's own songs from the recommendations
    recommendations = [pred for pred in predictions if pred.iid in artist_song_ids]
    
    # Shuffle the recommendations to get a random set
    random.shuffle(recommendations)

    # Get up to N random recommendations
    random_recommendations = recommendations[:n]

    # Get the song details for the random recommendations
    random_song_details = [
        (df[df['track_uri'] == pred.iid]['track_uri'].values[0],
        df[df['track_uri'] == pred.iid]['title'].values[0], 
         df[df['track_uri'] == pred.iid]['artist'].values[0], 
         df[df['track_uri'] == pred.iid]['album'].values[0])
        for pred in random_recommendations
    ]

    return random_song_details
def get_recommendations_by_artist2(artist_name, n=5):
    # Filter the dataframe for the given artist's songs
    artist_songs = df[df['artist'].str.contains(artist_name, case=False, na=False)]

    # If no songs found for the artist, return an empty list
    if artist_songs.empty:
        return []

    # Get a list of song IDs by the artist
    artist_song_ids = artist_songs['track_uri'].unique()

    # Predict ratings for all songs
    all_song_ids = df['track_uri'].unique()
    predictions = [algo.predict(1, track_uri) for track_uri in all_song_ids]

    # Sort the predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    # Filter out the artist's own songs from the recommendations
    recommendations = [pred for pred in predictions if pred.iid not in artist_song_ids]
    
    # Shuffle the recommendations to get a random set
    random.shuffle(recommendations)

    # Get up to N random recommendations
    random_recommendations = recommendations[:n]

    # Get the song details for the random recommendations
    random_song_details = [
        (df[df['track_uri'] == pred.iid]['track_uri'].values[0],
        df[df['track_uri'] == pred.iid]['title'].values[0], 
         df[df['track_uri'] == pred.iid]['artist'].values[0], 
         df[df['track_uri'] == pred.iid]['album'].values[0])
        for pred in random_recommendations
    ]

    return random_song_details

# Example usage: Get up to 10 random recommendations for songs
artist_name = "shreya"
random_recommendations = get_recommendations_by_artist1(artist_name, n=5)

print(f"\nRecommended songs for {artist_name}:")
for track_uri, title, artist, album in random_recommendations:
    print(f"Track_URI:https://open.spotify.com/track/{track_uri}?go=1, Title: {title}, Artist: {artist}, Album: {album}")
random_recommendations = get_recommendations_by_artist2(artist_name, n=5)
print(f"\nSimilar song recommendations by other artists:") 
for track_uri, title, artist, album in random_recommendations:
    print(f"Track_URI:https://open.spotify.com/track/{track_uri}?go=1, Title: {title}, Artist: {artist}, Album: {album}")


Evaluating RMSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8904  0.9540  0.9412  1.2810  0.8746  0.9882  0.1494  
Fit time          0.02    0.00    0.02    0.00    0.02    0.01    0.01    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    

Recommended songs for shreya:
Track_URI:https://open.spotify.com/track/3uGgBYzrYUNypQRPw4hQTY?go=1, Title: Ivanu Geleyanalla, Artist: Shreya Ghoshal, Album: Mungaru Male
Track_URI:https://open.spotify.com/track/1gwO79MdYdumgIjxq8eCxB?go=1, Title: Bahara, Artist: Vishal-Shekhar, Album: I Hate Luv Storys (Original Motion Picture Soundtrack)
Track_URI:https://open.spotify.com/track/4omaeKfh7tt9ZYEa9LY0tK?go=1, Title: Araluthiru, Artist: Shreya Ghoshal, Album: Mungaru Male
Track_URI:https://open.spotify.com/track/3KeMulXbLDJBQdY5PZbGEh?go=1, Title: Param Sundari, Artist: Shreya Ghoshal, Album: Mimi
Track_URI:https://open.spotify.com/track/32DWojM