In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Song catalogue used by every function below; the CSV's first column becomes the index.
# NOTE(review): downstream code reads a 'name' column and treats every column from
# position 2 onward as a numeric feature — confirm the file layout matches.
songs  = pd.read_csv('songs_scaled.csv',index_col=0)

In [3]:
def retrieve_rows(song_list, songs_df):
    """Look up each requested song in the catalogue and return the first match.

    Matching is a case-insensitive *literal* substring test against the
    ``name`` column. Titles are not interpreted as regular expressions, so
    characters such as ``+``, ``(`` or ``?`` in a title are matched as-is.

    Parameters
    ----------
    song_list : iterable
        Song titles (or title fragments) to search for. Each item is passed
        through ``str()`` before matching.
    songs_df : pandas.DataFrame
        Catalogue containing a ``name`` column.

    Returns
    -------
    pandas.DataFrame
        One row per query that had at least one match, in query order, keeping
        the catalogue's index labels. Queries with no match are skipped. When
        nothing matches, an empty frame with the catalogue's columns is
        returned.
    """
    names = songs_df['name']
    rows = []
    for query in song_list:
        # regex=False: titles are plain text, not patterns.
        hits = songs_df[names.str.contains(str(query), case=False, na=False, regex=False)]
        if not hits.empty:
            rows.append(hits.iloc[0])

    if not rows:
        # Keep the column layout so callers can still slice by position.
        return songs_df.iloc[0:0].copy()
    return pd.DataFrame(rows)

In [10]:
def get_user_interest_vector(User_History, Ratings):
    """Build a normalised, rating-weighted feature profile for a user.

    Each history row's feature vector (all columns from position 2 onward) is
    multiplied by the rating at the same position, the weighted vectors are
    summed, and the result is divided by the sum of its own components.

    Parameters
    ----------
    User_History : pandas.DataFrame
        One row per song the user rated; columns from position 2 onward are
        read as numeric features.
    Ratings : sequence or pandas.Series
        Ratings in the same order as the rows of ``User_History``. They are
        read by position, so the index of a Series is irrelevant. Entries
        beyond ``len(User_History)`` are ignored.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per feature column. If the weighted sum's
        components add up to zero, the un-normalised vector is returned
        instead of dividing by zero.

    Raises
    ------
    ValueError
        If fewer ratings than history rows are supplied.
    """
    # Feature width comes from the data rather than a hard-coded constant.
    features = User_History.iloc[:, 2:].to_numpy(dtype=float)
    weights = np.asarray(Ratings, dtype=float)
    if len(weights) < len(features):
        raise ValueError(
            f"Expected at least {len(features)} ratings, got {len(weights)}"
        )
    weights = weights[:len(features)]

    # (n,) @ (n, k) -> (k,): sum of rating * feature_row over all history rows.
    weighted_row = weights @ features
    total = weighted_row.sum()
    if total == 0:
        # Normalising would produce NaN/inf and poison the downstream ranking.
        return weighted_row
    return weighted_row / total

In [5]:
def get_pool_of_similar_songs(User_Interest, songs_df, pool_size):
    """Rank catalogue songs by their weighted feature score for a user.

    Every feature column (position 2 onward) is multiplied by the matching
    entry of ``User_Interest`` and the products are summed per row. The score
    is stored in a new ``sum`` column appended as the last column.

    Parameters
    ----------
    User_Interest : numpy.ndarray
        User profile with one entry per feature column of ``songs_df``.
    songs_df : pandas.DataFrame
        Catalogue to rank; it is not modified.
    pool_size : int
        Number of top-scoring rows to return.

    Returns
    -------
    pandas.DataFrame
        Copy of the ``pool_size`` highest-scoring rows of ``songs_df``, sorted
        by ``sum`` in descending order.
    """
    feature_matrix = songs_df.iloc[:, 2:]
    weights = np.asarray(User_Interest).reshape(-1)
    scores = feature_matrix.mul(weights, axis="columns").sum(axis=1)

    # Copy the frame that was passed in (not a module-level global) so the
    # scores line up with the rows that were actually scored.
    ranked = songs_df.copy(deep=True)
    ranked['sum'] = scores
    ranked = ranked.sort_values(by='sum', ascending=False)
    return ranked.iloc[:pool_size]

In [6]:
def recommender(User_ratings, n_neighbors=2, top_k=20, pool_size=500):
    """Predict ratings for candidate songs from a user's rated songs.

    Steps:
      1. Resolve every rated song against the global ``songs`` catalogue.
      2. Build the user's interest vector and pull the ``pool_size`` best
         scoring catalogue songs as candidates (duplicates by name removed).
      3. Compute cosine similarity between each rated song and each candidate.
      4. For each candidate, take the ``n_neighbors`` most similar rated songs
         and average ``rating * similarity`` over them.

    Side effects: writes ``Recommendations.csv`` (candidate pool) and
    ``Cosine_Similarity.csv`` (similarity matrix) to the working directory.

    Parameters
    ----------
    User_ratings : pandas.DataFrame
        Column 0 holds song names, column 1 the user's rating for each.
    n_neighbors : int, default 2
        How many of the most similar rated songs contribute to a prediction.
        Capped at the number of rated songs found in the catalogue.
    top_k : int, default 20
        Number of highest-scoring candidates to return.
    pool_size : int, default 500
        Size of the candidate pool taken from the catalogue.

    Returns
    -------
    dict
        ``{song name: predicted rating}`` for the ``top_k`` best candidates,
        ordered from highest to lowest score. Empty if none of the rated songs
        could be found in the catalogue.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is smaller than 1.
    """
    if n_neighbors < 1:
        raise ValueError("n_neighbors must be at least 1")

    # Resolve songs one at a time so each rating stays attached to the
    # catalogue row it matched; songs that are not found are dropped together
    # with their rating instead of shifting the remaining ratings.
    matched_rows = []
    matched_ratings = []
    for song_name, rating in zip(User_ratings.iloc[:, 0], User_ratings.iloc[:, 1]):
        match = retrieve_rows([song_name], songs)
        if not match.empty:
            matched_rows.append(match)
            matched_ratings.append(rating)
    if not matched_rows:
        return {}

    User_History = pd.concat(matched_rows)
    # Fresh 0..n-1 index so positional and label-based access agree.
    Ratings = pd.Series(matched_ratings, dtype=float)

    User_Interest_Vector = get_user_interest_vector(User_History, Ratings)
    Songs_Pool = get_pool_of_similar_songs(User_Interest_Vector, songs, pool_size)
    Clean_Pool = Songs_Pool.drop_duplicates(subset='name', keep='first')
    Clean_Pool.to_csv('Recommendations.csv')

    # The pool carries an extra trailing 'sum' score column; exclude it so both
    # sides of the similarity use the same feature columns.
    Sim = calculate_cosine_similarity(User_History.iloc[:, 2:], Clean_Pool.iloc[:, 2:-1])
    cosine_similarity_df = pd.DataFrame(
        Sim, index=User_History['name'], columns=Clean_Pool['name']
    )
    cosine_similarity_df.to_csv('Cosine_Similarity.csv')

    neighbours = min(n_neighbors, Sim.shape[0])
    rating_values = Ratings.to_numpy()
    predictions = {}
    for col, candidate in enumerate(cosine_similarity_df.columns):
        similarities = Sim[:, col]
        # Positions of the most similar rated songs; working by position
        # avoids comparing floats for equality and duplicate-name lookups.
        nearest = np.argsort(-similarities, kind="stable")[:neighbours]
        weighted = rating_values[nearest] * similarities[nearest]
        predictions[str(candidate)] = float(weighted.sum()) / neighbours

    ranked = sorted(predictions.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:top_k])

In [7]:
def get_rating(song_name, ratings_df):
    """Return the user's rating for the first entry whose name contains ``song_name``.

    Matching is a case-insensitive *literal* substring test against the
    ``name`` column. ``song_name`` is not interpreted as a regular expression,
    and rows with a missing name never match.

    Parameters
    ----------
    song_name : str
        Title (or title fragment) to look for.
    ratings_df : pandas.DataFrame
        Frame with ``name`` and ``Rating`` columns.

    Returns
    -------
    The ``Rating`` value of the first matching row, or the string
    ``"Song not found"`` when nothing matches. Callers must check for that
    sentinel before doing arithmetic on the result.
    """
    matches = ratings_df[
        ratings_df['name'].str.contains(str(song_name), case=False, na=False, regex=False)
    ]
    if matches.empty:
        return "Song not found"
    return matches.iloc[0]['Rating']


In [8]:

def calculate_cosine_similarity(user_history_df, song_pool_df):
    """Compute pairwise cosine similarity between two feature frames.

    Both frames are reduced to their underlying value arrays and handed to
    scikit-learn's ``cosine_similarity``. In the returned matrix, row ``i``
    corresponds to row ``i`` of ``user_history_df`` and column ``j`` to row
    ``j`` of ``song_pool_df``.
    """
    return cosine_similarity(user_history_df.values, song_pool_df.values)

In [25]:
# Listening history for one user: each song title paired with the rating the user gave it.
User_ratings = pd.DataFrame(
    [
        ("Pehli Nazar Mein", 5),
        ("Doorie", 4),
        ("Soch Na Sake", 4),
        ("Suit Suit", 5),
        ("Lagdi Lahore Di From Street Dancer 3D", 4),
        ("Morni Banke From Badhaai Ho", 5),
    ],
    columns=["name", "Rating"],
)

# The recommender returns {song name: predicted rating}; flatten it into a
# two-column frame for display and for the evaluation cells further down.
top_10_values = recommender(User_ratings)
df = pd.DataFrame(list(top_10_values.items()), columns=['Key', 'Value'])


In [26]:
# Display the predicted ratings (up to 20 rows; 'Key' is the song name, 'Value' the score).
df.head(20)

Unnamed: 0,Key,Value
0,Morni Banke From Badhaai Ho,4.841528
1,Surma Surma,4.81186
2,Teri Choriyaan From Chhalaang,4.686898
3,Pehli Nazar Mein,4.431554
4,Doorie,4.414442
5,Tera Hone Laga Hoon,4.396085
6,Valentine Mashup 2019 DJ Notorious Lijo George,4.383969
7,Be Intehaan,4.375791
8,Tu Jaane Na,4.369746
9,Valentine Mashup 2017 By DJ Notorious,4.352796


In [19]:
# NOTE(review): the saved output below lists four songs with different ratings than the
# six-song User_ratings frame defined earlier, and this cell's execution count (19) is
# lower than that cell's (25) — the output looks stale. Re-run with Restart & Run All.
User_ratings.head()

Unnamed: 0,name,Rating
0,Pehli Nazar Mein,3
1,Tera Hone Laga Hoon,2
2,Suit Suit,5
3,Lagdi Lahore Di From Street Dancer 3D,5


In [33]:
# Hand-entered ground truth used to evaluate the predictions: 'Key' is the song
# name (same column name as in `df` so the two can be merged) and 'Value_' is
# the actual rating.
Actual = pd.DataFrame({
    "Key": ["Slowly Slowly","Tera Hone Laga Hoon", "Lagdi Lahore Di From Street Dancer 3D", "Dil Meri Na Sune"],
    "Value_": [3.8, 5, 4, 5],
})



In [35]:
# Join predictions ('Value') with the actual ratings ('Value_') on the song name.
merged= pd.merge(df, Actual, on='Key', how='inner')  # inner join: only songs present in both frames are kept

print(merged)

                                     Key     Value  Value_
0                    Tera Hone Laga Hoon  4.396085     5.0
1                       Dil Meri Na Sune  4.315250     5.0
2  Lagdi Lahore Di From Street Dancer 3D  4.155391     4.0
3                          Slowly Slowly  4.020845     3.8


In [41]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# `merged` holds the predicted rating in 'Value' and the actual rating in 'Value_'.

# Root-mean-squared error between actual and predicted ratings.
rmse = np.sqrt(mean_squared_error(merged['Value_'], merged['Value']))
print(f"RMSE: {rmse}")


RMSE: 0.4760554175013279


In [42]:
# Mean absolute error between actual ('Value_') and predicted ('Value') ratings.
mae = mean_absolute_error(merged['Value_'], merged['Value'])




print(f"MAE: {mae}")


MAE: 0.416225379570067


In [43]:
from sklearn.metrics import r2_score

# `merged` holds the predicted rating in 'Value' and the actual rating in 'Value_'.

# Coefficient of determination (R²) of the predictions against the actual ratings.
# NOTE(review): the saved merge output shows only four rows, so treat this score as indicative at best.
r2 = r2_score(merged['Value_'], merged['Value'])

print(f"R² score: {r2}")


R² score: 0.2629959007077607
