## **Collaborative Filtering for Spotify recommendations using the KNN algorithm**

In [1]:
from scipy import sparse
from scipy.sparse import csr_matrix
import pandas as pd
!pip install fuzzywuzzy
from fuzzywuzzy import fuzz
from sklearn.neighbors import NearestNeighbors
import plotly.express as px

Collecting fuzzywuzzy
  Downloading https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0




### Pivoting the dataframe (artist/song `id` as rows, `user_id` as columns, ratings as values) and returning it as a sparse matrix

In [2]:
def pivot_features(df):
    """Pivot a long ratings table into an item-by-user sparse matrix.

    Args:
        df: DataFrame with `id`, `user_id` and `ratings` columns.

    Returns:
        A scipy CSR matrix with one row per `id` and one column per
        `user_id`; (id, user_id) pairs absent from `df` are filled with 0.

    The input frame, the pivoted frame and the sparse matrix are printed
    along the way.
    """
    print("The dataset in original form:\n", df)

    # Rows are item ids, columns are user ids, cells are ratings.
    pivoted = df.pivot(index='id', columns='user_id', values='ratings')
    pivoted = pivoted.fillna(0)
    sparse_ratings = csr_matrix(pivoted.to_numpy())

    print('Printing the pivoted features of dataset: \n\n', pivoted)
    print('\n\n Printing the genereated matrix of the pivoted feature dataset: \n\n', sparse_ratings)

    return sparse_ratings

### Fetching some insights/statistics from the dataset, and visualising the ratings, artists/songs and other values.

In [3]:
def data_statistics(df,filter_for):
    """Print summary statistics of a ratings table and show three bar charts.

    Args:
        df: DataFrame with `user_id`, `id` and `ratings` columns.
        filter_for: Label for the rated items (e.g. 'artists' or 'songs');
            used only in printed text and chart labels.

    Returns:
        None. Everything is printed or shown as a plotly figure.

    Raises:
        ValueError: if `df` already contains a rating of exactly 0.0, because
            the synthetic 0.0 row is concatenated with `verify_integrity=True`.
    """
    num_users = len(df.user_id.unique())
    num_items = len(df.id.unique())
    print('There are ', num_users ,' unique users and ', num_items,'unique ', filter_for,' in this data set')

    # Number of rows per observed rating value.
    df_count_temp = pd.DataFrame(df.groupby('ratings').size(), columns=['count'])

    # Every (user, item) pair without a row in df is counted as a 0.0 rating.
    total = num_users * num_items
    rating_zero_cnt = total - df.shape[0]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    df_ratings_count = pd.concat(
        [df_count_temp, pd.DataFrame({'count': rating_zero_cnt}, index=[0.0])],
        verify_integrity=True,
    ).sort_index()

    # Name the index explicitly so reset_index always yields a 'ratings' column.
    df_ratings_count = df_ratings_count[['count']].rename_axis('ratings').reset_index()
    print('\n\n Printing the statistics for ratings (rating and count of ratings):\n')
    print(df_ratings_count)
    fig = px.bar(df_ratings_count, x='ratings', y='count',color='count')
    fig.show()

    # Number of ratings received by each item.
    df_artist_count = pd.DataFrame(df.groupby('id').size(), columns=['count'])
    df_artist_count = df_artist_count[['count']].reset_index()
    print('\n\n Printing the statistics for ',filter_for,' (id and number of ratings given):\n')
    print(df_artist_count)

    fig = px.bar(df_artist_count, x='id', y='count',color='count', labels={'id':'ID for the '+filter_for,'count' : 'Count of ratings'})
    fig.show()

    # Number of ratings given by each user.
    df_users_count = pd.DataFrame(df.groupby('user_id').size(), columns=['count'])
    df_users_count = df_users_count[['count']].reset_index().rename(columns={'user_id': 'id'})
    print('\n\n Printing the statistics for users (id and number of ratings given by the user):\n')
    print(df_users_count)

    # The x axis holds user ids here, so label it as such (not with filter_for).
    fig = px.bar(df_users_count, x='id', y='count',color='count', labels={'id':'ID for the users','count' : 'Count of ratings'})
    fig.show()

    

### Creating a sparse matrix, and relating the id with the name (artist or song)

In [4]:
def create_sparse_matrix(df):
    """Build the item-by-user sparse rating matrix and a name -> row mapping.

    Args:
        df: DataFrame with `id`, `name`, `user_id` and `ratings` columns.

    Returns:
        A tuple `(matrix, mapper)` where `matrix` is a CSR matrix with one
        row per `id` (missing ratings filled with 0) and `mapper` maps each
        item name to the row position of its id in that matrix.
    """
    ratings_by_item = df.pivot(index='id', columns='user_id', values='ratings').fillna(0)

    # One row per id (the last occurrence is kept), looked up in the same
    # order as the rows of the pivoted matrix.
    names_by_id = df.drop_duplicates(subset='id', keep='last').set_index('id')
    ordered_names = list(names_by_id.loc[ratings_by_item.index].name)

    artist_to_id = {}
    for row_position, label in enumerate(ordered_names):
        artist_to_id[label] = row_position

    artist_user_mat_sparse = csr_matrix(ratings_by_item.values)
    print("\n\n Printing the sparse matrix for the dataset:\n",artist_user_mat_sparse)
    return artist_user_mat_sparse, artist_to_id

### Fitting a KNN model

In [5]:
def fit_KNN_model(sparse_matrix):
    """Create a brute-force cosine-distance NearestNeighbors model and fit it.

    Args:
        sparse_matrix: The rating matrix to fit on (one row per item).

    Returns:
        The fitted `NearestNeighbors` instance.
    """
    knn_settings = {
        'metric': 'cosine',
        'algorithm': 'brute',
        'n_neighbors': 20,
        'n_jobs': -1,
    }
    estimator = NearestNeighbors(**knn_settings)
    estimator.fit(sparse_matrix)
    return estimator

### Using a fuzzy matcher to find the stored name closest to the input: only names with a match ratio of at least 70 are considered, and the index of the best match is returned.

In [6]:
def fuzzy_match(mapper, fav, verbose=True):
    """Return the mapped index of the name that best fuzzy-matches `fav`.

    Args:
        mapper: Dict mapping a name to its index.
        fav: The name to look up; compared case-insensitively.
        verbose: When true, print the name that was matched.

    Returns:
        The index stored in `mapper` for the name with the highest
        `fuzz.ratio` among those scoring at least 70, or None when no name
        reaches that score. On equal scores the name appearing later in
        `mapper` wins.
    """
    best = None
    for title, idx in mapper.items():
        score = fuzz.ratio(title.lower(), fav.lower())
        if score < 70:
            continue
        # `>=` lets a later entry replace an earlier one with the same score.
        if best is None or score >= best[2]:
            best = (title, idx, score)

    if best is None:
        return None

    matched_title, matched_idx, _ = best
    if verbose:
        print('Found in the system:', matched_title)
    return matched_idx

### Main recommendation function: uses the KNN model to find the nearest neighbours of the matched item, then uses a reverse mapper (row index → name) to turn the neighbour indices into recommendation names

In [7]:

def make_recommendation(model_knn, data, mapper, fav, n_recommendations):
    """Print and return the items most similar to `fav`.

    Args:
        model_knn: Nearest-neighbour model exposing `fit` and `kneighbors`.
        data: Sparse item-by-user rating matrix; row i belongs to the name
            that `mapper` maps to i.
        mapper: Dict mapping an item name to its row index in `data`.
        fav: Name of the favourite item; resolved with `fuzzy_match`.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A list of item names ordered from the largest to the smallest
        distance among the selected neighbours (the closest item is last).
        The list is empty when `fav` has no fuzzy match or when
        `n_recommendations` is less than 1.
    """
    result=[]
    model_knn.fit(data)

    print('You have input: ', fav)
    print('\n')
    idx = fuzzy_match(mapper, fav, verbose=True)
    if idx is None:
        # fuzzy_match found nothing above its threshold; indexing data with
        # None would not select a single row, so stop here.
        print('No match found in the system for ', fav)
        return result
    if n_recommendations < 1:
        return result

    print('Gathering Recommendations..........')
    print('\n')
    # One extra neighbour because the query item is normally returned as its
    # own nearest neighbour; never ask for more neighbours than there are rows.
    n_neighbors = min(n_recommendations + 1, data.shape[0])
    distances, indices = model_knn.kneighbors(data[idx], n_neighbors=n_neighbors)

    # ravel() keeps a 1-D sequence even when only one neighbour is returned.
    by_distance = sorted(
        zip(indices.ravel().tolist(), distances.ravel().tolist()),
        key=lambda pair: pair[1],
    )
    # Exclude the query item by index rather than by position, so a tie at
    # distance 0 cannot leave the favourite inside its own recommendations.
    others = [pair for pair in by_distance if pair[0] != idx][:n_recommendations]
    # Existing output order is kept: farthest neighbour first, closest last.
    raw_recommends = others[::-1]

    reverse_mapper = {v: k for k, v in mapper.items()}

    print('Recommendations for ', fav)
    for rank, (neighbour_idx, dist) in enumerate(raw_recommends):
        print(rank+1,':',reverse_mapper[neighbour_idx],' is similar to your favourite by ', dist)
        result.append(reverse_mapper[neighbour_idx])
    return result


### Calling function 

In [8]:
def call_collaborative_KNN(df, favourite, filter_for, num_recomm):
    """Run the full pipeline: print/plot statistics, fit KNN, recommend.

    Args:
        df: Ratings DataFrame with `id`, `name`, `user_id`, `ratings` columns.
        favourite: Name of the item to base the recommendations on.
        filter_for: Label for the rated items, passed to `data_statistics`.
        num_recomm: Number of recommendations requested.

    Returns:
        The list of recommended names produced by `make_recommendation`.
    """
    # Called only for its printed output; the returned matrix is not needed.
    pivot_features(df)
    data_statistics(df, filter_for)

    sparse_ratings, name_to_row = create_sparse_matrix(df)
    knn = fit_KNN_model(sparse_ratings)
    return make_recommendation(knn, sparse_ratings, name_to_row, favourite, num_recomm)

## Implementing collaborative filtering for recommendation of **artists**

In [9]:
# Artist recommendations seeded with an English pop artist.
num_recomm = 10
favourite = 'Selena Gomez'
df_ratings = pd.read_csv('artists_user_ratings.csv')
result_user_pop = call_collaborative_KNN(
    df=df_ratings,
    favourite=favourite,
    filter_for='artists',
    num_recomm=num_recomm,
)

The dataset in original form:
      id               name  user_id  ratings
0     1  Weird Al Yankovic        1      2.5
1     1  Weird Al Yankovic        2      1.5
2     1  Weird Al Yankovic        3      3.5
3     1  Weird Al Yankovic        4      1.0
4     1  Weird Al Yankovic        5      3.0
..   ..                ...      ...      ...
127  22     Ellie Goulding        6      5.0
128  23          Lady Gaga        6      4.5
129  23          Lady Gaga        3      2.0
130  23          Lady Gaga        2      3.0
131  23          Lady Gaga        7      3.5

[132 rows x 4 columns]
Printing the pivoted features of dataset: 

 user_id   1    2    3    4    5    6    7    8    9    10
id                                                       
1        2.5  1.5  3.5  1.0  3.0  5.0  3.5  1.5  1.0  3.0
2        0.0  3.5  0.0  2.0  0.0  4.5  0.0  3.0  5.0  1.0
3        2.0  0.0  5.0  1.5  3.5  4.0  0.0  4.5  2.5  5.0
4        0.0  3.0  0.0  4.5  5.0  0.0  0.0  0.0  0.0  0.0
5        5.0



 Printing the statistics for  artists  (id and number of ratings given):

    id  count
0    1     10
1    2      6
2    3      8
3    4      3
4    5      8
5    6      9
6    7      6
7    8      9
8    9      5
9   10      4
10  11      7
11  12      5
12  13      8
13  14      8
14  15      5
15  16      5
16  17      3
17  18      5
18  19      4
19  20      5
20  21      3
21  22      2
22  23      4




 Printing the statistics for users (id and number of ratings given by the user):

   id  count
0   1     13
1   2     16
2   3     13
3   4     15
4   5     17
5   6     17
6   7     13
7   8     12
8   9     13
9  10      3




 Printing the sparse matrix for the dataset:
   (0, 0)	2.5
  (0, 1)	1.5
  (0, 2)	3.5
  (0, 3)	1.0
  (0, 4)	3.0
  (0, 5)	5.0
  (0, 6)	3.5
  (0, 7)	1.5
  (0, 8)	1.0
  (0, 9)	3.0
  (1, 1)	3.5
  (1, 3)	2.0
  (1, 5)	4.5
  (1, 7)	3.0
  (1, 8)	5.0
  (1, 9)	1.0
  (2, 0)	2.0
  (2, 2)	5.0
  (2, 3)	1.5
  (2, 4)	3.5
  (2, 5)	4.0
  (2, 7)	4.5
  (2, 8)	2.5
  (2, 9)	5.0
  (3, 1)	3.0
  :	:
  (16, 4)	3.0
  (16, 7)	2.5
  (17, 0)	5.0
  (17, 2)	3.0
  (17, 4)	2.5
  (17, 6)	2.0
  (17, 8)	1.0
  (18, 1)	3.0
  (18, 3)	5.0
  (18, 6)	1.0
  (18, 8)	2.5
  (19, 1)	1.6
  (19, 3)	4.5
  (19, 5)	3.0
  (19, 7)	2.0
  (19, 8)	3.0
  (20, 1)	2.0
  (20, 5)	5.0
  (20, 8)	3.0
  (21, 4)	4.5
  (21, 5)	5.0
  (22, 1)	3.0
  (22, 2)	2.0
  (22, 5)	4.5
  (22, 6)	3.5
You have input:  Selena Gomez


Found in the system: Selena Gomez
Gathering Recommendations..........


Recommendations for  Selena Gomez
1 : Sunidhi Chauhan  is similar to your favourite by  0.2577702796333463
2 : Rahet Fateh Ali Khan  is similar to your favourite by  0

In [10]:
# Artist recommendations seeded with an Indian artist.
num_recomm = 10
favourite = 'Sonu Nigam'
df_ratings = pd.read_csv('artists_user_ratings.csv')
result_user_indian = call_collaborative_KNN(
    df=df_ratings,
    favourite=favourite,
    filter_for='artists',
    num_recomm=num_recomm,
)

The dataset in original form:
      id               name  user_id  ratings
0     1  Weird Al Yankovic        1      2.5
1     1  Weird Al Yankovic        2      1.5
2     1  Weird Al Yankovic        3      3.5
3     1  Weird Al Yankovic        4      1.0
4     1  Weird Al Yankovic        5      3.0
..   ..                ...      ...      ...
127  22     Ellie Goulding        6      5.0
128  23          Lady Gaga        6      4.5
129  23          Lady Gaga        3      2.0
130  23          Lady Gaga        2      3.0
131  23          Lady Gaga        7      3.5

[132 rows x 4 columns]
Printing the pivoted features of dataset: 

 user_id   1    2    3    4    5    6    7    8    9    10
id                                                       
1        2.5  1.5  3.5  1.0  3.0  5.0  3.5  1.5  1.0  3.0
2        0.0  3.5  0.0  2.0  0.0  4.5  0.0  3.0  5.0  1.0
3        2.0  0.0  5.0  1.5  3.5  4.0  0.0  4.5  2.5  5.0
4        0.0  3.0  0.0  4.5  5.0  0.0  0.0  0.0  0.0  0.0
5        5.0



 Printing the statistics for  artists  (id and number of ratings given):

    id  count
0    1     10
1    2      6
2    3      8
3    4      3
4    5      8
5    6      9
6    7      6
7    8      9
8    9      5
9   10      4
10  11      7
11  12      5
12  13      8
13  14      8
14  15      5
15  16      5
16  17      3
17  18      5
18  19      4
19  20      5
20  21      3
21  22      2
22  23      4




 Printing the statistics for users (id and number of ratings given by the user):

   id  count
0   1     13
1   2     16
2   3     13
3   4     15
4   5     17
5   6     17
6   7     13
7   8     12
8   9     13
9  10      3




 Printing the sparse matrix for the dataset:
   (0, 0)	2.5
  (0, 1)	1.5
  (0, 2)	3.5
  (0, 3)	1.0
  (0, 4)	3.0
  (0, 5)	5.0
  (0, 6)	3.5
  (0, 7)	1.5
  (0, 8)	1.0
  (0, 9)	3.0
  (1, 1)	3.5
  (1, 3)	2.0
  (1, 5)	4.5
  (1, 7)	3.0
  (1, 8)	5.0
  (1, 9)	1.0
  (2, 0)	2.0
  (2, 2)	5.0
  (2, 3)	1.5
  (2, 4)	3.5
  (2, 5)	4.0
  (2, 7)	4.5
  (2, 8)	2.5
  (2, 9)	5.0
  (3, 1)	3.0
  :	:
  (16, 4)	3.0
  (16, 7)	2.5
  (17, 0)	5.0
  (17, 2)	3.0
  (17, 4)	2.5
  (17, 6)	2.0
  (17, 8)	1.0
  (18, 1)	3.0
  (18, 3)	5.0
  (18, 6)	1.0
  (18, 8)	2.5
  (19, 1)	1.6
  (19, 3)	4.5
  (19, 5)	3.0
  (19, 7)	2.0
  (19, 8)	3.0
  (20, 1)	2.0
  (20, 5)	5.0
  (20, 8)	3.0
  (21, 4)	4.5
  (21, 5)	5.0
  (22, 1)	3.0
  (22, 2)	2.0
  (22, 5)	4.5
  (22, 6)	3.5
You have input:  Sonu Nigam


Found in the system: Sonu Nigam
Gathering Recommendations..........


Recommendations for  Sonu Nigam
1 : Selena Gomez  is similar to your favourite by  0.36771534510439996
2 : Palak Mucchal  is similar to your favourite by  0.33122821594858

In [11]:
# Song recommendations seeded with a Hindi song.
num_recomm = 10
favourite = 'Zindagi'
df_songs = pd.read_csv('songs_users_ratings.csv')
result_user_hindi_songs = call_collaborative_KNN(
    df=df_songs,
    favourite=favourite,
    filter_for='songs',
    num_recomm=num_recomm,
)

The dataset in original form:
     id             name  user_id  ratings
0    1   Aabaad Barbaad        1      4.5
1    1   Aabaad Barbaad        2      2.0
2    1   Aabaad Barbaad        5      3.0
3    1   Aabaad Barbaad        4      5.0
4    2    Hardum Humdum        1      5.0
..  ..              ...      ...      ...
57  21  On Top of World        2      4.5
58  21  On Top of World        3      3.5
59  21  On Top of World        4      2.5
60  21  On Top of World        5      4.0
61  21  On Top of World        6      3.5

[62 rows x 4 columns]
Printing the pivoted features of dataset: 

 user_id    1    2    3    4    5    6
id                                   
1        4.5  2.0  0.0  5.0  3.0  0.0
2        5.0  0.0  1.0  3.5  0.0  0.0
3        4.5  0.0  2.0  0.0  0.0  1.0
4        0.0  0.0  0.0  4.5  0.0  1.0
5        5.0  0.0  3.0  4.0  3.5  2.0
6        0.0  2.0  0.0  3.5  0.0  0.0
7        0.0  3.5  3.0  4.5  2.5  2.0
8        4.5  0.0  0.0  4.0  2.0  0.0
9        4.5  3.0



 Printing the statistics for  songs  (id and number of ratings given):

    id  count
0    1      4
1    2      3
2    3      3
3    4      2
4    5      5
5    6      2
6    7      5
7    8      3
8    9      5
9   10      2
10  11      2
11  12      1
12  13      3
13  14      3
14  15      2
15  16      5
16  17      3
17  18      1
18  19      1
19  20      1
20  21      6




 Printing the statistics for users (id and number of ratings given by the user):

   id  count
0   1     10
1   2     10
2   3     11
3   4     14
4   5     10
5   6      7




 Printing the sparse matrix for the dataset:
   (0, 0)	4.5
  (0, 1)	2.0
  (0, 3)	5.0
  (0, 4)	3.0
  (1, 0)	5.0
  (1, 2)	1.0
  (1, 3)	3.5
  (2, 0)	4.5
  (2, 2)	2.0
  (2, 5)	1.0
  (3, 3)	4.5
  (3, 5)	1.0
  (4, 0)	5.0
  (4, 2)	3.0
  (4, 3)	4.0
  (4, 4)	3.5
  (4, 5)	2.0
  (5, 1)	2.0
  (5, 3)	3.5
  (6, 1)	3.5
  (6, 2)	3.0
  (6, 3)	4.5
  (6, 4)	2.5
  (6, 5)	2.0
  (7, 0)	4.5
  :	:
  (12, 3)	4.5
  (12, 4)	2.0
  (12, 5)	1.5
  (13, 1)	4.5
  (13, 2)	5.0
  (13, 4)	2.5
  (14, 1)	4.0
  (14, 3)	1.5
  (15, 0)	3.5
  (15, 1)	5.0
  (15, 2)	4.5
  (15, 3)	2.0
  (15, 4)	4.0
  (16, 1)	3.5
  (16, 2)	4.5
  (16, 3)	2.0
  (17, 2)	4.0
  (18, 1)	4.5
  (19, 2)	4.5
  (20, 0)	2.5
  (20, 1)	4.5
  (20, 2)	3.5
  (20, 3)	2.5
  (20, 4)	4.0
  (20, 5)	3.5
You have input:  Zindagi


Found in the system: Zindagi
Gathering Recommendations..........


Recommendations for  Zindagi
1 : On Top of World  is similar to your favourite by  0.5434902463705356
2 : Hardum Humdum  is similar to your favourite by  0.508646915963114
3 : N

In [12]:
# Song recommendations seeded with an English song.
num_recomm = 10
favourite = 'Here With Me'
df_songs = pd.read_csv('songs_users_ratings.csv')
result_user_english_songs = call_collaborative_KNN(
    df=df_songs,
    favourite=favourite,
    filter_for='songs',
    num_recomm=num_recomm,
)

The dataset in original form:
     id             name  user_id  ratings
0    1   Aabaad Barbaad        1      4.5
1    1   Aabaad Barbaad        2      2.0
2    1   Aabaad Barbaad        5      3.0
3    1   Aabaad Barbaad        4      5.0
4    2    Hardum Humdum        1      5.0
..  ..              ...      ...      ...
57  21  On Top of World        2      4.5
58  21  On Top of World        3      3.5
59  21  On Top of World        4      2.5
60  21  On Top of World        5      4.0
61  21  On Top of World        6      3.5

[62 rows x 4 columns]
Printing the pivoted features of dataset: 

 user_id    1    2    3    4    5    6
id                                   
1        4.5  2.0  0.0  5.0  3.0  0.0
2        5.0  0.0  1.0  3.5  0.0  0.0
3        4.5  0.0  2.0  0.0  0.0  1.0
4        0.0  0.0  0.0  4.5  0.0  1.0
5        5.0  0.0  3.0  4.0  3.5  2.0
6        0.0  2.0  0.0  3.5  0.0  0.0
7        0.0  3.5  3.0  4.5  2.5  2.0
8        4.5  0.0  0.0  4.0  2.0  0.0
9        4.5  3.0



 Printing the statistics for  songs  (id and number of ratings given):

    id  count
0    1      4
1    2      3
2    3      3
3    4      2
4    5      5
5    6      2
6    7      5
7    8      3
8    9      5
9   10      2
10  11      2
11  12      1
12  13      3
13  14      3
14  15      2
15  16      5
16  17      3
17  18      1
18  19      1
19  20      1
20  21      6




 Printing the statistics for users (id and number of ratings given by the user):

   id  count
0   1     10
1   2     10
2   3     11
3   4     14
4   5     10
5   6      7




 Printing the sparse matrix for the dataset:
   (0, 0)	4.5
  (0, 1)	2.0
  (0, 3)	5.0
  (0, 4)	3.0
  (1, 0)	5.0
  (1, 2)	1.0
  (1, 3)	3.5
  (2, 0)	4.5
  (2, 2)	2.0
  (2, 5)	1.0
  (3, 3)	4.5
  (3, 5)	1.0
  (4, 0)	5.0
  (4, 2)	3.0
  (4, 3)	4.0
  (4, 4)	3.5
  (4, 5)	2.0
  (5, 1)	2.0
  (5, 3)	3.5
  (6, 1)	3.5
  (6, 2)	3.0
  (6, 3)	4.5
  (6, 4)	2.5
  (6, 5)	2.0
  (7, 0)	4.5
  :	:
  (12, 3)	4.5
  (12, 4)	2.0
  (12, 5)	1.5
  (13, 1)	4.5
  (13, 2)	5.0
  (13, 4)	2.5
  (14, 1)	4.0
  (14, 3)	1.5
  (15, 0)	3.5
  (15, 1)	5.0
  (15, 2)	4.5
  (15, 3)	2.0
  (15, 4)	4.0
  (16, 1)	3.5
  (16, 2)	4.5
  (16, 3)	2.0
  (17, 2)	4.0
  (18, 1)	4.5
  (19, 2)	4.5
  (20, 0)	2.5
  (20, 1)	4.5
  (20, 2)	3.5
  (20, 3)	2.5
  (20, 4)	4.0
  (20, 5)	3.5
You have input:  Here With Me


Found in the system: Here With Me
Gathering Recommendations..........


Recommendations for  Here With Me
1 : Dil Hi Toh Hai  is similar to your favourite by  0.42514949027133997
2 : Alone  is similar to your favourite by  0.42067587797834

## **Implementing Precision Recall Metric for Recommendation Evaluation**

### Implementing evaluation metric for artists recommendation

In [24]:
# Hand-written relevance lists: the artists each user profile is taken to like.
user_indian_artist_lover = [
    'Sunidhi Chauhan',
    'Shreya Ghoshal',
    'Arijit Singh',
    'A.R Rahman',
    'Sonu Nigam',
    'Palak Mucchal',
    'Rahet Fateh Ali Khan',
    'Mohit Chouhan',
    'Amit Trivedi',
]
user_english_pop_artist_lover = [
    'Lady Gaga',
    'Ellie Goulding',
    'Halsey',
    'Akon',
    'Miley Cyrus',
    'Sam Smith',
    'Adele',
    'Katy Perry',
    'Taylor Swift',
    'Selena Gomez',
    '50 Cent',
    '3 Doors Down',
]
# Denominator used for the precision values computed below.
number_of_recommendations_made = num_recomm

In [25]:
# One flag per recommendation: 1 if the recommended artist is in the
# Indian-artist relevance list, otherwise 0.
number_of_relevant_recommendations_user_indian_artist_lover = [
    1 if recommended in user_indian_artist_lover else 0
    for recommended in result_user_indian
]

print("Number of relevant recommendations made to Indian artists lover user: ",number_of_relevant_recommendations_user_indian_artist_lover)


Number of relevant recommendations made to Indian artists lover user:  [0, 1, 1, 0, 0, 0, 1, 1, 0, 1]


In [26]:
# One flag per recommendation: 1 if the recommended artist is in the
# English-pop relevance list, otherwise 0.
number_of_relevant_recommendations_user_english_pop_artist_lover = [
    1 if recommended in user_english_pop_artist_lover else 0
    for recommended in result_user_pop
]


print("Number of relevant recommendations made to english pop artists lover user: ",number_of_relevant_recommendations_user_english_pop_artist_lover)


Number of relevant recommendations made to english pop artists lover user:  [0, 0, 0, 1, 1, 1, 0, 0, 0, 1]


In [27]:
# Precision = relevant recommendations / recommendations made, per profile.
precision_for_user_indian_artist_lover = (
    sum(number_of_relevant_recommendations_user_indian_artist_lover)
    / number_of_recommendations_made
)
precision_for_user_english_pop_artist_lover = (
    sum(number_of_relevant_recommendations_user_english_pop_artist_lover)
    / number_of_recommendations_made
)
# Unweighted mean over the two profiles.
avg_precision_artists = (
    precision_for_user_indian_artist_lover + precision_for_user_english_pop_artist_lover
) / 2

print( "Calculated Precision for Indian artists lover: ", precision_for_user_indian_artist_lover )
print( "Calculated Precision for english pop artists lover: ", precision_for_user_english_pop_artist_lover )
print('The average precision for artists recommendation :' , avg_precision_artists)

Calculated Precision for Indian artists lover:  0.5
Calculated Precision for english pop artists lover:  0.4
The average precision for artists recommendation : 0.45


In [28]:
# Recall = relevant recommendations / size of the profile's relevance list.
recall_for_user_indian_artist_lover = (
    sum(number_of_relevant_recommendations_user_indian_artist_lover)
    / len(user_indian_artist_lover)
)
recall_for_english_pop_artist_lover = (
    sum(number_of_relevant_recommendations_user_english_pop_artist_lover)
    / len(user_english_pop_artist_lover)
)
# Unweighted mean over the two profiles.
avg_recall_artists = (
    recall_for_user_indian_artist_lover + recall_for_english_pop_artist_lover
) / 2

print( "Calculated recalll for Indian artists lover: ", recall_for_user_indian_artist_lover )
print( "Calculated recall for english pop artists lover: ", recall_for_english_pop_artist_lover )
print('The average recall for artists recommendation :' , avg_recall_artists)


Calculated recalll for Indian artists lover:  0.5555555555555556
Calculated recall for english pop artists lover:  0.3333333333333333
The average recall for artists recommendation : 0.4444444444444444


### Implementing evaluation metric for songs recommendation

In [29]:
# Hand-written relevance lists: the songs each user profile is taken to like.
user_hindi_songs_lover = ['Aabaad Barbaad','Hardum Humdum','Meri Tum Ho','Dil Julaha','Der Lagi Lekin','Dil Hi Toh Hai','Nadaniyaan','Dil Chahta Hai','Noor E Khuda','Quaafirana','Zindagi','Shayad','Humraah']
user_english_songs_lover =['Still Your King', 'You Make me', 'Don\'t Let Me Down', 'Waiting For Love', 'Here With Me', 'Alone', 'Burn', 'On Top of World'] 
# Denominator used for the precision values computed below.
number_of_recommendations_made = num_recomm

# One flag per recommendation: 1 if the recommended song is in the
# Hindi-songs relevance list, otherwise 0.
number_of_relevant_recommendations_user_hindi_songs_lover = []
for value in result_user_hindi_songs:
  if value in user_hindi_songs_lover:
    number_of_relevant_recommendations_user_hindi_songs_lover.append(1)
  else:
    number_of_relevant_recommendations_user_hindi_songs_lover.append(0)

# This cell evaluates Hindi songs, so the printed label says so (it previously
# repeated the artists label).
print("Number of relevant recommendations made to Hindi songs lover user: ",number_of_relevant_recommendations_user_hindi_songs_lover)


Number of relevant recommendations made to Indian artists lover user:  [0, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [30]:
# One flag per recommendation: 1 if the recommended song is in the
# English-songs relevance list, otherwise 0.
number_of_relevant_recommendations_user_english_songs_lover = []
for value in result_user_english_songs:
  if value in user_english_songs_lover:
    number_of_relevant_recommendations_user_english_songs_lover.append(1)
  else:
    number_of_relevant_recommendations_user_english_songs_lover.append(0)


# This cell evaluates English songs, so the printed label says so (it
# previously repeated the artists label).
print("Number of relevant recommendations made to English songs lover user: ",number_of_relevant_recommendations_user_english_songs_lover)


Number of relevant recommendations made to english pop artists lover user:  [0, 1, 1, 1, 0, 1, 1, 1, 0, 1]


In [31]:
# Precision = relevant recommendations / recommendations made, per profile.
precision_for_user_hindi_songs_lover = (
    sum(number_of_relevant_recommendations_user_hindi_songs_lover)
    / number_of_recommendations_made
)
precision_for_user_english_songs_lover = (
    sum(number_of_relevant_recommendations_user_english_songs_lover)
    / number_of_recommendations_made
)
# Unweighted mean over the two profiles.
avg_precision_songs = (
    precision_for_user_hindi_songs_lover + precision_for_user_english_songs_lover
) / 2

print( "Calculated Precision for Hindi songs lover: ", precision_for_user_hindi_songs_lover )
print( "Calculated Precision for English songs lover: ", precision_for_user_english_songs_lover )
print('The average precision for songs recommendation :' , avg_precision_songs)

Calculated Precision for Hindi songs lover:  0.9
Calculated Precision for English songs lover:  0.7
The average precision for songs recommendation : 0.8


In [32]:
# Recall = relevant recommendations / size of the profile's relevance list.
recall_for_user_hindi_songs_lover = (
    sum(number_of_relevant_recommendations_user_hindi_songs_lover)
    / len(user_hindi_songs_lover)
)
recall_for_english_songs_lover = (
    sum(number_of_relevant_recommendations_user_english_songs_lover)
    / len(user_english_songs_lover)
)
# Unweighted mean over the two profiles.
avg_recall_songs = (
    recall_for_user_hindi_songs_lover + recall_for_english_songs_lover
) / 2

print( "Calculated recalll for hindi songs lover: ", recall_for_user_hindi_songs_lover )
print( "Calculated recall for english songs lover: ", recall_for_english_songs_lover )
print('The average recall for songs recommendation :' , avg_recall_songs )

Calculated recalll for hindi songs lover:  0.6923076923076923
Calculated recall for english songs lover:  0.875
The average recall for songs recommendation : 0.7836538461538461


In [33]:
# Chart inputs: the three lists are aligned by position (one entry per profile).
precision_and_recall_x = [
    'Indian artist',
    'English pop artist',
    'Hindi songs',
    'English songs',
]
precision_y = [
    precision_for_user_indian_artist_lover,
    precision_for_user_english_pop_artist_lover,
    precision_for_user_hindi_songs_lover,
    precision_for_user_english_songs_lover,
]
recall_y = [
    recall_for_user_indian_artist_lover,
    recall_for_english_pop_artist_lover,
    recall_for_user_hindi_songs_lover,
    recall_for_english_songs_lover,
]

In [35]:
from plotly import graph_objects as go


# Grouped bar chart: one precision bar and one recall bar per profile.
fig3 = go.Figure(
    data=[
    go.Bar(name="Precision",
            x= precision_and_recall_x ,
            y= precision_y),
    go.Bar( name="Recall",
            x= precision_and_recall_x ,
            y= recall_y)
])
# Change the bar mode
# The plotted values are ratios between 0 and 1 (relevant / total), not
# percentages, so the y-axis title states that.
fig3.update_layout(barmode='group' , title = 'Precision & Recall evaluation metric for both the dataset', yaxis_title ='Precision & Recall (fraction between 0 and 1)',
                   xaxis_title = 'Type of recommendations')
fig3.show()