In [None]:
'''

Dataset Information

The dataset contains two files:
triplet_file (loaded below from triplets_file.csv)
metadata_file (loaded below from song_data.csv)

The triplet file contains user_id, song_id, and listen_count (how many times the user listened to the song).
The metadata file contains song_id, title, release, artist_name and year

The dataset is a mixture of songs from various websites, together with the ratings that users gave after listening to them.
There are 3 types of recommendation systems:
Content-based
Content-based filtering is a type of recommender system that attempts to guess what a user may like based on that user's activity. Content-based filtering makes recommendations by using keywords and attributes assigned to objects in a database (e.g., items in an online marketplace) and matching them to a user profile.

Collaborative
Collaborative filtering is a technique that can filter out items that a user might like on the basis of reactions by similar users.
It works by searching a large group of people and finding a smaller set of users with tastes similar to a particular user. It looks at the items they like and combines them to create a ranked list of suggestions.
There are many ways to decide which users are similar and combine their choices to create a list of recommendations. This notebook shows one way to do that with Python.

Popularity
It is a type of recommendation system that works on the principle of popularity and/or whatever is currently trending. These systems check which products or movies are trending or most popular among users and recommend those directly.

'''

# Import Libraries 
import pandas as pd
import numpy as np

# Change the working directory to the folder that holds the CSV files read
# below (and, presumably, the custom Recommenders module imported next).
# The location can be overridden with the SONG_DATA_DIR environment variable so
# the notebook is not tied to one machine; the original Windows path remains
# the default, so existing setups behave exactly as before.
import os
DATA_DIR = os.environ.get('SONG_DATA_DIR', 'C:\\Users\\priva\\Documents\\Acmegrade')
os.chdir(DATA_DIR)

# import the custom module created for recommendation - Recommenders
import Recommenders as Recommenders

# Load the Data - triplets file (columns: user_id, song_id, listen_count)
song_df_1 = pd.read_csv('triplets_file.csv')
print(song_df_1.shape)
# Rich display instead of print(), consistent with how song_df_2 is previewed.
display(song_df_1.head())

# Load the Data -> song_data (song metadata keyed by song_id)
song_df_2 = pd.read_csv('song_data.csv')
print(song_df_2.shape)
display(song_df_2.head())

# Combine the two data frames into one, joining on the common column song_id.
# Duplicate song ids are removed from song_df_2 first so that the left join
# cannot match one triplet row against several metadata rows, which would
# inflate the results.
song_df = pd.merge(
    song_df_1,
    song_df_2.drop_duplicates(subset='song_id'),
    on='song_id',
    how='left',
)
# A left join against unique keys must keep exactly one row per triplet row.
assert len(song_df) == len(song_df_1), "merge changed the number of triplet rows"
display(song_df.shape)
display(song_df.head())

# Display the length of each source data frame
print(len(song_df_1), len(song_df_2))

# Length of the consolidated data frame. Printed explicitly because a bare
# expression is only rendered when it is the last statement of a notebook cell.
print(len(song_df))

# Select only the first 50000 records to build the models; the smaller frame
# keeps the model-building steps fast.
MODEL_SAMPLE_SIZE = 50000

# .copy() makes song_df an independent frame, so adding the 'song' column below
# does not write into a slice of the merged frame (which is what triggers
# pandas' SettingWithCopyWarning).
song_df = song_df.head(MODEL_SAMPLE_SIZE).copy()
display(song_df.shape)

# Creating new feature combining title and artist name
song_df['song'] = song_df['title'] + ' - ' + song_df['artist_name']
# display() is needed here: a bare expression in the middle of a cell shows nothing.
display(song_df.head())

# Number of records per song, grouped by the 'song' label.
# Note: the aggregation is 'count', so the resulting 'listen_count' column
# holds how many rows mention the song, NOT the sum of the individual listen
# counts.
song_grouped = song_df.groupby(['song']).agg({'listen_count': 'count'}).reset_index()
display(song_grouped.head())

# Sum of the per-song counts, i.e. the number of records that were counted above
grouped_sum = song_grouped['listen_count'].sum()
display(grouped_sum)

# Share of records per song, in percent, to identify the most popular songs
song_grouped['percentage'] = (song_grouped['listen_count'] / grouped_sum) * 100

# Most frequent songs first; ties are broken alphabetically by song name.
# Booleans are used for `ascending`, and only the top rows are shown rather
# than the whole frame.
display(
    song_grouped
    .sort_values(['listen_count', 'song'], ascending=[False, True])
    .head(10)
)

# Popularity Recommendation Engine

# Build the popularity recommender from the custom Recommenders module
pr = Recommenders.popularity_recommender_py()
pr.create(song_df, 'user_id', 'song')

# Show the popular-song recommendations for the users found at rows 5 and 100
# of song_df. A popularity rating is expected to be the same for all users.
for user_row in (5, 100):
    # .iloc selects by row position instead of a chained label lookup.
    user_id = song_df['user_id'].iloc[user_row]
    # display() is required: a bare call in the middle of a cell shows nothing.
    # NOTE(review): recommend() presumably returns a DataFrame - confirm in Recommenders.
    display(pr.recommend(user_id))


# Item Similarity Recommendation

# Instantiate the item-similarity model from the custom Recommenders module
# and hand it the training frame with its user and item column names.
ir = Recommenders.item_similarity_recommender_py()
ir.create(song_df, 'user_id', 'song')

# Print the song history of the users found at index labels 5 and 100,
# one song per line, in that order.
for row_label in (5, 100):
    user_items = ir.get_user_items(song_df['user_id'][row_label])
    for user_item in user_items:
        print(user_item)


# Give song recommendations for the users found at rows 5 and 100 of song_df.
for user_row in (5, 100):
    # .iloc selects by row position instead of a chained label lookup.
    user_id = song_df['user_id'].iloc[user_row]
    # display() is required: a bare call in the middle of a cell shows nothing.
    # NOTE(review): recommend() presumably returns a DataFrame - confirm in Recommenders.
    display(ir.recommend(user_id))


# Recommendation based on Song Name

# Songs similar to a selected pair of songs. display() is used so the result is
# rendered even though this is not the last statement of the cell.
display(ir.get_similar_items(['Oliver James - Fleet Foxes', 'The End - Pearl Jam']))

# Songs similar to another single song
display(ir.get_similar_items(['Use Somebody - Kings Of Leon']))


(2000000, 3)
                                    user_id             song_id  listen_count
0  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAKIMP12A8C130995             1
1  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBBMDR12A8C13253B             2
2  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBXHDL12A81C204C0             1
3  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBYHAJ12A6701BF1D             1
4  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODACBL12A8C13C273             1
(1000000, 5)


Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


(2000000, 7)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


2000000 1000000
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia
Stronger - Kanye West
Constellations - Jack Johnson
Learn To Fly - Foo Fighters
Apuesta Por El Rock 'N' Roll - Héroes del Silencio
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery Corporation feat. Emiliana Torrini
Let It Be Sung - Jack Johnson / Matt Costa / Zach Gill / Dan Lebowitz / Steve Adams
I'll Be Missing You (Featuring Faith Evans & 112)(Album Version) - Puff Daddy
Love Shack - The B-52's
Clarity - John Mayer
I?'m A Steady Rollin? Man - Robert Johnson
The Old Saloon - The Lonely Island
Behind The Sea [Live In Chicago] - Panic At The Disco
Champion - Kanye West
Breakout - Foo Fighters
Ragged Wood - Fleet Foxes
Mykonos - Fleet Foxes
Country Road - Jack Johnson / Paula Fuga
Oh No - Andrew Bird
Love Song For No One - John Mayer
Jewels And Gold - Angus & Julia Stone
83 - John Mayer
Neon - John Mayer
The Middle - Jimmy Eat World
High and dr