In [1]:
from google.colab import drive
drive.mount("/content/ml/")

Drive already mounted at /content/ml/; to attempt to forcibly remount, call drive.mount("/content/ml/", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix

In [3]:
user_song_listened = pd.read_csv('/content/ml/MyDrive/datasets/10000.txt',sep='\t',
                                  header=None)
user_song_listened.columns=['user_id', 'song_id', 'listen_count']

#song information metadata
song_information_data = pd.read_csv('/content/ml/MyDrive/datasets/song_data.csv')

In [4]:
song_information_data.shape[0]

1000000

In [5]:
song_information_data.drop_duplicates(['song_id'], inplace=True)
song_information_data.shape[0]

999056

In [6]:
song_information_data.head()

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


In [7]:
user_song_listened.shape[0]

2000000

Unique Users

In [8]:
unique_users = user_song_listened['user_id'].unique()
unique_users.shape[0]

76353

Unique *Songs*

In [9]:
unique_songs = user_song_listened['song_id'].unique()
unique_songs.shape[0]

10000

Unique **Artists**

In [10]:
unique_artists = song_information_data['artist_name'].unique()
unique_artists.shape[0]

72652

Merge Tables

In [11]:
songs = pd.merge(user_song_listened, song_information_data, on="song_id", how="left")

In [12]:
songs.describe()

Unnamed: 0,listen_count,year
count,2000000.0,2000000.0
mean,3.045485,1628.645
std,6.57972,778.7283
min,1.0,0.0
25%,1.0,1984.0
50%,1.0,2002.0
75%,3.0,2007.0
max,2213.0,2010.0


In [13]:
songs.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


Popular **Songs**

In [14]:
popular_songs = songs.groupby('title')['listen_count'].count()

In [15]:
popular_songs.reset_index().sort_values("listen_count", ascending=False)

Unnamed: 0,title,listen_count
6836,Sehr kosmisch,8277
8725,Undo,7032
1964,Dog Days Are Over (Radio Edit),6949
9496,You're The One,6729
6498,Revelry,6145
...,...,...
3363,Historia Del Portero,51
6782,Scared,51
2041,Don´t Leave Me Now,50
5567,No Creo En El Jamas,48


Popular **Artists**

In [16]:
popular_artists = songs.groupby('artist_name')['listen_count'].count()

In [17]:
popular_artists.reset_index().sort_values("listen_count", ascending=False)

Unnamed: 0,artist_name,listen_count
649,Coldplay,29422
2850,The Black Keys,19862
1651,Kings Of Leon,18747
1107,Florence + The Machine,18112
1370,Jack Johnson,17801
...,...,...
2607,Shotta,54
2427,Ricardo Montaner,52
3208,Umphrey's McGee,52
2915,The Four Seasons,52


In [18]:
user_song_count = songs.groupby("user_id")["song_id"].count().reset_index()
more_than_10_user_id = user_song_count[user_song_count["song_id"] > 10]["user_id"].to_list()
more_than_10_user_id[:10]

['000a5c8b4d8b2c98f7a205219181d039edcd4506',
 '000b474f815bcff17a4bc9ce5324f9352dafe07d',
 '000e2c2a8c7870ff9121f212b35c8b3a20cc0e67',
 '000ebc858861aca26bac9b49f650ed424cf882fc',
 '000ef25cc955ad5841c915d269432eea41f4a1a5',
 '00125672fad06bc57f5a4304cceea9effab07839',
 '0012bf75d43a724f62dc746d9e85ae0088a3a1d6',
 '001322829b5dc3edc59bf78189617ddd8f23c82a',
 '00185e316f07f0f00c325ca034be59c15b362401',
 '0019740e3e8c24e223a6f88e3faa7c144ec5a014']

In [19]:
songs_more_ten_user_listened = songs[songs['user_id'].isin(more_than_10_user_id)].reset_index(drop=True)
songs_more_ten_user_listened

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999
...,...,...,...,...,...,...,...
1839681,d8bfd4ec88f0f3773a9e022e3c1a0f1d3b7b6a92,SOJEYPO12AAA8C6B0E,2,Ignorance (Album Version),Ignorance,Paramore,0
1839682,d8bfd4ec88f0f3773a9e022e3c1a0f1d3b7b6a92,SOJJYDE12AF729FC16,4,Two Is Better Than One,Love Drunk,Boys Like Girls featuring Taylor Swift,2009
1839683,d8bfd4ec88f0f3773a9e022e3c1a0f1d3b7b6a92,SOJKQSF12A6D4F5EE9,3,What I've Done (Album Version),What I've Done,Linkin Park,2007
1839684,d8bfd4ec88f0f3773a9e022e3c1a0f1d3b7b6a92,SOJUXGA12AC961885C,1,Up,My Worlds,Justin Bieber,2010


In [22]:
matrix_songs_users_listen_count = songs_more_ten_user_listened.pivot(index='song_id', columns='user_id', values='listen_count').fillna(0)

In [23]:
sparse_songs_users_listen_count = csr_matrix(matrix_songs_users_listen_count.values)

In [25]:
print(sparse_songs_users_listen_count)

  (0, 282)	2.0
  (0, 1432)	1.0
  (0, 2113)	1.0
  (0, 2832)	1.0
  (0, 5123)	1.0
  (0, 5310)	1.0
  (0, 5952)	5.0
  (0, 9797)	3.0
  (0, 10093)	35.0
  (0, 10721)	1.0
  (0, 11466)	1.0
  (0, 11559)	3.0
  (0, 12296)	12.0
  (0, 13028)	5.0
  (0, 13781)	1.0
  (0, 14068)	14.0
  (0, 14262)	1.0
  (0, 14458)	20.0
  (0, 14603)	1.0
  (0, 14768)	1.0
  (0, 15676)	3.0
  (0, 16924)	1.0
  (0, 16991)	1.0
  (0, 17074)	1.0
  (0, 17791)	1.0
  :	:
  (9999, 32211)	1.0
  (9999, 33741)	1.0
  (9999, 35546)	4.0
  (9999, 36803)	1.0
  (9999, 37573)	2.0
  (9999, 37795)	1.0
  (9999, 38344)	1.0
  (9999, 38401)	1.0
  (9999, 39421)	4.0
  (9999, 40335)	1.0
  (9999, 41077)	1.0
  (9999, 42520)	1.0
  (9999, 43076)	2.0
  (9999, 43503)	1.0
  (9999, 44848)	5.0
  (9999, 46773)	3.0
  (9999, 46824)	6.0
  (9999, 47201)	6.0
  (9999, 48003)	1.0
  (9999, 48313)	1.0
  (9999, 50908)	2.0
  (9999, 51323)	1.0
  (9999, 51470)	3.0
  (9999, 51640)	5.0
  (9999, 51769)	1.0
