In [1]:
######################################
###          DATA STORY            ###
######################################

In [2]:
### Imports ###
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline
import scipy.stats as sstats

# EchoNest API
from pyechonest import config
from pyechonest import song
from pyechonest import artist

# LastFM API
import pylast

# Functions used in this notebook
import dataStoryFunctions as dsf

In [3]:
# Read the API credentials through the project helper.
secrets = dsf.load_secrets()

# EchoNest: store the API key on the pyechonest config module.
config.ECHO_NEST_API_KEY = secrets["echonest_api_key"]

# Last.fm API key and secret.
# Get your own pair from http://www.last.fm/api/account
API_KEY = secrets["lastfm_api_key"]
API_SECRET = secrets["lastfm_api_secret"]

# Write operations on Last.fm require an authenticated user.
username = secrets["lastfm_username"]
# NOTE(review): the secret is named "lastfm_password_hash" yet it is passed
# through pylast.md5 here -- confirm the stored value is the plain password
# and not an already-hashed one.
password_hash = pylast.md5(secrets["lastfm_password_hash"])

# Network handle used for the Last.fm calls further down the notebook.
last_fm_network = pylast.LastFMNetwork(
    api_key=API_KEY,
    api_secret=API_SECRET,
    username=username,
    password_hash=password_hash,
)

In [4]:
# Year range of the study; both values are passed to the dsf helpers below.
start_year, end_year = 1960, 2015

In [5]:
# The next steps take a long time to run; they were already executed during
# the data processing and cleaning phase.
# The remaining missing data in the final dataframe was then filled in manually.

In [6]:
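# Creation of the global dataframe (kept for reference, not executed here)

# NOTE(review): `years` is not defined in the cells above -- presumably
# `end_year` was intended; confirm before re-enabling this code.
# billboard_df = dsf.create_billboard_df_from_CSV(start_year, years)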
# s = billboard_df['Title'].str.split('" / "').apply(pd.Series, 1).stack()
# s.index = s.index.droplevel(-1)
# s.name = 'Title'
# del billboard_df['Title']
# billboard_df = billboard_df.join(s)
# billboard_df = billboard_df[['Num', 'Artist(s)', 'Title', 'Year']] 

In [7]:
# Addition of new characteristics to the dataframe (artist location, audio summary...)

# billboard_df_additional_data_dict = dsf.add_songs_characteristics_to_df(billboard_df, 'CSV_data/billboard_df-final.csv')

In [8]:
# The dataframe produced by the steps above was completed by hand in Excel
# and saved as a semicolon-separated CSV file, which is loaded here.
# The 'Colonne1' column is removed right after loading
# (presumably a leftover of the Excel export -- TODO confirm).
billboard_df_final = pd.read_csv(
    'CSV_data/billboard_df-final.csv', sep=';'
).drop('Colonne1', axis=1)

In [9]:
# Show the last five rows of the loaded dataframe as a quick sanity check.
billboard_df_final.tail(n=5)

Unnamed: 0,Num,Artist(s),Title,Year,Lead Artist(s),latitude,longitude,location,song_type_0,song_type_1,...,duration,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,valence
5611,96,Nicky Jam and Enrique Iglesias,El Perdon,2015,Nicky Jam,18.2233,-66.4289,Puerto Rico,studio,electric,...,348.04381,0.908296,0.867,0,0.066372,-7.132,1,0.052743,124.998,0.114123
5612,97,Ne-Yo featuring Juicy J,She Knows,2015,Ne-Yo,36.301068,-115.206969,"Las Vegas, Nevada",studio,electric,...,156.44689,0.260174,0.0,4,0.3608,-18.315,1,0.035362,142.846,0.544282
5613,98,One Direction,Night Changes,2015,One Direction,51.5063,-0.12714,"London, England, GB",studio,vocal,...,226.6,0.520009,0.0,8,0.115136,-7.747,1,0.035291,120.001,0.395633
5614,99,Drake,Back to Back,2015,Drake,43.666667,-79.416667,"Toronto, Canada",studio,electric,...,178.18077,0.262854,0.0,8,0.097088,-17.465,0,0.43167,86.733,0.306762
5615,100,Calvin Harris and Disciples,How Deep Is Your Love,2015,Calvin Harris,55.0682,-3.6093,"Dumfries, Scotland, GB",studio,electric,...,220.52,0.86311,0.00527,4,0.06668,-4.693,0,0.062423,122.114,0.290828


In [42]:
# Number of songs by artist in the Top 100

# Choices made:
#  * If a song features another artist, each artist's contribution is given the same weight:
#        - Ne-Yo featuring Juicy J - She Knows 
#          will give one song for Ne-Yo and one for Juicy J
#  * If the artist name includes an '&', I assume that it is a band or an indivisible duo / trio...
#        - Kool & the Gang - Celebration 
#          will give one song for Kool & the Gang
#  * If the artist name includes an 'and', I have handled two separate cases:
#        - If the song has been released before 1982, I have considered the artist as a band,
#          as it seems very frequent to have band names like "Derek and the Dominos" in those years.
#          In that case:
#          Derek and the Dominos - Layla
#          will give one song for Derek and the Dominos
#        - If the song was released in 1982 or later, each artist's contribution is given
#          the same weight:
#          R. Kelly and Celine Dion - I'm Your Angel
#          will give one song for R. Kelly and one for Celine Dion
#        - Some exceptions have been handled manually:
#          Evan and Jaron - Crazy for This Girl (released in 2001)
#          will give one song for Evan and Jaron

# This methodology is not perfect, but it seems reasonable and accurate enough for the study.
# The 1982 cutoff was chosen by looking into the data and manually investigating the
# band names.

# Re-import dataStoryFunctions so that edits made to the module are picked up
# without restarting the kernel.
# `reload` is a builtin on Python 2 only; on Python 3.4+ it lives in importlib.
# Import it from there when available and fall back to the builtin otherwise,
# so this cell runs on both interpreters.
try:
    from importlib import reload
except ImportError:
    pass

reload(dsf)

<module 'dataStoryFunctions' from 'dataStoryFunctions.py'>

In [43]:
# Build the per-artist entries for the studied year range; judging by the
# displayed tail, a song credited to several artists yields one row per artist.
unique_artist_df = dsf.create_entries_by_unique_artist(
    billboard_df_final,
    start_year,
    end_year,
)
# Preview the last five rows.
unique_artist_df.tail(n=5)

Unnamed: 0,Rank,Artist(s),Title,Year
6416,97,Juicy J,She Knows,2015
6417,98,One Direction,Night Changes,2015
6418,99,Drake,Back to Back,2015
6419,100,Calvin Harris,How Deep Is Your Love,2015
6420,100,Disciples,How Deep Is Your Love,2015


In [44]:
# Derive the per-artist count table from the per-artist entries
# (see the preview in the next cell for the resulting columns).
unique_artist_df_count = dsf.create_entries_count_by_artist(
    unique_artist_df,
)

In [45]:
# Show the first five rows of the per-artist count table.
unique_artist_df_count.head(n=5)

Unnamed: 0,Artist(s),Counts,Rank,List of songs,Years of presence,1960,1961,1962,1963,1964,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
1759,Rihanna,37,41.135135,Title: Pon de Replay -Year: 2005 -Rank: 18/---...,11,0,0,0,0,0,...,2,3,5,3,4,6,6,3,2,2
1354,Mariah Carey,36,32.666667,Title: Vision of Love -Year: 1990 -Rank: 6/---...,18,0,0,0,0,0,...,1,0,1,1,0,0,0,1,0,0
1342,Madonna,36,48.555556,Title: Borderline -Year: 1984 -Rank: 35/---/Ti...,18,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
1259,Lil Wayne,33,52.484848,Title: Back That Azz Up -Year: 1999 -Rank: 75/...,12,0,0,0,0,0,...,1,2,8,4,4,6,1,1,2,2
2375,Usher,30,38.333333,Title: You Make Me Wanna... -Year: 1997 -Rank:...,13,0,0,0,0,0,...,0,1,1,0,4,3,3,0,1,1


In [46]:
# Add an image URL per artist using the Last.fm network handle.
# The helper prints some artist names while running -- presumably the ones
# for which no image could be retrieved (TODO confirm in dataStoryFunctions).
unique_artist_df_count_with_images = dsf.add_image_url_to_artist_count_df(
    unique_artist_df_count,
    last_fm_network,
)

Marilyn McCoo & Billy Davis
K. P. & Envyi
Garnet Mimms & and the Enchanters
"Mama" Cass Elliot
B-Rock and the Bizz
The Blackout All-Stars
Disco-Tex and the Sex-O-Lettes


In [47]:
# Preview only the first ten rows instead of rendering the whole frame:
# the full table has one row per artist and would bloat the saved notebook.
unique_artist_df_count_with_images.head(10)

Unnamed: 0,Artist(s),Counts,Rank,List of songs,Years of presence,1960,1961,1962,1963,1964,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,Image URL
1759,Rihanna,37,41.135135,Title: Pon de Replay -Year: 2005 -Rank: 18/---...,11,0,0,0,0,0,...,3,5,3,4,6,6,3,2,2,http://img2-ak.lst.fm/i/u/361e7fee96af4f2fb72c...
1354,Mariah Carey,36,32.666667,Title: Vision of Love -Year: 1990 -Rank: 6/---...,18,0,0,0,0,0,...,0,1,1,0,0,0,1,0,0,http://img2-ak.lst.fm/i/u/61ca3abeab76743c4d6a...
1342,Madonna,36,48.555556,Title: Borderline -Year: 1984 -Rank: 35/---/Ti...,18,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,http://img2-ak.lst.fm/i/u/b39ba21b6e184bfaaca0...
1259,Lil Wayne,33,52.484848,Title: Back That Azz Up -Year: 1999 -Rank: 75/...,12,0,0,0,0,0,...,2,8,4,4,6,1,1,2,2,http://img2-ak.lst.fm/i/u/d5df4736e4ea44588cc1...
2375,Usher,30,38.333333,Title: You Make Me Wanna... -Year: 1997 -Rank:...,13,0,0,0,0,0,...,1,1,0,4,3,3,0,1,1,http://img2-ak.lst.fm/i/u/1a08595ffd1f4fc9cfbc...
677,Elton John,30,44.800000,Title: Rocket Man -Year: 1972 -Rank: 40/---/Ti...,18,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,http://img2-ak.lst.fm/i/u/6e9bdfb2663f4e78ae83...
1317,Ludacris,29,51.724138,Title: One Minute Man -Year: 2001 -Rank: 57/--...,10,0,0,0,0,0,...,3,0,1,5,1,0,0,0,0,http://img2-ak.lst.fm/i/u/38239a4576db4eab8914...
409,Chris Brown,28,53.142857,Title: Run It! -Year: 2005 -Rank: 42/---/Title...,9,0,0,0,0,0,...,2,5,0,1,4,4,0,3,4,http://img2-ak.lst.fm/i/u/b78f8741f33c4f28cf00...
632,Drake,28,58.750000,Title: Best I Ever Had -Year: 2009 -Rank: 22/-...,7,0,0,0,0,0,...,0,0,2,5,5,3,4,3,6,http://img2-ak.lst.fm/i/u/d5e72a1e72334770c784...
1703,R. Kelly,28,64.464286,Title: Bump n' Grind -Year: 1994 -Rank: 11/---...,12,0,0,0,0,0,...,4,0,0,0,0,0,0,1,0,http://img2-ak.lst.fm/i/u/b68b2f0fc9a34126a0d3...
