# ADA 2018 -  “Happiness” Share it through music.

#### Andres Montero, Ariel Alba, Diego Iriarte




In [1]:
% matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import reverse_geocoder as rg
import os.path


In [2]:
# Notebook configuration: directory layout and execution switches
DATA_DIR = './data/'
MUSIC_DIR = DATA_DIR + 'fma_metadata' + '/'
PKL_DIR = DATA_DIR + 'pkl' + '/'

# When True, the dataframes are read back from the pickle files in PKL_DIR;
# when False, the dataframes are written to those pickle files instead.
READ_FROM_PKL = False
# When True, the load and clean cells run against the original CSV sources.
CLEAN_PHASE = True

In [3]:
if CLEAN_PHASE:
    # Full paths of the metadata CSV files that live inside MUSIC_DIR
    echonest_path = MUSIC_DIR + 'echonest.csv'
    features_path = MUSIC_DIR + 'features.csv'
    genres_path = MUSIC_DIR + 'genres.csv'
    tracks_path = MUSIC_DIR + 'tracks.csv'
    

In [4]:
if CLEAN_PHASE:
    # Load the raw CSV sources into pandas dataframes.
    #
    # For echonest and tracks, `usecols` selects columns by position, `names`
    # supplies our own labels for the selected columns, and `header=N` marks
    # row N as the header line so that it (and everything above it) is not
    # read as data.

    # --- Echonest features -------------------------------------------------
    echonest_col_names=['track_id','danceability','energy',
                        'artist_latitude','artist_longitude',
                        'artist_name','artist_discovery',
                        'artist_family','artist_hotness',
                        'song_currency','song_hotness']
    echonest_dtypes = {'track_id': int, 'danceability': float, 
                       'energy': float, 'artist_latitude': float, 
                       'artist_longitude': float, 'artist_name': str,
                       'artist_discovery': float, 'artist_family': float,
                       'artist_hotness': float, 'song_currency': float,
                       'song_hotness': float}
    echonest_df = pd.read_csv(echonest_path, names=echonest_col_names,
                              header=3, dtype=echonest_dtypes, 
                              usecols=[0,2,3,11,13,14,21,22,23,24,25])

    # --- Genres ------------------------------------------------------------
    # The genres file is read with its own header row and column names.
    genres_dtypes = {'genre_id': int, '#tracks': int, 
                     'parent': int, 'top_level': int}
    genres_df = pd.read_csv(genres_path, dtype=genres_dtypes)
    
    # --- Tracks ------------------------------------------------------------
    track_col_names = ['track_id', 'album_date_created',
                       'album_date_released', 'album_id',
                       'album_listens', 'album_title', 'artist_id',
                       'artist_name', 'track_duration', 'track_genre_top',
                       'track_language', 'track_listens', 'track_tags', 
                       'track_title']

    # Date columns are loaded as strings here; they are converted to
    # datetime in the tracks clean phase.
    tracks_dtypes = {'track_id': int, 'album_date_created': str,
                     'album_date_released': str, 
                     'album_id': int, 'album_listens': int, 
                     'album_title': str, 'artist_id': int,
                     'artist_name': str, 'track_duration': int, 
                     'track_genre_top': str, 'track_language': str,
                     'track_listens': int, 'track_tags': str, 
                     'track_title': str}

    # Apply the declared dtypes (previously defined but never passed), so the
    # tracks schema is enforced at load time like the other two datasets.
    tracks_df = pd.read_csv(tracks_path, names=track_col_names,
                            header=2, dtype=tracks_dtypes,
                            usecols=[0, 2, 3, 6, 8, 11, 21, 26, 38,
                                     40, 45, 47, 51, 52])


In [5]:
# Echonest clean phase 
if CLEAN_PHASE:
    # Derive city, state and country from the artist latitude/longitude.
    # Only rows where BOTH coordinates are present are geocoded; every other
    # row gets NaN in the three new columns.
    has_coords = (echonest_df['artist_latitude'].notna()
                  & echonest_df['artist_longitude'].notna())
    coordinates = list(zip(echonest_df.loc[has_coords, 'artist_latitude'],
                           echonest_df.loc[has_coords, 'artist_longitude']))

    # One batched lookup instead of one rg.search call per row. The result
    # list is expected to follow the order of `coordinates` (the DataFrame
    # constructor below raises if the lengths disagree). The call is skipped
    # entirely when there is nothing to look up.
    results = rg.search(coordinates, mode=1) if coordinates else []

    # Build the new columns on the labels of the geocoded rows, then reindex
    # to the full frame so the rows without coordinates become NaN. Aligning
    # on echonest_df.index avoids assuming a default 0..n-1 index.
    geo_df = pd.DataFrame(
        {'city': [result['name'] for result in results],
         'state': [result['admin1'] for result in results],
         'country': [result['cc'] for result in results]},
        index=echonest_df.index[has_coords],
        dtype=object,
    ).reindex(echonest_df.index)

    # Place city/state/country at positions 5, 6 and 7. If the cell is
    # re-run the columns already exist, so overwrite them in place instead
    # of calling insert() again (which would raise ValueError).
    for position, column in enumerate(['city', 'state', 'country'], start=5):
        if column in echonest_df.columns:
            echonest_df[column] = geo_df[column]
        else:
            echonest_df.insert(loc=position, column=column,
                               value=geo_df[column])
    
    

Loading formatted geocoded file...


In [None]:
# Tracks clean phase
if CLEAN_PHASE:
    # Parse the album date columns (loaded as text) into datetime values
    for date_column in ('album_date_released', 'album_date_created'):
        tracks_df[date_column] = pd.to_datetime(tracks_df[date_column])


In [6]:
# Persist the dataframes as pickle files, or restore them from a previous run.
# NOTE: only load pickle files produced by this notebook; unpickling files
# from an untrusted source can execute arbitrary code.
tracks_pkl_path = PKL_DIR + 'tracks_df.pkl'
if not READ_FROM_PKL:
    # to_pickle does not create missing directories, so make sure PKL_DIR exists
    os.makedirs(PKL_DIR, exist_ok=True)
    echonest_df.to_pickle(PKL_DIR + 'echonest_df.pkl')
    genres_df.to_pickle(PKL_DIR + 'genres_df.pkl')
    # tracks_df is displayed later in the notebook, so it is persisted too
    tracks_df.to_pickle(tracks_pkl_path)
else:
    echonest_df = pd.read_pickle(PKL_DIR + 'echonest_df.pkl')
    genres_df = pd.read_pickle(PKL_DIR + 'genres_df.pkl')
    # Pickle directories written before tracks_df was persisted do not contain
    # this file; in that case any tracks_df already in memory is left untouched.
    if os.path.exists(tracks_pkl_path):
        tracks_df = pd.read_pickle(tracks_pkl_path)
    

In [7]:
# Preview the first five rows of the echonest dataframe
echonest_df.head(n=5)

Unnamed: 0,track_id,danceability,energy,artist_latitude,artist_longitude,city,state,country,artist_name,artist_discovery,artist_family,artist_hotness,song_currency,song_hotness
0,2,0.675894,0.634476,32.6783,-83.223,Jeffersonville,Georgia,US,AWOL,0.38899,0.38674,0.40637,0.0,0.0
1,3,0.528643,0.817461,32.6783,-83.223,Jeffersonville,Georgia,US,AWOL,0.38899,0.38674,0.40637,0.0,0.0
2,5,0.745566,0.70147,32.6783,-83.223,Jeffersonville,Georgia,US,AWOL,0.38899,0.38674,0.40637,0.0,0.0
3,10,0.658179,0.924525,39.9523,-75.1624,Philadelphia,Pennsylvania,US,Kurt Vile,0.557339,0.614272,0.798387,0.005158,0.354516
4,134,0.513238,0.56041,32.6783,-83.223,Jeffersonville,Georgia,US,AWOL,0.38899,0.38674,0.40637,0.0,0.0


In [8]:
# Preview the first five rows of the genres dataframe
genres_df.head(n=5)

Unnamed: 0,genre_id,#tracks,parent,title,top_level
0,1,8693,38,Avant-Garde,38
1,2,5271,0,International,2
2,3,1752,0,Blues,3
3,4,4126,0,Jazz,4
4,5,4106,0,Classical,5


In [9]:
# Preview the first five rows of the tracks dataframe
tracks_df.head(n=5)

Unnamed: 0,track_id,album_date_created,album_date_released,album_id,album_listens,album_title,artist_id,artist_name,track_duration,track_genre_top,track_language,track_listens,track_tags,track_title
0,2,2008-11-26 01:44:45,2009-01-05 00:00:00,1,6073,AWOL - A Way Of Life,1,AWOL,168,Hip-Hop,en,1293,[],Food
1,3,2008-11-26 01:44:45,2009-01-05 00:00:00,1,6073,AWOL - A Way Of Life,1,AWOL,237,Hip-Hop,en,514,[],Electric Ave
2,5,2008-11-26 01:44:45,2009-01-05 00:00:00,1,6073,AWOL - A Way Of Life,1,AWOL,206,Hip-Hop,en,1151,[],This World
3,10,2008-11-26 01:45:08,2008-02-06 00:00:00,6,47632,Constant Hitmaker,6,Kurt Vile,161,Pop,en,50135,[],Freeway
4,20,2008-11-26 01:45:05,2009-01-06 00:00:00,4,2710,Niris,4,Nicky Cook,311,,en,361,[],Spiritual Level
