In [None]:
import spotipy as sp
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import pandas as pd
from pandas.io.json import json_normalize
import json
import datetime
import lyricsgenius

## Importing Spotify Data

In [None]:
# Read the two Spotify streaming-history export files into plain Python objects.
with open('data/StreamingHistory0.json', mode='rb') as first_export:
    history_0_json = json.load(first_export)
with open('data/StreamingHistory1.json', mode='rb') as second_export:
    history_1_json = json.load(second_export)

In [None]:
# Build one frame from the two Spotify export files.
history_0 = pd.DataFrame(history_0_json)
history_1 = pd.DataFrame(history_1_json)

# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0).
# The original row labels of each part are kept, exactly as append did.
history = (
    pd.concat([history_0, history_1])
    [['artistName', 'endTime', 'trackName']]
    # source == 0 tags rows that come from the Spotify export.
    .assign(source=0)
)

In [None]:
# Preview the earliest streams (ascending order is the sort_values default).
history.sort_values(by='endTime').head()

In [None]:
# Summary counts for the Spotify-only history.
# dropna=False keeps a missing name counted as one distinct value, matching len(unique()).
spotify_streams = len(history)
spotify_songs = history['trackName'].nunique(dropna=False)
spotify_artists = history['artistName'].nunique(dropna=False)

In [None]:
# Display (distinct artists, distinct track names, total streams) for the Spotify export.
(spotify_artists, spotify_songs, spotify_streams)

## Merging Last.fm Data

In [None]:
# Load the Last.fm scrobble export; the file has no header row, so columns arrive as 0..3.
lastfm = pd.read_csv('data/last_fm.csv', header=None)
lastfm = lastfm.rename(columns={0: 'artistName', 1: 'album', 2: 'trackName', 3: 'endTime'})
lastfm['endTime'] = pd.to_datetime(lastfm['endTime'])

# Keep only the columns shared with the Spotify frame.
lastfm = lastfm[['artistName', 'trackName', 'endTime']]

# Compare against a Timestamp rather than datetime.date: current pandas raises
# TypeError when a datetime64 column is compared with a plain date object.
# NOTE(review): the 2019-02-06 cutoff presumably marks where the Spotify export
# begins, so the two sources do not overlap — confirm.
lastfm_cutoff = pd.Timestamp('2019-02-06')

# assign() returns a fresh frame, so the new column is never written onto a filtered slice.
# source == 1 tags rows that come from Last.fm.
lastfm = lastfm[lastfm['endTime'] < lastfm_cutoff].assign(source=1)
lastfm.head()

In [None]:
# Show (rows, columns) of the filtered Last.fm frame.
lastfm.shape

In [None]:
# Stack the Last.fm rows under the Spotify rows.
# pd.concat replaces DataFrame.append (removed in pandas 2.0); ignore_index=True
# gives the same fresh 0..n-1 index that reset_index(drop=True) produced.
# NOTE: re-running this cell appends the Last.fm rows again; rebuild `history` first.
history = pd.concat([history, lastfm], ignore_index=True)
# history.to_csv("data/history.csv", index=False)

In [None]:
# Preview the first rows of the combined history frame.
history.head()

In [None]:
# Summary counts for the combined (Spotify + Last.fm) history.
# dropna=False keeps a missing name counted as one distinct value, matching len(unique()).
total_streams = len(history)
total_songs = history['trackName'].nunique(dropna=False)
total_artists = history['artistName'].nunique(dropna=False)

In [None]:
# Display (distinct artists, distinct track names, total streams) for the combined history.
(total_artists, total_songs, total_streams)

## Obtaining Lyrics & Spotify Valence

In [None]:
# Reload the listening history from the CSV file, replacing the in-memory frame.
# NOTE(review): the to_csv call in the merge cell above is commented out, so this
# presumably relies on a file written by an earlier run — confirm before Run All.
history = pd.read_csv("data/history.csv")
history.head()

In [None]:
# API credentials live in a local JSON file so they are not hardcoded in the notebook.
key_file = 'keys/keys.json'

# Use a context manager so the file handle is closed right after parsing
# (the bare json.load(open(...)) left it open).
with open(key_file) as key_handle:
    keys = json.load(key_handle)

SPOTIPY_CLIENT_ID = keys["client_id"]
SPOTIPY_CLIENT_SECRET = keys["client_secret"]
GENIUS = keys["genius"]

In [None]:
# Build the API clients from the credentials loaded above.
# The unauthenticated sp.Spotify() that used to be created first was overwritten
# immediately and never used, so it has been removed.
client_credentials_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
spotify = sp.Spotify(client_credentials_manager=client_credentials_manager)
genius = lyricsgenius.Genius(GENIUS)

In [None]:
# Look up Spotify valence and duration for every stream.
# Each distinct (artist, track) pair is queried once and then served from the caches.
valences = []
durations = []
checked_valences = {}   # (artistName, trackName) -> valence, or None if the lookup failed
checked_durations = {}  # (artistName, trackName) -> duration_ms, or None if the lookup failed
failed_spotify = []     # row index of the first failed lookup for each pair

for index, row in history.iterrows():
    if index % 10 == 0:
        print(index)

    # A tuple key cannot collide the way concatenated strings can ("ab"+"c" vs "a"+"bc"),
    # and building it never raises, even when a name is missing (NaN after read_csv).
    track_key = (row['artistName'], row['trackName'])

    if track_key not in checked_valences:
        try:
            # NOTE(review): Spotify field filters are normally written `artist:NAME track:NAME`;
            # this free-form query is kept unchanged so results stay comparable — confirm intent.
            query = 'artist: ' + row['artistName'] + ', track: ' + row['trackName']
            song_id = spotify.search(query)['tracks']['items'][0]['id']
            features = spotify.audio_features(song_id)[0]
            valence = features['valence']
            duration = features['duration_ms']
        except Exception:
            # Best-effort lookup: no search hit, missing features, a missing name or an
            # API error all record the pair as failed. `except Exception` (not a bare
            # except) lets Ctrl-C / kernel interrupt still stop this long loop.
            failed_spotify.append(index)
            valence = None
            duration = None
        checked_valences[track_key] = valence
        checked_durations[track_key] = duration

    valences.append(checked_valences[track_key])
    durations.append(checked_durations[track_key])

In [None]:
# Attach the per-stream lookup results as new columns (one value per row, in row order).
history = history.assign(valence=valences, duration_ms=durations)
# history.to_csv("data/history.csv", index=False)

In [None]:
# Fetch lyrics from Genius for every stream.
# Each distinct (artist, track) pair is searched once and then served from the cache.
lyrics = []
failed_lyrics = []   # row index of each pair whose search raised an error
checked_lyrics = {}  # (artistName, trackName) -> lyrics text, or None if not found / failed

for index, row in history.iterrows():
    if index % 10 == 0:
        print(index)

    # A tuple key cannot collide the way concatenated strings can, and building it
    # never raises when a name is missing (NaN), which used to abort the loop.
    track_key = (row['artistName'], row['trackName'])

    if track_key not in checked_lyrics:
        try:
            # NOTE(review): these keyword names depend on the installed lyricsgenius
            # version; if they are rejected, every lookup lands in failed_lyrics — confirm.
            song = genius.search_song(row['trackName'], artist_name=row['artistName'], take_first_result=True)
            checked_lyrics[track_key] = None if song is None else song.lyrics
        except Exception:
            # Best-effort: record the failure and move on. `except Exception` (not a bare
            # except) lets Ctrl-C / kernel interrupt still stop this long loop.
            checked_lyrics[track_key] = None
            failed_lyrics.append(index)

    lyrics.append(checked_lyrics[track_key])

hing for "Dangerous World (feat. Travis Scott & YG)"...
Searching for "Alive (with Offset & 2 Chainz)"...
Searching for "My Nigga"...
Searching for "Never Call Me (feat. YG) - Remix"...
Searching for "SLAY (feat. Quavo)"...
Searching for "She Bad"...
Searching for "Same Bitches (feat. G-Eazy & YG)"...
Searching for "Turn Down for What"...
Searching for "Demasiado Loca"...
1780
Searching for "Don't Tell 'Em"...
Searching for "That's My N**** (with Meek Mill, YG & Snoop Dogg)"...
Searching for "Culo"...
Searching for "HANDGUN (feat. A$AP Rocky)"...
Searching for "Toot It And Boot It"...
1790
Searching for "Why You Always Hatin?"...
Searching for "Plug Walk (feat. Gucci Mane, YG, 2Chainz) - Remix"...
Searching for "I Wanna Benz"...
Searching for "Who Do You Love?"...
Searching for "BULLETPROOF (feat. Jay 305)"...
Searching for "F.I.G.H.T. (with Eearz, Gucci Mane, YG, Trouble, Quavo & Juicy J)"...
Searching for "Ride Out"...
Searching for "One Time Comin'"...
Searching for "I Just Wanna Pa

In [None]:
# Attach the fetched lyrics and write the enriched history back to the CSV file.
history = history.assign(lyrics=lyrics)
history.to_csv("data/history.csv", index=False)

In [None]:
# Ad-hoc Genius search with a fixed query string; the result is shown as the cell output.
# NOTE(review): looks like leftover debugging — confirm whether this cell is still needed.
genius.search_song("bos")