In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

import json

from fycharts.SpotifyCharts import SpotifyCharts
import sqlalchemy

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

## Scraping Spotify Chats for Viral 50 from Italy, Spain, and Greece 

In [3]:
with open("../spotify_credentials.json", "r") as json_file:
    creds = json.load(json_file)

my_client_id = creds['SPOTIPY_CLIENT_ID']
my_client_secret = creds['SPOTIPY_CLIENT_SECRET']

client_credentials_manager = SpotifyClientCredentials(client_id=my_client_id, client_secret=my_client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Italy Top Charting Songs, Viral 50, 2017-2020

+ _All lists of top 200 / viral 50 song gathered from [Spotify Charts Regional](https://spotifycharts.com/regional/)_

+ _Data are written as both a csv file and a SQLLite db._ 

+ _**Citation:** Code for how to scrape [Spotify Charts Regional](https://spotifycharts.com/regional/) is inspired by the excellent documentation for the [Unofficial Spotify Charts API](https://github.com/kelvingakuo/fycharts) called `fycharts`._

In [4]:
pwd

'/Users/emilynaftalin/Data_Science/General Assembly/dsi/capstone/code'

#### _2017_

In [5]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../dataa/italy_2017_v50.db", echo=False)
api.viral50Daily(output_file = "../data/italy_2017_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2017-01-01", end = "2017-12-31", region = "it")

INFO : 14/02/2021 11:29:49 PM : Extracting viral 50 daily for 2017-01-01 - it
INFO : 14/02/2021 11:29:50 PM : Extracting viral 50 daily for 2017-01-02 - it
INFO : 14/02/2021 11:29:50 PM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 14/02/2021 11:29:50 PM : Appending data to the table viral_50_daily
INFO : 14/02/2021 11:29:50 PM : Appending data to the file ../data/italy_2017_v50.csv...
INFO : 14/02/2021 11:29:50 PM : Done appending to the file ../data/italy_2017_v50.csv!!!
Exception in thread Thread-6:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.py",

In [6]:
italy_2017_v50 = pd.read_csv('../data/italy_2017_v50.csv')
italy_2017_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1.0,Careless Whisper,George Michael,2017-01-01,it,4jDmJ51x1o9NZB5Nxxc7gY
1,2.0,Tranquilla!,Fabrizio Vidale,2017-01-01,it,7D64ejn2d3g6QaahkYdKXH
2,3.0,Jesus to a Child,George Michael,2017-01-01,it,2SzCxX6M6vDwdEwnHDiTaY
3,4.0,Faith - Remastered,George Michael,2017-01-01,it,0HEmnAUT8PHznIAAmVXqFJ
4,5.0,Freedom! '90,George Michael,2017-01-01,it,1D6nV9TPfMnWm7UdVsDVfI


In [56]:
italy_2017_v50.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18250 entries, 0 to 18249
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Position    18150 non-null  float64
 1   Track Name  18148 non-null  object 
 2   Artist      18148 non-null  object 
 3   date        18250 non-null  object 
 4   region      18250 non-null  object 
 5   spotify_id  18150 non-null  object 
dtypes: float64(1), object(5)
memory usage: 855.6+ KB


In [57]:
italy_2017_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Samba Megamix (12 version),Two Man Sound,2017-12-31,it,3h9GLvqH55IAqUD0kvGE6X
18246,47.0,Ricordami (Lullaby),Emiliano Coltorti,2017-12-31,it,7wTncdYglIWFBf8nRrUe8S
18247,48.0,Tropicale,Francesca Michielin,2017-12-31,it,0yuULkmsxCKtLh6mFTQhAx
18248,49.0,Motorcycle Patches,Huncho Jack,2017-12-31,it,7g7raxdQpiLZT7aOlib4S1
18249,50.0,La legge di Murphy,CIMINI,2017-12-31,it,4gfljbJ39m3lAzZCFv10N7


#### _2018_

In [8]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/italy_2018_v50.db", echo=False)
api.viral50Daily(output_file = "../data/italy_2018_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2018-01-01", end = "2018-12-31", region = "it")

INFO : 14/02/2021 11:35:19 PM : Extracting viral 50 daily for 2018-01-01 - it
INFO : 14/02/2021 11:35:20 PM : Extracting viral 50 daily for 2018-01-02 - it
INFO : 14/02/2021 11:35:20 PM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 14/02/2021 11:35:20 PM : Appending data to the table viral_50_daily
INFO : 14/02/2021 11:35:20 PM : Appending data to the file ../data/italy_2018_v50.csv...
INFO : 14/02/2021 11:35:20 PM : Done appending to the file ../data/italy_2018_v50.csv!!!
Exception in thread Thread-9:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.py",

In [13]:
italy_2018_v50 = pd.read_csv('../data/italy_2018_v50.csv')
italy_2018_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1,Bill Murray,I Giocattoli,2018-01-01,it,3Ws800fUdnVwySSDuNconc
1,2,The Greatest Show,Hugh Jackman,2018-01-01,it,43ay9lQZ5rfNcOOHhRF2cM
2,3,This Is Me,Keala Settle,2018-01-01,it,2xGjteMU3E1tkEPVFBO08U
3,4,A Million Dreams,Ziv Zaifman,2018-01-01,it,0RoA7ObU6phWpqhlC9zH4Z
4,5,Never Enough,Loren Allred,2018-01-01,it,0Gl5s8IhMmQE5YQwM8Qx1J


In [14]:
italy_2018_v50.shape

(18250, 6)

#### _2019_

In [11]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/italy_2019_v50.db", echo=False)
api.viral50Daily(output_file = "../data/italy_2019_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2019-01-01", end = "2019-12-31", region = "it")

INFO : 14/02/2021 11:41:07 PM : Extracting viral 50 daily for 2019-01-01 - it
INFO : 14/02/2021 11:41:08 PM : Extracting viral 50 daily for 2019-01-02 - it
INFO : 14/02/2021 11:41:08 PM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 14/02/2021 11:41:08 PM : Appending data to the table viral_50_daily
INFO : 14/02/2021 11:41:08 PM : Appending data to the file ../data/italy_2019_v50.csv...
Exception in thread Thread-12:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
INFO : 14/02/2021 11:41:08 PM : Done appending to the file ../data/italy_2019_v50.csv!!!
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.py"

In [15]:
italy_2019_v50= pd.read_csv('../data/italy_2019_v50.csv')
italy_2019_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Memories,Maroon 5,2019-12-31,it,2b8fOow8UzyDFAE27YhOZM
18246,47.0,CHEYENNE - feat. Charlie Charles,Francesca Michielin,2019-12-31,it,3tSiJqSOzHrp828XhPXkst
18247,48.0,Adore You,Harry Styles,2019-12-31,it,3jjujdWJ72nww5eGnfs2E7
18248,49.0,C'est La Vie,TY1,2019-12-31,it,5l93giGtSBNAJUVj8hPygt
18249,50.0,CRUDELIA - I nervi,Marracash,2019-12-31,it,1OeIJITrlUR4qss2kywMEn


#### _2020 - Feb 13, 2021_

In [16]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/italy_2020_v50.db", echo=False)
api.viral50Daily(output_file = "../data/italy_2020_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2020-01-01", end = "2021-02-13", region = "it")

INFO : 14/02/2021 11:54:58 PM : Extracting viral 50 daily for 2020-01-01 - it
INFO : 14/02/2021 11:54:59 PM : Extracting viral 50 daily for 2020-01-02 - it
INFO : 14/02/2021 11:54:59 PM : Appending data to the table viral_50_daily
INFO : 14/02/2021 11:54:59 PM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 14/02/2021 11:54:59 PM : Appending data to the file ../data/italy_2020_v50.csv...
INFO : 14/02/2021 11:54:59 PM : Done appending to the file ../data/italy_2020_v50.csv!!!
Exception in thread INFO : 14/02/2021 11:54:59 PM : Done appending to the table viral_50_daily!!!
Thread-15:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOC

In [18]:
italy_2020_v50= pd.read_csv('../data/italy_2020_v50.csv')
italy_2020_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1,Che vita meravigliosa,Diodato,2020-01-01,it,38IMwP32I50uftvKOoDD09
1,2,Falling,Trevor Daniel,2020-01-01,it,4TnjEaWOeW0eKTKIEvJyCa
2,3,Vorrei Dirti,Anto Paga,2020-01-01,it,4zpAnyVfs0EQbD6VSZ6Dgs
3,4,Natale Reggaeton,RDS Christmas Band,2020-01-01,it,4pvtxWJNji6VBwJ9FiI9Ba
4,5,blun7 a swishland,tha Supreme,2020-01-01,it,7HwvPmK74MBRDhCIyMXReP


#### _2021: January 1 - February 20, 2021_

In [12]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/italy_2021_v50.db", echo=False)
api.viral50Daily(output_file = "../data/italy_2021_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2021-01-01", end = "2021-02-20", region = "it")

INFO : 21/02/2021 05:36:26 PM : Extracting viral 50 daily for 2021-01-01 - it
INFO : 21/02/2021 05:36:27 PM : Extracting viral 50 daily for 2021-01-02 - it
INFO : 21/02/2021 05:36:27 PM : Appending data to the table viral_50_daily
INFO : 21/02/2021 05:36:27 PM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 21/02/2021 05:36:27 PM : Appending data to the file ../data/italy_2021_v50.csv...
INFO : 21/02/2021 05:36:27 PM : Done appending to the file ../data/italy_2021_v50.csv!!!
INFO : 21/02/2021 05:36:27 PM : Done appending to the table viral_50_daily!!!
Exception in thread Thread-9:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK

In [13]:
italy_2021_v50= pd.read_csv('../data/italy_2021_v50.csv')
italy_2021_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1,Problemas,Paris Boy,2021-01-01,it,2aQJOc2QUTdQl1J2Z9VxYO
1,2,fools (can't help falling in love) (feat. Sody),Foster,2021-01-01,it,4VEEDnEFLI9dUy5QA51rom
2,3,Concedimi,Matteo Romano,2021-01-01,it,0pFjYM7JdNJhjuooMgesks
3,4,MIA,Lortex,2021-01-01,it,1SIl1FBbHmGa4PmJu5q6Lm
4,5,Mayonaka no Door / Stay With Me,Miki Matsubara,2021-01-01,it,2BHj31ufdEqVK5CkYDp9mA


## Spain Top Charting Songs, Viral 50, 2017-2020

#### _2017_

In [37]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/spain_2017_v50.db", echo=False)
api.viral50Daily(output_file = "../data/spain_2017_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2017-01-01", end = "2017-12-31", region = "es")

INFO : 15/02/2021 12:37:41 AM : Extracting viral 50 daily for 2017-01-01 - es
INFO : 15/02/2021 12:37:42 AM : Extracting viral 50 daily for 2017-01-02 - es
INFO : 15/02/2021 12:37:42 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:37:42 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:37:42 AM : Appending data to the file ../data/spain_2017_v50.csv...
INFO : 15/02/2021 12:37:42 AM : Done appending to the table viral_50_daily!!!
INFO : 15/02/2021 12:37:42 AM : Done appending to the file ../data/spain_2017_v50.csv!!!
Exception in thread Thread-36:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOC

In [38]:
spain_2017_v50 = pd.read_csv('../data/spain_2017_v50.csv')
spain_2017_v50.head()

INFO : 15/02/2021 12:42:11 AM : Done appending to the table viral_50_daily!!!


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1.0,Una Mierda como Un Castillo,Anntona,2017-01-01,es,5hRzeXfIh4WYHjH6flVG0n
1,2.0,Last Christmas,Wham!,2017-01-01,es,2FRnf9qhLbvw8fu4IBXx78
2,3.0,La gallina Co-co-ua,Enrique Y Ana,2017-01-01,es,3fvMaybsEUfAVaVWa0Pa23
3,4.0,Freedom! '90,George Michael,2017-01-01,es,1D6nV9TPfMnWm7UdVsDVfI
4,5.0,Faith - Remastered,George Michael,2017-01-01,es,0HEmnAUT8PHznIAAmVXqFJ


In [39]:
spain_2017_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Sax - Operación Triunfo 2017,Ana Guerra,2017-12-31,es,4BI8uPYOfr2ViZ22wxje0E
18246,47.0,Bellagio - Gese da o Remix,Dano,2017-12-31,es,1QdGHjw5989keEMNZJmxuU
18247,48.0,La Bikina - Operación Triunfo 2017,Ana Guerra,2017-12-31,es,20lWMgXeA4NkJjPxtD5XnT
18248,49.0,Je suis venu te dire que je m'en vais - Operac...,Agoney,2017-12-31,es,4tRqtxPhhUi9NtbtBBrcLi
18249,50.0,Perfect Symphony (Ed Sheeran & Andrea Bocelli),Ed Sheeran,2017-12-31,es,3zl7j5ua8mF4JDYuxrfo01


#### _2018_

In [40]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/spain_2018_v50.db", echo=False)
api.viral50Daily(output_file = "../data/spain_2018_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2018-01-01", end = "2018-12-31", region = "es")

INFO : 15/02/2021 12:42:11 AM : Extracting viral 50 daily for 2018-01-01 - es
INFO : 15/02/2021 12:42:12 AM : Extracting viral 50 daily for 2018-01-02 - es
INFO : 15/02/2021 12:42:12 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:42:12 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:42:12 AM : Appending data to the file ../data/spain_2018_v50.csv...
INFO : 15/02/2021 12:42:12 AM : Done appending to the file ../data/spain_2018_v50.csv!!!
Exception in thread Thread-39:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    INFO : 15/02/2021 12:42:12 AM : Done appending to the table viral_50_daily!!!
for res in socket.getaddrinfo(host, port, family, socket.SOC

In [41]:
spain_2018_v50 = pd.read_csv('../data/spain_2018_v50.csv')
spain_2018_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1,Camina - Operación Triunfo 2017,Operación Triunfo 2017,2018-01-01,es,5RtOTz6tsa5ssYJ084BaLW
1,2,Back In Black,Wing,2018-01-01,es,6dWSn5X8gFxuY9TxFoDMv9
2,3,This Is Me,Keala Settle,2018-01-01,es,2xGjteMU3E1tkEPVFBO08U
3,4,Bum Bum Tam Tam,MC Fioti,2018-01-01,es,4zWO4gvuFtw6EJZC5FFGlr
4,5,Date placer con mi cuerpo,La Tigresa del oriente,2018-01-01,es,0rtUXEaWVV9xLeoUVw8izV


In [10]:
spain_2018_v50.shape

(18250, 6)

#### _2019_

In [20]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/spain_2019_v50.db", echo=False)
api.viral50Daily(output_file = "../data/spain_2019_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2019-01-01", end = "2019-12-31", region = "es")

INFO : 15/02/2021 12:02:02 AM : Extracting viral 50 daily for 2019-01-01 - es
INFO : 15/02/2021 12:02:03 AM : Extracting viral 50 daily for 2019-01-02 - es
INFO : 15/02/2021 12:02:03 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:02:03 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:02:03 AM : Appending data to the file ../data/spain_2019_v50.csv...
Exception in thread Thread-18:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
INFO : 15/02/2021 12:02:03 AM : Done appending to the file ../data/spain_2019_v50.csv!!!
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
INFO : 15/02/2021 12:02:03 AM : Done appending to the table viral_50_daily!!!
    for res in socket.getaddrinfo(host, port, family, socket.SOC

In [21]:
spain_2019_v50= pd.read_csv('../data/spain_2019_v50.csv')
spain_2019_v50.tail()

INFO : 15/02/2021 12:07:26 AM : Done appending to the table viral_50_daily!!!


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Bullet,HYYTS,2019-12-31,es,17y5RWKyvdNeYF7bkM7wKv
18246,47.0,Ya No Te Hago Falta,Sen Senra,2019-12-31,es,4RIi1gNmKDzSH04Vvws2DK
18247,48.0,Adore You,Harry Styles,2019-12-31,es,3jjujdWJ72nww5eGnfs2E7
18248,49.0,La Luna,Galvan Real,2019-12-31,es,06Ke9F6uDCPCRlryHzBegD
18249,50.0,Birthday Suit,Cosmo Sheldrake,2019-12-31,es,5feuWfAXA4rEKH9pXCjOV2


#### _2020 - Feb 13, 2021_

In [22]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/spain_2020_v50.db", echo=False)
api.viral50Daily(output_file = "../data/spain_2020_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2020-01-01", end = "2021-02-13", region = "es")

INFO : 15/02/2021 12:07:26 AM : Extracting viral 50 daily for 2020-01-01 - es
INFO : 15/02/2021 12:07:27 AM : Extracting viral 50 daily for 2020-01-02 - es
INFO : 15/02/2021 12:07:27 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:07:27 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:07:27 AM : Appending data to the file ../data/spain_2020_v50.csv...
INFO : 15/02/2021 12:07:27 AM : Done appending to the file ../data/spain_2020_v50.csv!!!
Exception in thread Thread-21:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.py"

In [23]:
spain_2020_v50= pd.read_csv('../data/spain_2020_v50.csv')
spain_2020_v50.tail()

INFO : 15/02/2021 12:12:03 AM : Done appending to the table viral_50_daily!!!


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
20495,46,Hasta Abajo,Kevin Roldan,2021-02-13,es,4PEfJZpVjdclfzZ9XO8uDw
20496,47,Strange (feat. Hillary Smith),Kris Bowers,2021-02-13,es,2bPWLtJe3v33z1j40sWB8a
20497,48,Hecha Pa' Mi,Boza,2021-02-13,es,3VvA1wSxukMLsvXoXtlwWx
20498,49,La Luz - A COLORS SHOW,María José Llergo,2021-02-13,es,5jRnVQbjB6qgN3MARS4xw7
20499,50,Boku no Sensou - TV Size,Shinsei Kamattechan,2021-02-13,es,3tRPfCFAEv6wWyQO0YnGGV


## Greece Top Charting Songs, Viral 50, 2017-2020

#### _2017_

In [24]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/greece_2017_v50.db", echo=False)
api.viral50Daily(output_file = "../data/greece_2017_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2017-01-01", end = "2017-12-31", region = "gr")

INFO : 15/02/2021 12:12:03 AM : Extracting viral 50 daily for 2017-01-01 - gr
INFO : 15/02/2021 12:12:04 AM : Extracting viral 50 daily for 2017-01-02 - gr
INFO : 15/02/2021 12:12:04 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:12:04 AM : Appending data to the file ../data/greece_2017_v50.csv...
INFO : 15/02/2021 12:12:04 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:12:04 AM : Done appending to the file ../data/greece_2017_v50.csv!!!
Exception in thread Thread-24:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
INFO : 15/02/2021 12:12:04 AM : Done appending to the table viral_50_daily!!!
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.S

In [25]:
greece_2017_v50 = pd.read_csv('../data/greece_2017_v50.csv')
greece_2017_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1.0,Careless Whisper,George Michael,2017-01-01,gr,4jDmJ51x1o9NZB5Nxxc7gY
1,2.0,Father Figure - Remastered,George Michael,2017-01-01,gr,0L0T4tMAaGqLgIVj1MOj9t
2,3.0,Faith - Remastered,George Michael,2017-01-01,gr,0HEmnAUT8PHznIAAmVXqFJ
3,4.0,Freedom! '90,George Michael,2017-01-01,gr,1D6nV9TPfMnWm7UdVsDVfI
4,5.0,"City Of Stars - From ""La La Land"" Soundtrack",Ryan Gosling,2017-01-01,gr,0LtVx5vsq9nfpGsWgjUin6


In [26]:
greece_2017_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18182,46.0,Man's Not Hot,Big Shaq,2017-12-31,gr,2nUJvBO87SkxCViQsLc9Zr
18183,47.0,hometown,cleopatrick,2017-12-31,gr,2kFKIFc5RSTlhuGed6ZNZ7
18184,48.0,Without You,The Lidls,2017-12-31,gr,5ak3ZwyjRdwABZQfovFcZG
18185,49.0,Έτσι Κι Έτσι,Helena Paparizou,2017-12-31,gr,6afC8zWRpRBnifPzHaFW1L
18186,50.0,Petao,Yannis Christodoulopoulos,2017-12-31,gr,45WRSEX5cqHRqFR62C99j6


#### _2018_

In [27]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/greece_2018_v50.db", echo=False)
api.viral50Daily(output_file = "../data/greece_2018_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2018-01-01", end = "2018-12-31", region = "gr")

INFO : 15/02/2021 12:16:42 AM : Extracting viral 50 daily for 2018-01-01 - gr
INFO : 15/02/2021 12:16:43 AM : Extracting viral 50 daily for 2018-01-02 - gr
INFO : 15/02/2021 12:16:43 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:16:43 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:16:43 AM : Appending data to the file ../data/greece_2018_v50.csv...
INFO : 15/02/2021 12:16:43 AM : Done appending to the file ../data/greece_2018_v50.csv!!!
Exception in thread Thread-27:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.p

In [35]:
greece_2018_v50 = pd.read_csv('../data/greece_2018_v50.csv')
greece_2018_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1,River (feat. Ed Sheeran),Eminem,2018-01-01,gr,5UEnHoDYpsxlfzWLZIc7LD
1,2,Him & I (with Halsey),G-Eazy,2018-01-01,gr,5k38wzpLb15YgncyWdTZE4
2,3,Stir Fry,Migos,2018-01-01,gr,4fndbjoz1qJyK6JcLdKfzm
3,4,Black & Chinese,Huncho Jack,2018-01-01,gr,628Ueb4sRiXApObhcU9iPU
4,5,MIC Drop (feat. Desiigner) [Steve Aoki Remix],BTS,2018-01-01,gr,7Ed6BkggCS2KaKY5YlINaF


In [29]:
greece_2018_v50.shape

(18250, 6)

#### _2019_

In [30]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/greece_2019_v50.db", echo=False)
api.viral50Daily(output_file = "../data/greece_2019_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2019-01-01", end = "2019-12-31", region = "gr")

INFO : 15/02/2021 12:21:06 AM : Extracting viral 50 daily for 2019-01-01 - gr
INFO : 15/02/2021 12:21:07 AM : Extracting viral 50 daily for 2019-01-02 - gr
INFO : 15/02/2021 12:21:07 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:21:07 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:21:07 AM : Appending data to the file ../data/greece_2019_v50.csv...
INFO : 15/02/2021 12:21:07 AM : Done appending to the file ../data/greece_2019_v50.csv!!!
Exception in thread Thread-30:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.p

In [31]:
greece_2019_v50= pd.read_csv('../data/greece_2019_v50.csv')
greece_2019_v50.tail()

INFO : 15/02/2021 12:26:02 AM : Done appending to the table viral_50_daily!!!


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Colpo Grosso,SNIK,2019-12-31,gr,4lCwnBZurX0OIqMKBOHRlS
18246,47.0,Bye Bye,EPITHE,2019-12-31,gr,7jrIGtjzmmyf7XLEARiFGn
18247,48.0,Peripoliko,VLOSPA,2019-12-31,gr,6rif5tdYfgPnXFQNDibSWU
18248,49.0,Fairytale of New York (feat. Kirsty MacColl),The Pogues,2019-12-31,gr,3VTNVsTTu05dmTsVFrmGpK
18249,50.0,ORANGE SODA,Baby Keem,2019-12-31,gr,5FkoSXiJPKTNyYgALRJFhD


#### _2020 - Feb 13, 2021_

In [32]:
api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///../data/greece_2020_v50.db", echo=False)
api.viral50Daily(output_file = "../data/greece_2020_v50.csv", output_db = connector, webhook = ["https://mywebhookssite.com/post/"], start = "2020-01-01", end = "2021-02-13", region = "gr")

INFO : 15/02/2021 12:26:02 AM : Extracting viral 50 daily for 2020-01-01 - gr
INFO : 15/02/2021 12:26:02 AM : Extracting viral 50 daily for 2020-01-02 - gr
INFO : 15/02/2021 12:26:02 AM : Appending data to the table viral_50_daily
INFO : 15/02/2021 12:26:02 AM : POSTing data to the endpoint https://mywebhookssite.com/post/
INFO : 15/02/2021 12:26:02 AM : Appending data to the file ../data/greece_2020_v50.csv...
INFO : 15/02/2021 12:26:02 AM : Done appending to the file ../data/greece_2020_v50.csv!!!
Exception in thread Thread-33:
Traceback (most recent call last):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn
    conn = connection.create_connection(
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/emilynaftalin/opt/anaconda3/lib/python3.8/socket.p

In [33]:
greece_2020_v50= pd.read_csv('../data/greece_2020_v50.csv')
greece_2020_v50.tail()

INFO : 15/02/2021 12:30:36 AM : Done appending to the table viral_50_daily!!!


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
20495,46,Hayloft,Mother Mother,2021-02-13,gr,2kS6td1yvmpNgZTt1q5pQq
20496,47,TAHAMOU,RICTA,2021-02-13,gr,7G2lz6En32ncbkpE6VmUdu
20497,48,Hadal Ahbek,Issam Alnajjar,2021-02-13,gr,0UdZzUSOLhqIFCU7wHWkgp
20498,49,Do I Wanna Know?,Arctic Monkeys,2021-02-13,gr,3rq5w4bQGigXOfdN30ATJt
20499,50,Ernest Hebrard,RICTA,2021-02-13,gr,0IeVaezBMTHlniQOcKIqOr


## Audio Features 

Obtaining access to audio features for each track using Spotify's Client Credentials Flow (see above) and a wrapper library [Spotipy](https://spotipy.readthedocs.io/en/2.16.1/). 

_**Citation: Code for scraping audio features borrowed from [CNN_for_Dance_Music_Classification](https://github.com/amytaylor330/CNN_for_Dance_Music_Classification)._

In [7]:
def get_merge_audio_features(song_df, id_col, batchsize=100):
    
    '''
    function that requests audio features for songs in the DataFrame and appends them to original DataFrame.
    '''
    
    features_list = []
    
    None_counter = 0
    
    for i in range(0, len(song_df[id_col]), batchsize):
        
        batch = song_df[id_col][i:i+batchsize]
        
        feature_results = sp.audio_features(batch)
        
        for i, t in enumerate(feature_results):
            if t == None: 
                None_counter += 1
            else: 
                features_list.append(t)
                
    print('Number of tracks where no audio features were available:', None_counter)
    print('Number of usable tracks:', len(features_list))
    
    features_df = pd.DataFrame(features_list)
    
    combined_df = pd.concat([song_df, features_df], axis=1)
    
    return combined_df 

In [8]:
def clean_song_features_df(df, cols_to_drop, pickle_path):
    
    '''
    Function to clean and pickle the combined DataFrame. 
    '''
    
    # copy original dataframe so that it is not altered 
    df_clean = df.copy()
    
    # drop unnecessary columns 
    df_clean.drop(columns=cols_to_drop, inplace=True)
    
    # convert date column date range to a single day that is the first date in the range (happens to be the Friday of that week)
    df_clean['date'] = df_clean['date'].apply(lambda x: x[:10])
    
    # converting date column to datetime format
    df_clean['date'] = pd.to_datetime(df_clean['date'], errors='coerce')
    
    # setting date column as df index
    df_clean.set_index('date', inplace=True)
    
    # pickle clean dataframe to use in other notebooks 
    df_clean.to_pickle(pickle_path)
    
    return df_clean 

#### _Merging DataFrames_

Below, I will use the `get_merge_audio_features` function to create  new combined DataFrames for 2017-19 and 2020 for each of the three countries (Italy, Spain, Greece). Then I will use the `clean_song_features` function to clean the DataFrames so that they are ready for visualization & modeling and also pickle them. 

_I scraped the charts for each separately in case I wanted to separate DataFrames, but now I will now merge them into one DataFrame years 2017-2019 because this is more useful for EDA and the start of time series modeling._

_Some dates seem to have been omitted by Spotify, so all the columns are null for those rows. I will drop these rows._ 

_Finally, I reset the index so it can be concatenated with features dataframe later on_

In [44]:
italy_17_19_v50 = pd.concat([italy_2017_v50, italy_2018_v50, italy_2019_v50])
italy_17_19_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1.0,Careless Whisper,George Michael,2017-01-01,it,4jDmJ51x1o9NZB5Nxxc7gY
1,2.0,Tranquilla!,Fabrizio Vidale,2017-01-01,it,7D64ejn2d3g6QaahkYdKXH
2,3.0,Jesus to a Child,George Michael,2017-01-01,it,2SzCxX6M6vDwdEwnHDiTaY
3,4.0,Faith - Remastered,George Michael,2017-01-01,it,0HEmnAUT8PHznIAAmVXqFJ
4,5.0,Freedom! '90,George Michael,2017-01-01,it,1D6nV9TPfMnWm7UdVsDVfI


In [46]:
italy_17_19_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Memories,Maroon 5,2019-12-31,it,2b8fOow8UzyDFAE27YhOZM
18246,47.0,CHEYENNE - feat. Charlie Charles,Francesca Michielin,2019-12-31,it,3tSiJqSOzHrp828XhPXkst
18247,48.0,Adore You,Harry Styles,2019-12-31,it,3jjujdWJ72nww5eGnfs2E7
18248,49.0,C'est La Vie,TY1,2019-12-31,it,5l93giGtSBNAJUVj8hPygt
18249,50.0,CRUDELIA - I nervi,Marracash,2019-12-31,it,1OeIJITrlUR4qss2kywMEn


In [45]:
italy_17_19_v50.shape

(54750, 6)

In [72]:
# dropping null rows 
italy_17_19_v50.dropna(inplace=True)

In [73]:
# resettting index
italy_17_19_v50.reset_index(drop=True, inplace=True)
italy_17_19_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
54390,46.0,Memories,Maroon 5,2019-12-31,it,2b8fOow8UzyDFAE27YhOZM
54391,47.0,CHEYENNE - feat. Charlie Charles,Francesca Michielin,2019-12-31,it,3tSiJqSOzHrp828XhPXkst
54392,48.0,Adore You,Harry Styles,2019-12-31,it,3jjujdWJ72nww5eGnfs2E7
54393,49.0,C'est La Vie,TY1,2019-12-31,it,5l93giGtSBNAJUVj8hPygt
54394,50.0,CRUDELIA - I nervi,Marracash,2019-12-31,it,1OeIJITrlUR4qss2kywMEn


In [47]:
spain_17_19_v50 = pd.concat([spain_2017_v50, spain_2018_v50, spain_2019_v50])
spain_17_19_v50.head()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
0,1.0,Una Mierda como Un Castillo,Anntona,2017-01-01,es,5hRzeXfIh4WYHjH6flVG0n
1,2.0,Last Christmas,Wham!,2017-01-01,es,2FRnf9qhLbvw8fu4IBXx78
2,3.0,La gallina Co-co-ua,Enrique Y Ana,2017-01-01,es,3fvMaybsEUfAVaVWa0Pa23
3,4.0,Freedom! '90,George Michael,2017-01-01,es,1D6nV9TPfMnWm7UdVsDVfI
4,5.0,Faith - Remastered,George Michael,2017-01-01,es,0HEmnAUT8PHznIAAmVXqFJ


In [74]:
# dropping null rows 
spain_17_19_v50.dropna(inplace=True)

In [75]:
# resettting index
spain_17_19_v50.reset_index(drop=True, inplace=True)
spain_17_19_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
54369,46.0,Bullet,HYYTS,2019-12-31,es,17y5RWKyvdNeYF7bkM7wKv
54370,47.0,Ya No Te Hago Falta,Sen Senra,2019-12-31,es,4RIi1gNmKDzSH04Vvws2DK
54371,48.0,Adore You,Harry Styles,2019-12-31,es,3jjujdWJ72nww5eGnfs2E7
54372,49.0,La Luna,Galvan Real,2019-12-31,es,06Ke9F6uDCPCRlryHzBegD
54373,50.0,Birthday Suit,Cosmo Sheldrake,2019-12-31,es,5feuWfAXA4rEKH9pXCjOV2


In [48]:
greece_17_19_v50 = pd.concat([greece_2017_v50, greece_2018_v50, greece_2019_v50])
greece_17_19_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
18245,46.0,Colpo Grosso,SNIK,2019-12-31,gr,4lCwnBZurX0OIqMKBOHRlS
18246,47.0,Bye Bye,EPITHE,2019-12-31,gr,7jrIGtjzmmyf7XLEARiFGn
18247,48.0,Peripoliko,VLOSPA,2019-12-31,gr,6rif5tdYfgPnXFQNDibSWU
18248,49.0,Fairytale of New York (feat. Kirsty MacColl),The Pogues,2019-12-31,gr,3VTNVsTTu05dmTsVFrmGpK
18249,50.0,ORANGE SODA,Baby Keem,2019-12-31,gr,5FkoSXiJPKTNyYgALRJFhD


In [76]:
greece_17_19_v50.isnull().sum()

Position      350
Track Name    356
Artist        356
date            0
region          0
spotify_id    350
dtype: int64

In [77]:
# dropping null rows 
greece_17_19_v50.dropna(inplace=True)

In [78]:
# resettting index
greece_17_19_v50.reset_index(drop=True, inplace=True)
greece_17_19_v50.tail()

Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id
54326,46.0,Colpo Grosso,SNIK,2019-12-31,gr,4lCwnBZurX0OIqMKBOHRlS
54327,47.0,Bye Bye,EPITHE,2019-12-31,gr,7jrIGtjzmmyf7XLEARiFGn
54328,48.0,Peripoliko,VLOSPA,2019-12-31,gr,6rif5tdYfgPnXFQNDibSWU
54329,49.0,Fairytale of New York (feat. Kirsty MacColl),The Pogues,2019-12-31,gr,3VTNVsTTu05dmTsVFrmGpK
54330,50.0,ORANGE SODA,Baby Keem,2019-12-31,gr,5FkoSXiJPKTNyYgALRJFhD


#### _Italy 2017-19_

In [79]:
italy_17_19_v50_raw = get_merge_audio_features(italy_17_19_v50, 'spotify_id')
italy_17_19_v50_raw.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 54395


Unnamed: 0,Position,Track Name,Artist,date,region,spotify_id,danceability,energy,key,loudness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1.0,Careless Whisper,George Michael,2017-01-01,it,4jDmJ51x1o9NZB5Nxxc7gY,0.574,0.629,2,-8.815,...,0.271,0.786,153.119,audio_features,4jDmJ51x1o9NZB5Nxxc7gY,spotify:track:4jDmJ51x1o9NZB5Nxxc7gY,https://api.spotify.com/v1/tracks/4jDmJ51x1o9N...,https://api.spotify.com/v1/audio-analysis/4jDm...,300107,4
1,2.0,Tranquilla!,Fabrizio Vidale,2017-01-01,it,7D64ejn2d3g6QaahkYdKXH,0.822,0.736,5,-5.775,...,0.054,0.59,135.132,audio_features,7D64ejn2d3g6QaahkYdKXH,spotify:track:7D64ejn2d3g6QaahkYdKXH,https://api.spotify.com/v1/tracks/7D64ejn2d3g6...,https://api.spotify.com/v1/audio-analysis/7D64...,163893,4


In [80]:
cols_to_drop = ['type', 'id', 'uri', 'track_href', 'analysis_url']

In [81]:
italy_17_19_v50_df = clean_song_features_df(italy_17_19_v50_raw, cols_to_drop, '../data/it_17_19_v50_feat.pkl')
italy_17_19_v50_df.head(2)

Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-01,1.0,Careless Whisper,George Michael,it,4jDmJ51x1o9NZB5Nxxc7gY,0.574,0.629,2,-8.815,0,0.0363,0.128,0.0,0.271,0.786,153.119,300107,4
2017-01-01,2.0,Tranquilla!,Fabrizio Vidale,it,7D64ejn2d3g6QaahkYdKXH,0.822,0.736,5,-5.775,1,0.313,0.251,0.0,0.054,0.59,135.132,163893,4


#### _Italy 2020_

In [19]:
italy_20_v50_raw = get_merge_audio_features(italy_2020_v50, 'spotify_id')

italy_20_v50_df = clean_song_features_df(italy_20_v50_raw, cols_to_drop, '../data/it_20_v50_feat.pkl')

italy_20_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 20500


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-01,1,Che vita meravigliosa,Diodato,it,38IMwP32I50uftvKOoDD09,0.503,0.669,11,-7.76,0,0.0385,0.0238,6e-06,0.142,0.381,95.047,212253,4
2020-01-01,2,Falling,Trevor Daniel,it,4TnjEaWOeW0eKTKIEvJyCa,0.785,0.431,10,-8.756,0,0.0364,0.123,0.0,0.0887,0.236,127.085,159382,4


#### _Italy 2021_

In [20]:
cols_to_drop = ['type', 'id', 'uri', 'track_href', 'analysis_url']

In [21]:
italy_21_v50_raw = get_merge_audio_features(italy_2021_v50, 'spotify_id')

italy_21_v50_df = clean_song_features_df(italy_21_v50_raw, cols_to_drop, '../data/it_21_v50_feat.pkl')

italy_21_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 2550


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-01-01,1,Problemas,Paris Boy,it,2aQJOc2QUTdQl1J2Z9VxYO,0.787,0.238,1,-13.154,0,0.0554,0.677,2.2e-05,0.0869,0.262,98.022,222892,4
2021-01-01,2,fools (can't help falling in love) (feat. Sody),Foster,it,4VEEDnEFLI9dUy5QA51rom,0.706,0.604,2,-6.932,1,0.303,0.418,0.0,0.242,0.361,82.03,165029,4


#### _Spain 2017-19_

In [86]:
spain_17_19_v50_raw = get_merge_audio_features(spain_17_19_v50, 'spotify_id')

spain_17_19_v50_df = clean_song_features_df(spain_17_19_v50_raw, cols_to_drop, '../data/sp_17_19_v50_feat.pkl')

spain_17_19_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 54374


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-01,1.0,Una Mierda como Un Castillo,Anntona,es,5hRzeXfIh4WYHjH6flVG0n,0.414,0.802,0,-6.947,1,0.0468,0.00266,0.0,0.0774,0.792,165.043,130120,4
2017-01-01,2.0,Last Christmas,Wham!,es,2FRnf9qhLbvw8fu4IBXx78,0.735,0.478,2,-12.472,1,0.0293,0.189,2e-06,0.355,0.947,107.682,262960,4


#### _Spain 2020_

In [87]:
spain_20_v50_raw = get_merge_audio_features(spain_2020_v50, 'spotify_id')

spain_20_v50_df = clean_song_features_df(spain_20_v50_raw, cols_to_drop, '../data/sp_20_v50_feat.pkl')

spain_20_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 20500


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-01,1,Tusa,KAROL G,es,7k4t7uLgtOxPwTpFmtJNTY,0.803,0.715,2,-3.28,1,0.298,0.295,0.000134,0.0574,0.574,101.085,200960,4
2020-01-01,2,Falling,Trevor Daniel,es,4TnjEaWOeW0eKTKIEvJyCa,0.785,0.431,10,-8.756,0,0.0364,0.123,0.0,0.0887,0.236,127.085,159382,4


#### _Greece 2017-19_

In [88]:
greece_17_19_v50_raw = get_merge_audio_features(greece_17_19_v50, 'spotify_id')

greece_17_19_v50_df = clean_song_features_df(greece_17_19_v50_raw, cols_to_drop, '../data/gr_17_19_v50_feat.pkl')

greece_17_19_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 54331


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-01,1.0,Careless Whisper,George Michael,gr,4jDmJ51x1o9NZB5Nxxc7gY,0.574,0.629,2,-8.815,0,0.0363,0.128,0.0,0.271,0.786,153.119,300107,4
2017-01-01,2.0,Father Figure - Remastered,George Michael,gr,0L0T4tMAaGqLgIVj1MOj9t,0.689,0.333,3,-13.288,1,0.0262,0.163,3.4e-05,0.0853,0.284,101.988,336667,4


#### _Greece 2020_

In [90]:
greece_20_v50_raw = get_merge_audio_features(greece_2020_v50, 'spotify_id')

greece_20_v50_df = clean_song_features_df(greece_20_v50_raw, cols_to_drop, '../data/gr_20_v50_feat.pkl')

greece_20_v50_df.head(2)

Number of tracks where no audio features were available: 0
Number of usable tracks: 20500


Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-01,1,Falling,Trevor Daniel,gr,4TnjEaWOeW0eKTKIEvJyCa,0.785,0.431,10,-8.756,0,0.0364,0.123,0.0,0.0887,0.236,127.085,159382,4
2020-01-01,2,No Idea,Don Toliver,gr,3VyjsVV24RmBIbWJAeUJNu,0.651,0.631,6,-5.717,0,0.0896,0.519,0.000579,0.165,0.35,127.994,154424,4


In [91]:
greece_20_v50_df.tail()

Unnamed: 0_level_0,Position,Track Name,Artist,region,spotify_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-02-13,46,Hayloft,Mother Mother,gr,2kS6td1yvmpNgZTt1q5pQq,0.527,0.922,9,-2.666,0,0.0728,0.00475,0.000162,0.422,0.49,95.975,181680,4
2021-02-13,47,TAHAMOU,RICTA,gr,7G2lz6En32ncbkpE6VmUdu,0.854,0.496,2,-10.528,1,0.343,0.558,0.0,0.0921,0.339,140.051,240527,4
2021-02-13,48,Hadal Ahbek,Issam Alnajjar,gr,0UdZzUSOLhqIFCU7wHWkgp,0.827,0.485,1,-10.384,0,0.0352,0.172,0.0,0.0798,0.384,94.997,209684,4
2021-02-13,49,Do I Wanna Know?,Arctic Monkeys,gr,3rq5w4bQGigXOfdN30ATJt,0.548,0.532,5,-7.596,1,0.0323,0.186,0.000263,0.217,0.405,85.03,272394,4
2021-02-13,50,Ernest Hebrard,RICTA,gr,0IeVaezBMTHlniQOcKIqOr,0.743,0.589,1,-9.564,1,0.196,0.256,3e-06,0.195,0.135,139.967,183488,4


#### _Italy 2021_