In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

import json

from fycharts.SpotifyCharts import SpotifyCharts
import sqlalchemy

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Load the Spotify API credentials from a JSON file stored one directory above
# the notebook, so the secrets never appear in the notebook itself.
with open("../spotify_credentials.json", "r") as json_file:
    creds = json.loads(json_file.read())

# The JSON file must provide both of these keys.
my_client_id = creds['SPOTIPY_CLIENT_ID']
my_client_secret = creds['SPOTIPY_CLIENT_SECRET']

# Build the client-credentials manager and the shared `sp` client that the
# rest of the notebook uses for Spotify Web API calls.
client_credentials_manager = SpotifyClientCredentials(
    client_id=my_client_id,
    client_secret=my_client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [None]:
def get_top_200_weekly(output_file, output_db, start_date, end_date, region, webhook=None):
    """Download Spotify "Top 200 weekly" chart data through fycharts.

    Parameters
    ----------
    output_file : str or None
        Path of the CSV file handed to fycharts as ``output_file``.
    output_db : str, SQLAlchemy engine/connection, or None
        A string is treated as a SQLAlchemy database URL and turned into an
        engine; any other value is passed to fycharts unchanged.
    start_date, end_date : str
        Passed to fycharts as ``start`` / ``end`` (e.g. ``"2017-01-01"``).
    region : str or list of str
        Region code(s) passed to fycharts (e.g. ``"it"``).
    webhook : list of str, optional
        Webhook URLs forwarded to fycharts. Defaults to ``None`` so nothing is
        posted unless the caller asks for it.

    Returns
    -------
    None
        The function is used for its side effects (whatever fycharts writes
        to ``output_file`` / ``output_db``).

    Example
    -------
    >>> get_top_200_weekly(
    ...     output_file="../data/italy_2017.csv",
    ...     output_db="sqlite:///../data/italy_2017.db",
    ...     start_date="2017-01-01",
    ...     end_date="2017-12-31",
    ...     region="it",
    ... )
    """
    api = SpotifyCharts()

    # Accept either a database URL or an already-built engine/connection.
    if isinstance(output_db, str):
        connector = sqlalchemy.create_engine(output_db, echo=False)
    else:
        connector = output_db

    api.top200Weekly(
        output_file=output_file,
        output_db=connector,
        webhook=webhook,
        start=start_date,
        end=end_date,
        region=region,
    )

In [3]:
def clean_song_features_df(df, cols_to_drop, pickle_path):
    """Clean a chart dataframe in place and pickle the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``'date'`` column whose first 10 characters are a
        parseable date (e.g. the start of a ``"YYYY-MM-DD--YYYY-MM-DD"``
        range). The frame is MODIFIED IN PLACE.
    cols_to_drop : list of str
        Column labels to remove from ``df``.
    pickle_path : str or path-like
        Destination passed to ``DataFrame.to_pickle``.

    Returns
    -------
    None
        The cleaned data is available through the mutated ``df`` and through
        the pickle file written to ``pickle_path``.
    """
    # drop unnecessary columns
    df.drop(columns=cols_to_drop, inplace=True)

    # keep only the first date of each date range (the first 10 characters)
    # and convert it to datetime in a single vectorised step
    df['date'] = pd.to_datetime(df['date'].str[:10])

    # setting date column as df index
    df.set_index('date', inplace=True)

    # pickle clean dataframe to use in other notebooks
    df.to_pickle(pickle_path)

In [4]:
def deg_merge_audio_features(song_df, id_col, batchsize=100, client=None):
    """Fetch Spotify audio features for every track and append them as columns.

    Track ids are read from ``song_df[id_col]`` and sent to
    ``client.audio_features`` in batches. The returned frame has exactly one
    row per row of ``song_df`` (same order, same index); tracks for which the
    API returned ``None`` keep their song columns and get NaN in the feature
    columns, so features can never be attached to the wrong song.

    Parameters
    ----------
    song_df : pandas.DataFrame
        Frame holding one track per row.
    id_col : str
        Name of the column in ``song_df`` containing the Spotify track ids.
    batchsize : int, default 100
        Number of ids sent per request. Spotify's audio-features endpoint
        documents a maximum of 100 ids per call.
    client : object, optional
        Any object exposing ``audio_features(list_of_ids)``. Defaults to the
        notebook-level ``sp`` client.

    Returns
    -------
    pandas.DataFrame
        ``song_df`` columns followed by the audio-feature columns.

    Raises
    ------
    ValueError
        If ``batchsize`` is smaller than 1.
    RuntimeError
        If the client returns a different number of results than ids sent,
        which would make a positional merge unsafe.
    """
    if batchsize < 1:
        raise ValueError("batchsize must be a positive integer")

    # Fall back to the notebook-level Spotify client when none is injected.
    if client is None:
        client = sp

    track_ids = song_df[id_col].tolist()
    features_list = []
    none_counter = 0

    for start in range(0, len(track_ids), batchsize):
        batch = track_ids[start:start + batchsize]

        for track_features in client.audio_features(batch):
            if track_features is None:
                none_counter += 1
                # Placeholder keeps this row aligned with its song; it turns
                # into an all-NaN feature row in the dataframe below.
                features_list.append({})
            else:
                features_list.append(track_features)

    if len(features_list) != len(track_ids):
        raise RuntimeError(
            "audio feature lookup returned %d results for %d track ids"
            % (len(features_list), len(track_ids))
        )

    print('Number of tracks where no audio features were available:', none_counter)
    print('Number of usable tracks:', len(features_list) - none_counter)

    features_df = pd.DataFrame(features_list)

    # Join by position rather than by index label: song_df may carry a
    # non-unique or non-default index (e.g. dates), which label-based
    # alignment in concat would mishandle. The original index is restored after.
    combined_df = pd.concat([song_df.reset_index(drop=True), features_df], axis=1)
    combined_df.index = song_df.index

    return combined_df