### Importing libraries

In [1]:
import getpass
import pprint
import re
import time
from urllib.parse import quote_plus

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from sqlalchemy import create_engine

### API connection

In [3]:
# Ask for each Spotify credential with its own prompt so the two hidden
# inputs cannot be mixed up (both used the same default prompt before).
c_id = getpass.getpass("Spotify client ID: ")
c_se = getpass.getpass("Spotify client secret: ")

In [4]:
# Spotify client authenticated with the client ID / secret entered above.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=c_id,
        client_secret=c_se,
        requests_timeout=1000,
    )
)

### Database connection

In [None]:
# Prompt for the MySQL password instead of storing it in the notebook, where
# it would be saved and shared together with the file.
pw = getpass.getpass("MySQL root password: ")
# URL-escape the password so characters such as '@', ':' or '/' cannot break
# the connection URL.
connection_string = 'mysql+pymysql://root:' + quote_plus(pw) + '@localhost:3306/'
engine = create_engine(connection_string, pool_pre_ping = False)

### Get as many tracks as possible for each genre

In [None]:
def tracks_items_to_df(tracks_items) :
    """Flatten track objects into a DataFrame with one row per track.

    Each element of ``tracks_items`` is indexed like a dict: it must expose
    the track-level keys listed below, an ``"artists"`` iterable and an
    ``"album"`` mapping.  Columns come out in this order: track fields,
    artist fields, album fields.

    When a track lists several artists, each artist overwrites the same
    ``artist_id`` / ``artist_name`` cells, so only the last listed artist is
    kept.  A track without artists gets no artist values in its row.

    An empty ``tracks_items`` yields an empty DataFrame with no columns.
    """
    # output column -> key to read from the corresponding source object
    track_keys = {"track_id":"id","track_name":"name","track_preview_url":"preview_url","track_duration":"duration_ms","track_explicit":"explicit","track_popularity":"popularity"}
    artist_keys = {"artist_id":"id","artist_name":"name"}
    album_keys = {"album_id":"id","album_name":"name","album_release_date":"release_date"}

    def flatten(item) :
        # Track-level values first so they lead the column order.
        row = {column: item[key] for column, key in track_keys.items()}

        # Later artists overwrite earlier ones (last artist wins).
        for performer in item["artists"] :
            row.update((column, performer[key]) for column, key in artist_keys.items())

        row.update((column, item["album"][key]) for column, key in album_keys.items())
        return row

    return pd.DataFrame([flatten(item) for item in tracks_items])
    

In [None]:
# Genre names driving the notebook: each one is used to build a "genre:<name>"
# search query, to name a per-genre SQL table (hyphens removed), and its value
# is stored in the track_genre column later passed as a recommendation seed.
genres = ["acoustic", "afrobeat", "alt-rock", "alternative", "ambient", "anime", "black-metal", "bluegrass", "blues", "brazil", "breakbeat", "british", "cantopop", "chicago-house", "children", "chill", "classical", "club", "comedy", "country", "dance", "dancehall", "death-metal", "deep-house", "detroit-techno", "disco", "drum-and-bass", "dub", "dubstep", "edm", "electro", "electronic", "emo", "folk", "forro", "french", "funk", "garage", "german", "gospel", "goth", "grindcore", "groove", "grunge", "guitar", "happy", "hard-rock", "hardcore", "hardstyle", "heavy-metal", "hip-hop", "honky-tonk", "house", "idm", "indian", "indie", "indie-pop", "industrial", "iranian", "j-dance", "j-idol", "j-pop", "j-rock", "jazz", "k-pop", "kids", "latin", "latino", "malay", "mandopop", "metal", "metalcore", "minimal-techno",  "mpb", "new-age", "opera", "pagode", "party", "piano", "pop", "pop-film", "power-pop", "progressive-house", "psych-rock", "punk", "punk-rock", "r-n-b", "reggae", "reggaeton","rock", "rock-n-roll", "rockabilly", "romance", "sad", "salsa", "samba", "sertanejo", "show-tunes", "singer-songwriter", "ska", "sleep", "songwriter", "soul", "spanish", "study", "swedish", "synth-pop", "tango", "techno", "trance", "trip-hop", "turkish", "world-music"]

In [None]:
def _stack_with_schema(frames, base_columns) :
    """Concatenate ``frames`` once, keeping ``base_columns`` first.

    Empty frames are skipped.  With nothing left to stack, an empty frame
    carrying ``base_columns`` is returned.  Columns not in ``base_columns``
    are appended after them in order of appearance.
    """
    frames = [frame for frame in frames if not frame.empty]
    if not frames :
        return pd.DataFrame(columns=base_columns)
    stacked = pd.concat(frames)
    extras = [column for column in stacked.columns if column not in base_columns]
    return stacked.reindex(columns=base_columns + extras)


def get_df_from_genres(genres) :
    """Search tracks for every genre, store one SQL table per genre, return all rows.

    For each genre the search is paged 50 tracks at a time for offsets
    0..950.  Paging for a genre stops at the first page that yields no rows;
    the genre name is printed at that point.  The rows collected for the
    genre are written to table ``df_<genre without hyphens>`` in the
    ``spotify`` schema (replacing any existing table), even when empty.

    Parameters
    ----------
    genres : iterable of str
        Genre names used both in the ``genre:<name>`` query and as the value
        of the ``track_genre`` column.

    Returns
    -------
    pandas.DataFrame
        All per-genre frames stacked.  The row index is not reset, so index
        labels repeat across pages and genres.

    Notes
    -----
    Relies on the notebook-level ``sp`` client, ``tracks_items_to_df`` and
    ``engine``.  Errors raised by the search call are not caught.
    """
    base_columns = ["track_id","track_name","track_genre","track_preview_url","track_popularity","track_duration","artist_id","artist_name","album_id","album_name","album_release_date"]

    genre_frames = []
    for genre in genres :
        query = "genre:{}".format(genre)
        page_frames = []

        for offset in range(0,1000,50) :
            items = sp.search(q=query, type='track', limit=50, offset = offset)["tracks"]["items"]
            df_offset = tracks_items_to_df(items)

            # An empty page means there is nothing (more) to fetch for this
            # genre; test it explicitly instead of catching the insert error.
            if df_offset.empty :
                print(genre)
                break

            df_offset.insert(2,"track_genre",genre)
            page_frames.append(df_offset)

        # One concat per genre instead of re-copying the frame on every page.
        df_genre = _stack_with_schema(page_frames, base_columns)
        # Write through the shared engine rather than a URL string.
        df_genre.to_sql("df_{}".format(genre.replace("-","")), con=engine, schema='spotify', if_exists='replace', index = False)

        genre_frames.append(df_genre)

    return _stack_with_schema(genre_frames, base_columns)

In [None]:
# Run the crawl for every genre in `genres`; this issues the Spotify search
# requests and writes one SQL table per genre as a side effect.
df = get_df_from_genres(genres)

In [None]:
# Store the combined frame as spotify.df_all_genres (replacing any existing
# table), going through the shared engine instead of a connection URL string.
df.to_sql("df_all_genres", con=engine, schema='spotify', if_exists='replace', index = False)

### Merging tables to df_all_genres

In [None]:
# Per-genre table names: "df_" followed by the genre with hyphens removed.
tables_names = ["df_" + genre.replace("-", "") for genre in genres]

In [None]:
# Read every per-genre table from the `spotify` schema and stack them with a
# single concat (the frame is no longer re-copied for each table).  The row
# index is not reset, so labels repeat from one table to the next.
genre_frames = [
    pd.read_sql("SELECT * FROM `spotify`.`{}`".format(table), engine)
    for table in tables_names
]
df_all_genres = pd.concat(genre_frames)

df_all_genres

### Get more tracks with recommendations

In [None]:
def get_rec(df):
    """Fetch Spotify recommendations seeded by each row of ``df``.

    For every row, one request is made with the row's ``artist_id``,
    ``track_id`` (both converted to ``str``) and ``track_genre`` as seeds,
    asking for 100 tracks.  The returned tracks are flattened with
    ``tracks_items_to_df`` and all results are stacked.

    Rows are read by position, so ``df`` may carry any index (filtered,
    sliced or repeated labels).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``artist_id``, ``track_id`` and ``track_genre`` columns
        unless it has no rows.

    Returns
    -------
    pandas.DataFrame
        Recommended tracks with a fresh 0..n-1 index; an empty DataFrame
        when ``df`` has no rows or no recommendation came back.
    """
    if len(df) == 0:
        return pd.DataFrame()

    frames = []
    # zip over the columns walks the rows positionally, independent of labels.
    for artist_id, track_id, genre in zip(df['artist_id'], df['track_id'], df["track_genre"]):
        r = sp.recommendations(seed_artists=[str(artist_id)],seed_tracks=[str(track_id)],seed_genres=[genre],limit=100) #adapt seed_genres
        rec_frame = tracks_items_to_df(r['tracks'])
        if not rec_frame.empty:
            frames.append(rec_frame)

    if not frames:
        return pd.DataFrame()
    # Single concat instead of growing the result frame row batch by row batch.
    return pd.concat(frames, ignore_index=True)

In [None]:
# For every genre: read its table, fetch recommendations for the leading rows
# and store them in spotify.df_<genre>_reco (replacing any existing table).
for genre in genres :
    print("start genre : {}".format(genre))
    table_name = "df_{}".format(genre.replace("-",""))
    df = pd.read_sql("SELECT * FROM spotify.{}".format(table_name), engine)
    # .loc slicing is label-based and end-inclusive: labels up to and
    # including 100 are used as seeds.
    df_reco = get_rec(df.loc[:100,:])
    # Write through the shared engine rather than a connection URL string.
    df_reco.to_sql("{}_reco".format(table_name), con=engine, schema='spotify', if_exists='replace', index = False)

### Merging reco tables

In [None]:
# Recommendation table names: "df_<genre without hyphens>_reco", one per genre.
reco_tables = ["df_" + genre.replace("-", "") + "_reco" for genre in genres]

TODO: add a "source genre" column, add the audio features, then merge.

In [None]:
def merging_reco_tables() :
    """Stack every recommendation table into one DataFrame tagged by genre.

    Each table named in the notebook-level ``reco_tables`` list is read from
    the ``spotify`` schema through ``engine``.  A ``track_genre`` column is
    inserted at position 2, filled with the part of the table name found
    between its underscores (``df_<genre>_reco`` -> ``<genre>``).  Frames are
    concatenated in list order without resetting the row index.
    """
    genre_in_name = re.compile("(?<=_)[^_]+(?=_)")

    def load_tagged(table_name) :
        # Import the table, then label its rows with the genre from its name.
        frame = pd.read_sql("SELECT * FROM `spotify`.`{}`".format(table_name), engine)
        frame.insert(2,"track_genre",genre_in_name.search(table_name).group(0))
        return frame

    merged = load_tagged(reco_tables[0])
    for table_name in reco_tables[1:] :
        merged = pd.concat([merged, load_tagged(table_name)])

    return merged

In [None]:
# Read and stack all recommendation tables listed in `reco_tables`.
df_merger_reco = merging_reco_tables()