In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials

In [2]:
import pandas as pd
import re
import pprint

## Connexion a spotify

In [3]:
CRED_PATH_SPOTIFY = "../credentials-spotify.json"

In [4]:
with open(CRED_PATH_SPOTIFY, 'r') as handle:
    data = json.load(handle)

In [5]:
spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    **data
))

## Connexion a google

In [6]:
CREDENTIALS_PATH_GOOGLE = "../credentials-sheets.json"

In [7]:
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
SPREADSHEET = '1b75J-QTGrujSgF9r0_JPOKkcXAwzFVwpETOAyVBw8ak'

In [8]:
# Load service account credentials.
__credentials = Credentials.from_service_account_file(CREDENTIALS_PATH_GOOGLE, scopes=SCOPES)

# Creates Google Sheets API (v4/latest) service.
service = build('sheets', 'v4', credentials=__credentials)

In [9]:
values = service.spreadsheets().values().get(spreadsheetId=SPREADSHEET, range='Notations').execute()['values']
headers = values.pop(0)

In [10]:
df = pd.DataFrame(values, columns=headers)

# DF preprocessing

In [11]:
df.set_index(['genre', 'sub_genre', 'artist', 'album', 'song'], inplace=True)
df = df.apply(lambda s: s.str.replace(",", "."))
df = df.apply(lambda s: pd.to_numeric(s, errors='coerce'))

In [12]:
df.describe()

Unnamed: 0,ntQ,ntG,ntV,ntR,ntS,ntGl,ntRx,ntC,ntL
count,1356.0,616.0,737.0,212.0,251.0,113.0,19.0,20.0,14.0
mean,7.703724,8.104464,7.388738,7.713443,6.653586,7.484513,7.973684,6.435,6.944286
std,1.309181,0.825316,1.343893,1.586352,1.945438,1.724949,2.288095,1.805044,3.057809
min,0.0,4.0,0.0,0.1,0.05,0.0,2.0,0.0,0.0
25%,7.5,7.6875,7.0,7.3,5.0,7.0,7.5,5.95,6.8475
50%,8.0,8.25,7.5,8.0,7.0,7.5,8.0,6.625,8.0
75%,8.5,8.75,8.5,8.7,8.0,8.5,10.0,7.375,8.875
max,10.0,9.5,9.75,9.7,10.0,10.0,10.0,8.75,9.33


In [63]:
def eq_ignorecase(a, b):
    return re.search(a, b, re.IGNORECASE)    

In [77]:
def filter_search(row, res):
    songs = res['tracks']['items']
    valid = []
    for song in songs:
        s_artist = song["artists"][0]["name"]
        s_album = song["album"]["name"]
        s_title = song["name"]
        if eq_ignorecase(row["artist"], s_artist) and\
           eq_ignorecase(row["album"], s_album) and\
           eq_ignorecase(row["song"], s_title):
            valid.append(song)
    print(row["artist"], row["album"], row["song"])
    print(len(valid) >= 1)

In [28]:
search = "artist:\"Black Sabbath\" track:\"Evil Woman\" album:\"Black Sabbath\""
search = "artist:\"Black\%20Sabbath\"%20track:\"Evil%20Woman\"%20album:\"Black%20Sabbath\""
spotify.search(search)

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=artist%3A%22Black%5C%2520Sabbath%22%2520track%3A%22Evil%2520Woman%22%2520album%3A%22Black%2520Sabbath%22&type=track&offset=0&limit=10',
  'items': [],
  'limit': 10,
  'next': None,
  'offset': 0,
  'previous': None,
  'total': 0}}

In [24]:
bad_format = []
for idx, (_, content) in enumerate(df.index.to_frame().iterrows()):
    search = f"artist:\"{content.artist}\" track:\"{content.song}\" album:\"{content.album}\""
    search = search.replace("'", "")
    res = spotify.search(search)
    try:
        track = res['tracks']['items'][0]
    except IndexError as e:
        print(f"{search} not in spotify")
        bad_format.append(search)
        continue
    album = track['album']['name']
    name = track['name']
    artist = track['artists'][0]['name']
    id = track['id']
    print(f"{idx:<4}/{len(df)} : {id} {name} {artist} {album}")

0   /1573 : 57BDT1bvzeAzl3IXMQU4Ri I Do, I Do, I Do, I Do, I Do ABBA Abba
1   /1573 : 22NN4BS1AlqVbyKIWExgON Mamma Mia ABBA Abba


2   /1573 : 5pMmWfuL0FTGshYt7HVJ8P SOS ABBA ABBA Gold
3   /1573 : 46ou4l4zvrZMada0TgoVH9 Knowing Me, Knowing You ABBA Arrival


4   /1573 : 6cH34Jb2W9s9w8ooRtZZPf Lay All Your Love On Me ABBA Super Trouper
5   /1573 : 2nMghZvtLx6DDgTEHEsb4w Super Trouper ABBA Super Trouper


6   /1573 : 2HeTmGTjl870ucJ8mF7zl5 The Winner Takes It All ABBA Super Trouper


7   /1573 : 6vQN2a9QSgWcm74KEZYfDL Take A Chance On Me ABBA The Album
8   /1573 : 0RzhMHIsFMbOGh0oWDvNNK Waterloo ABBA Waterloo


9   /1573 : 7LRMbd3LEoV5wZJvXT1Lwb T.N.T. AC/DC High Voltage
10  /1573 : 2zYzyRzz6pRmhPzyfMEC8s Highway to Hell AC/DC Highway to Hell


11  /1573 : 5jCp5VtcpUlHtW8Dwlx13Y Angry Chair Alice In Chains Dirt
12  /1573 : 22Ntyke0ZDZy2Uuf8BEDkU Dam That River Alice In Chains Dirt


13  /1573 : 2QjkH9q5Mypj6m38u7Ni9o Dirt Alice In Chains Dirt


14  /1573 : 7FRfYOql61DGDp9VPPe2qA Down In A Hole Alice In Chains Dirt
15  /1573 : 1klrDyGRsCBPofZdqdaZp6 God Smack Alice In Chains Dirt


16  /1573 : 0rglK8l5QBSADvao8n4d2N Hate To Feel Alice In Chains Dirt
17  /1573 : 1mvEbRAlocvkJvqZIj3zHu Junkhead Alice In Chains Dirt


18  /1573 : 6a9SPVrXyrlVh5Fh08f8Bz Rain When I Die Alice In Chains Dirt
19  /1573 : 0wvIGFIgbyz4JNwQhZgTv2 Rooster Alice In Chains Dirt
20  /1573 : 5EnYdTx0BWhWM5YDcPkGAa Sickman Alice In Chains Dirt


21  /1573 : 4A065x9kJt955eGVqf813g Them Bones Alice In Chains Dirt
22  /1573 : 2s9xt247uEa0sXmAH0GvL3 Untitled Alice In Chains Dirt


23  /1573 : 5sFDReWLrZHLFZFjHsjUTS Would? Alice In Chains Dirt
artist:"Alkapote, Vald" track:"Plus haut - Les marches de lempeureur Saison 3 / épisode 1" album:"N/A" not in spotify
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=artist%3A%22Alkapote%2C+Vald%22+track%3A%22Plus+haut+-+Les+marches+de+lempeureur+Saison+3+%2F+%C3%A9pisode+1%22+album%3A%22N%2FA%22&type=track&offset=0&limit=10', 'items': [], 'limit': 10, 'next': None, 'offset': 0, 'previous': None, 'total': 0}}


In [23]:
bad_format

['artist:"Alkapote, Vald" track:"Plus haut - Les marches de lempeureur Saison 3 / épisode 1" album:"N/A"',
 'artist:"Auracle" track:"Bombs Away Ballet" album:"City Slickers"',
 'artist:"Auracle" track:"City of Penetrating Light" album:"City Slickers"',
 'artist:"Auracle" track:"Honey" album:"City Slickers"',
 'artist:"Auracle" track:"Little City Slickers" album:"City Slickers"',
 'artist:"Auracle" track:"Rotary Andys Raggedy" album:"City Slickers"',
 'artist:"Auracle" track:"Sambanana" album:"City Slickers"',
 'artist:"Auracle" track:"Tied Shoes" album:"City Slickers"',
 'artist:"B.J. Thomas" track:"Rain Drops Keep Falling on My Head" album:"Rain Drops Keep Fallin On My Head"',
 'artist:"Black Sabbath" track:"Evil Woman" album:"Black Sabbath"',
 'artist:"Casiopea" track:"Galactic Funk" album:"Crosspoint"',
 'artist:"Casiopea" track:"Swear!" album:"Mint Jams"',
 'artist:"Claude François" track:"Cest la même chanson" album:"Cest la Même Chanson"',
 'artist:"Claude François" track:"17 ans