In [1]:
import os
from dotenv import load_dotenv, find_dotenv
import base64
import requests
import pandas as pd
from pyspark.sql import SparkSession 
import pyspark.sql.functions as fun
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import LogisticRegressionModel

In [2]:
# Create (or reuse) a Spark session on the local master with two worker
# threads; the application is registered under the name "ML_MODEL".
spark = (
    SparkSession.builder
    .master("local[2]")
    .appName("ML_MODEL")
    .getOrCreate()
)

23/07/17 13:15:17 WARN Utils: Your hostname, Raveeshs-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 172.20.10.2 instead (on interface en0)
23/07/17 13:15:17 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/07/17 13:15:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
# Echo the session object as the cell result to confirm it was created.
spark

In [4]:
# Load variables from the .env file located by find_dotenv() into the process
# environment, then read the API credentials from it. Each value is None when
# the corresponding variable is not set.
load_dotenv(find_dotenv())
CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")

In [5]:
def getToken(timeout=10):
    """Request an access token using the client-credentials grant.

    Reads the module-level ``CLIENT_ID`` and ``CLIENT_SECRET``, sends them as
    HTTP Basic credentials to ``https://accounts.spotify.com/api/token`` and
    returns the ``access_token`` field of the JSON response.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The value of ``access_token`` from the response body.

    Raises:
        RuntimeError: If ``CLIENT_ID`` or ``CLIENT_SECRET`` is unset or empty.
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within ``timeout``.
        KeyError: If the response body has no ``access_token`` field.
    """
    # Fail early: formatting a missing credential would otherwise send the
    # literal text "None" to the server and produce an opaque failure later.
    if not CLIENT_ID or not CLIENT_SECRET:
        raise RuntimeError(
            "CLIENT_ID and CLIENT_SECRET must be set before requesting a token"
        )

    authBytes = f"{CLIENT_ID}:{CLIENT_SECRET}".encode("utf-8")
    authBase64 = base64.b64encode(authBytes).decode("utf-8")

    url = "https://accounts.spotify.com/api/token"
    tokenHeaders = {
        "Authorization" : f"Basic {authBase64}",
        "Content-Type" : "application/x-www-form-urlencoded"
    }
    tokenRequestData = {
        "grant_type" : "client_credentials"
    }
    tokenResponse = requests.post(
        url,
        headers=tokenHeaders,
        data=tokenRequestData,
        timeout=timeout
    )
    # Surface rejected credentials (or any other HTTP error) as an HTTPError
    # instead of a KeyError on the missing 'access_token' field.
    tokenResponse.raise_for_status()
    return tokenResponse.json()['access_token']

# Fetch one token up front; later cells pass it to each API request.
TOKEN = getToken()

In [29]:
def getTrack(token, song_name, artist_name, timeout=10):
    """Search Spotify for a track and return the ID of the first hit.

    Sends a search restricted to ``track:<song_name> artist:<artist_name>``
    with ``type=track`` and ``limit=1`` to ``https://api.spotify.com/v1/search``.

    Args:
        token: Bearer token placed in the ``Authorization`` header.
        song_name: Track title to search for.
        artist_name: Artist name to search for.
        timeout: Seconds to wait for the search endpoint before giving up.

    Returns:
        The ``id`` field of the first item in ``tracks.items``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within ``timeout``.
        KeyError: If the search returned no items.
    """
    url = "https://api.spotify.com/v1/search"
    # Let requests build and percent-encode the query string: titles or artist
    # names containing '&', '#', '+' or similar characters would otherwise
    # corrupt a hand-concatenated URL.
    searchParams = {
        "q" : f"track:{song_name} artist:{artist_name}",
        "type" : "track",
        "limit" : 1
    }
    songHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    songResponse = requests.get(
        url,
        headers=songHeaders,
        params=searchParams,
        timeout=timeout
    )
    songResponse.raise_for_status()
    items = songResponse.json()['tracks']['items']
    if not items:
        # KeyError is the exception type an empty result already produced;
        # it now carries a message that names the failed lookup.
        raise KeyError(
            f"no Spotify track found for track={song_name!r} artist={artist_name!r}"
        )
    return items[0]['id']

# Example lookup: resolve one song title / artist pair to its track ID.
songID = getTrack(TOKEN, "I hate the way", "Sofia Carson")

In [30]:
# Show the resolved track ID.
print(songID)

3cN3ze6ErUKT3xghdZPQCw


In [31]:
def getSongFeatures(token, songID, timeout=10):
    """Fetch audio features for ``songID`` and return them as a Spark DataFrame.

    Calls ``https://api.spotify.com/v1/audio-features?ids=<songID>``, flattens
    the ``audio_features`` list with pandas, keeps the first 11 columns and
    converts the result with the module-level ``spark`` session.

    Args:
        token: Bearer token placed in the ``Authorization`` header.
        songID: Track ID (inserted verbatim into the ``ids`` query parameter).
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The object returned by ``spark.createDataFrame`` for the 11-column
        pandas frame.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within ``timeout``.
        ValueError: If the response holds no feature record for a requested ID.
    """
    url = f'https://api.spotify.com/v1/audio-features?ids={songID}'
    songFeaturesHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    featureResponse = requests.get(
        url,
        headers=songFeaturesHeaders,
        timeout=timeout
    )
    featureResponse.raise_for_status()
    audioFeatures = featureResponse.json()['audio_features']
    # A missing record shows up as an empty list or a null entry; reject it
    # here rather than letting pandas/Spark fail later with an unrelated error.
    if not audioFeatures or any(entry is None for entry in audioFeatures):
        raise ValueError(f"no audio features returned for id(s) {songID!r}")

    dataDF = pd.json_normalize(audioFeatures)
    # NOTE(review): positional slice that depends on the key order of the JSON
    # response -- presumably the 11 numeric features the saved model expects;
    # confirm against the training code and consider selecting by column name.
    dataDF = dataDF.iloc[:, :11]
    return spark.createDataFrame(dataDF)

In [32]:
# Fetch the audio features of the looked-up track as a Spark DataFrame.
features = getSongFeatures(TOKEN, songID)

In [33]:
# Load the previously saved model from the 'songPredictionModel' path.
songPredictionModel = LogisticRegressionModel.load('songPredictionModel')

In [34]:
def songPredict(features, songPredictionModel):
    """Return the model's prediction for the first row of ``features``.

    Every column of ``features`` is assembled into a single vector column
    named "features", ``songPredictionModel.transform`` is applied to the
    assembled data, and the "prediction" value of the first resulting row is
    returned.
    """
    inputColumns = list(features.columns)
    vectorised = VectorAssembler(
        inputCols=inputColumns,
        outputCol="features",
    ).transform(features)
    firstRow = songPredictionModel.transform(vectorised).first()
    return firstRow["prediction"]

In [35]:
# Run the loaded model on the fetched features; the bare name on the last
# line displays the prediction value as the cell output.
pred = songPredict(features, songPredictionModel)
pred

0.0