# Import Dependencies

In [1]:
import pickle
import pandas as pd
import numpy as np
import os

# Predict Function (DataFrame)

In [2]:
def predictGenre(test_track, scaler=None, model=None):
    """Predict the most likely genre for a track and report its probability.

    Parameters
    ----------
    test_track : pandas.DataFrame or array-like
        Audio features, passed unchanged to ``scaler.transform``.
        NOTE(review): presumably a single row holding the 13 features in the
        column order built by ``predictGenreDict`` -- confirm against training.
    scaler : object, optional
        Fitted scaler exposing ``transform``. When omitted it is unpickled from
        ``Website/Models/XGBoost_scaler.sav``.
    model : object, optional
        Fitted classifier exposing ``predict_proba``. When omitted it is
        unpickled from ``Website/Models/XGBoost_model.sav``.

    Returns
    -------
    str
        Genre with the highest predicted probability for the first row.

    Raises
    ------
    OSError
        If a model artifact has to be loaded and cannot be opened.
    """
    def _load_pickle(path):
        # A context manager closes the handle even when unpickling fails.
        # pickle can execute arbitrary code: only load artifacts you trust.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    #Load Machine Learning Model (skipped for artifacts supplied by the caller)
    if scaler is None:
        scaler = _load_pickle('Website/Models/XGBoost_scaler.sav')
    if model is None:
        model = _load_pickle('Website/Models/XGBoost_model.sav')

    #Make Predictions
    newData_scaled = scaler.transform(test_track)
    probs_test = model.predict_proba(newData_scaled)

    #Map In Genres
    # NOTE(review): assumes this order matches the class encoding the model
    # was trained with -- confirm against the training notebook.
    genre = ['classical','country','electronic','indie','jazz','latin','pop','r&b','rap','rock','show_tunes','worship']
    preds_test = pd.DataFrame(probs_test, columns=genre)

    # Identify Genre with Highest Probability
    probability = preds_test.max(axis=1)
    predicted_genre = preds_test.idxmax(axis=1)
    print(f'Test track has a {probability.values[0]} probability of being {predicted_genre.values[0]}') 

    #Return Predicted Genre (first row only)
    return predicted_genre.values[0]

# Predict Function (Dictionary)

In [3]:
def predictGenreDict(test_dict):
    """Predict the genre of a track described by a dictionary of audio features.

    Parameters
    ----------
    test_dict : dict
        Maps each of the 13 feature names listed in ``feature_order`` to a
        scalar value. Additional keys are ignored.

    Returns
    -------
    str
        Predicted genre, as returned by ``predictGenre``.

    Raises
    ------
    KeyError
        If any required feature is missing from ``test_dict``.
    """
    # Column order handed to the scaler; identical to the order used by
    # predictGenreIndividual.
    feature_order = ['acousticness','danceability','duration_ms','energy','instrumentalness',
                     'liveness','loudness','speechiness','tempo','valence','popularity','key','mode']

    # Fail early with a message that names the missing features.
    missing = [name for name in feature_order if name not in test_dict]
    if missing:
        raise KeyError(f'test_dict is missing audio features: {missing}')

    #Create Dataframe from Dictionary (single row, columns in model order)
    test_df = pd.DataFrame(test_dict, index=[0,])[feature_order]

    # Loading, scaling, prediction and reporting live in predictGenre so the
    # three entry points cannot drift apart.
    return predictGenre(test_df)

# Predict Function (Individual Audio Feature Parameters)

In [4]:
def predictGenreIndividual(acoustic, dance, duration, energy, instrumental, live, loud, speech, temp, val, popularity, key, mode):
    """Predict the genre of a track from individually supplied audio features.

    Each argument becomes one column of a single-row DataFrame: ``acoustic``
    is stored as ``acousticness``, ``dance`` as ``danceability``, ``duration``
    as ``duration_ms``, ``instrumental`` as ``instrumentalness``, ``live`` as
    ``liveness``, ``loud`` as ``loudness``, ``speech`` as ``speechiness``,
    ``temp`` as ``tempo`` and ``val`` as ``valence``; ``energy``,
    ``popularity``, ``key`` and ``mode`` keep their names.

    Returns
    -------
    str
        Predicted genre, as returned by ``predictGenre``.
    """
    #Create Dataframe with Test Track Information
    # Insertion order fixes the column order handed to the scaler; it matches
    # the order predictGenreDict selects.
    test_dict = {
        "acousticness": acoustic,
        "danceability": dance,
        "duration_ms": duration,
        "energy": energy,
        "instrumentalness": instrumental,
        "liveness": live,
        "loudness": loud,
        "speechiness": speech,
        "tempo": temp,
        "valence": val,
        "popularity": popularity,
        "key": key,
        "mode": mode,
    }
    test_track = pd.DataFrame(test_dict, index=[0,])

    # Loading, scaling, prediction and reporting live in predictGenre so the
    # three entry points cannot drift apart.
    return predictGenre(test_track)

In [5]:
# Smoke test using the same feature values as the billie_elilish dictionary
# defined further down; keyword arguments make each number self-describing.
predictGenreIndividual(
    acoustic=0.795,
    dance=0.444,
    duration=208155,
    energy=0.309,
    instrumental=0.132,
    live=0.352,
    loud=-10.956,
    speech=0.062,
    temp=104.745,
    val=0.0875,
    popularity=56,
    key=8,
    mode=1,
)

Test track has a 0.3910420835018158 probability of being indie


'indie'

# Test Songs not within the Original Dataset

### Pop / Billie Eilish

- Track Title: My Future
- Release Date: July 30th 2020
- Spotify Link: https://open.spotify.com/track/2ygvZOXrIeVL4xZmAWJT2C
- Spotify API Audio Features: https://developer.spotify.com/console/get-audio-features-track/?id=2ygvZOXrIeVL4xZmAWJT2C

In [6]:
# Spotify audio features for "My Future" (Billie Eilish); listed genre: pop.
# The variable name misspells "Eilish"; kept because later cells reference it.
billie_elilish = {
    'danceability': 0.444,
    'energy': 0.309,
    'key': 8,
    'loudness': -10.956,
    'mode': 1,
    'speechiness': 0.062,
    'acousticness': 0.795,
    'instrumentalness': 0.132,
    'liveness': 0.352,
    'valence': 0.0875,
    'tempo': 104.745,
    'duration_ms': 208155,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [7]:
# Listed genre: pop. The returned label is shown as the cell output.
predictGenreDict(test_dict=billie_elilish)

Test track has a 0.3910420835018158 probability of being indie


'indie'

### Pop / Alanis Morissette

- Track Title: Reckoning
- Spotify Link: https://open.spotify.com/track/6GYc359apC4J1xMJc3GMNv

In [8]:
# Audio features for "Reckoning" (Alanis Morissette); listed genre: pop.
alanis_morissette = {
    'danceability': 0.31,
    'energy': 0.627,
    'key': 10,
    'loudness': -6.679,
    'mode': 0,
    'speechiness': 0.0429,
    'acousticness': 0.621,
    'instrumentalness': 0.0000954,
    'liveness': 0.13,
    'valence': 0.158,
    'tempo': 118.319,
    'duration_ms': 205400,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [9]:
# Listed genre: pop. The returned label is shown as the cell output.
predictGenreDict(test_dict=alanis_morissette)

Test track has a 0.3598160445690155 probability of being pop


'pop'

### Pop / Taylor Swift

- Track Title: Cardigan
- Spotify Link: https://open.spotify.com/track/4R2kfaDFhslZEMJqAFNpdd

In [10]:
# Audio features for "Cardigan" (Taylor Swift); listed genre: pop.
taylor_swift = {
    'danceability': 0.613,
    'energy': 0.581,
    'key': 0,
    'loudness': -8.588,
    'mode': 0,
    'speechiness': 0.0424,
    'acousticness': 0.537,
    'instrumentalness': 0.000345,
    'liveness': 0.25,
    'valence': 0.551,
    'tempo': 130.033,
    'duration_ms': 239560,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [11]:
# Listed genre: pop. The returned label is shown as the cell output.
predictGenreDict(test_dict=taylor_swift)

Test track has a 0.32804417610168457 probability of being latin


'latin'

### Pop / Justin Bieber ft Quavo

- Track Title: Intentions (feat. Quavo)
- Spotify Link: https://open.spotify.com/track/364dI1bYnvamSnBJ8JcNzN

In [12]:
# Audio features for "Intentions (feat. Quavo)" (Justin Bieber); listed genre: pop.
justin_bieber = {
    'danceability': 0.811,
    'energy': 0.553,
    'key': 9,
    'loudness': -6.644,
    'mode': 1,
    'speechiness': 0.0552,
    'acousticness': 0.317,
    'instrumentalness': 0,
    'liveness': 0.105,
    'valence': 0.86,
    'tempo': 148.014,
    'duration_ms': 212869,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [13]:
# Listed genre: pop. The returned label is shown as the cell output.
predictGenreDict(test_dict=justin_bieber)

Test track has a 0.8057001829147339 probability of being latin


'latin'

### R&B / John Legend

- Track Title: Wild (feat. Gary Clark Jr.)
- Spotify Link: https://open.spotify.com/track/4rVW6XqAsSaf5vOwc8FREW

In [14]:
# Audio features for "Wild (feat. Gary Clark Jr.)" (John Legend); listed genre: r&b.
john_legend = {
    'danceability': 0.557,
    'energy': 0.585,
    'key': 8,
    'loudness': -6.201,
    'mode': 1,
    'speechiness': 0.0431,
    'acousticness': 0.036,
    'instrumentalness': 0,
    'liveness': 0.176,
    'valence': 0.305,
    'tempo': 63.392,
    'duration_ms': 196907,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [15]:
# Listed genre: r&b. The returned label is shown as the cell output.
predictGenreDict(test_dict=john_legend)

Test track has a 0.48351383209228516 probability of being pop


'pop'

### R&B / PARTYNEXTDOOR ft Rihanna 

- Track Title: BELIEVE IT
- Spotify Link: https://open.spotify.com/track/4HDCLYli2SUdkq9OjmvhSD

In [16]:
# Audio features for "BELIEVE IT" (PARTYNEXTDOOR ft Rihanna); listed genre: r&b.
partynextdoor = {
    'danceability': 0.43,
    'energy': 0.7,
    'key': 6,
    'loudness': -5.35,
    'mode': 1,
    'speechiness': 0.264,
    'acousticness': 0.0902,
    'instrumentalness': 0,
    'liveness': 0.216,
    'valence': 0.731,
    'tempo': 158.373,
    'duration_ms': 183219,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [17]:
# Listed genre: r&b. The returned label is shown as the cell output.
predictGenreDict(test_dict=partynextdoor)

Test track has a 0.45507097244262695 probability of being pop


'pop'

### Rap / DJ Khaled ft. Drake

- Track Title: POPSTAR
- Spotify Link: https://open.spotify.com/track/6EDO9iiTtwNv6waLwa1UUq

In [18]:
# Audio features for "POPSTAR" (DJ Khaled ft. Drake); listed genre: rap.
dj_khaled = {
    'danceability': 0.8,
    'energy': 0.56,
    'key': 5,
    'loudness': -4.818,
    'mode': 0,
    'speechiness': 0.261,
    'acousticness': 0.057,
    'instrumentalness': 0,
    'liveness': 0.134,
    'valence': 0.45,
    'tempo': 163.071,
    'duration_ms': 200221,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [19]:
# Listed genre: rap. The returned label is shown as the cell output.
predictGenreDict(test_dict=dj_khaled)

Test track has a 0.7897329330444336 probability of being rap


'rap'

### Rap / Drake ft. Lil Durk

- Track Title: Laugh Now Cry Later (feat. Lil Durk)
- Spotify Link: https://open.spotify.com/track/2SAqBLGA283SUiwJ3xOUVI

In [20]:
# Audio features for "Laugh Now Cry Later (feat. Lil Durk)" (Drake); listed genre: rap.
drake = {
    'danceability': 0.761,
    'energy': 0.518,
    'key': 0,
    'loudness': -8.871,
    'mode': 1,
    'speechiness': 0.134,
    'acousticness': 0.244,
    'instrumentalness': 0.0000347,
    'liveness': 0.107,
    'valence': 0.522,
    'tempo': 133.976,
    'duration_ms': 261493,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [21]:
# Listed genre: rap. The returned label is shown as the cell output.
predictGenreDict(test_dict=drake)

Test track has a 0.550229012966156 probability of being rap


'rap'

### Rock / Bon Jovi

- Track Title: Limitless
- Spotify Link: https://open.spotify.com/track/71JuDGXgyY7MbmXtldZ4C3

In [22]:
# Audio features for "Limitless" (Bon Jovi); listed genre: rock.
bon_jovi = {
    'danceability': 0.438,
    'energy': 0.826,
    'key': 10,
    'loudness': -5.454,
    'mode': 1,
    'speechiness': 0.075,
    'acousticness': 0.00586,
    'instrumentalness': 0.00000918,
    'liveness': 0.362,
    'valence': 0.378,
    'tempo': 144.149,
    'duration_ms': 221947,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [23]:
# Listed genre: rock. The returned label is shown as the cell output.
predictGenreDict(test_dict=bon_jovi)

Test track has a 0.5546531677246094 probability of being rock


'rock'

### Rock / Elton John, Ozzy Osbourne, Will Malone

- Track Title: Ordinary Man (feat. Elton John)
- Spotify Link: https://open.spotify.com/track/6eEYGGFfFbtKHCgJM4uh9v

In [24]:
# Audio features for "Ordinary Man (feat. Elton John)"; listed genre: rock.
elton_john = {
    'danceability': 0.428,
    'energy': 0.599,
    'key': 4,
    'loudness': -2.135,
    'mode': 1,
    'speechiness': 0.0286,
    'acousticness': 0.0665,
    'instrumentalness': 0.00000986,
    'liveness': 0.137,
    'valence': 0.134,
    'tempo': 125.83,
    'duration_ms': 301730,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [25]:
# Listed genre: rock. The returned label is shown as the cell output.
predictGenreDict(test_dict=elton_john)

Test track has a 0.3190756142139435 probability of being pop


'pop'

### Classical / Chad Lawson

- Genre: Classical
- Track Title: In the Waiting
- Spotify Link: https://open.spotify.com/track/0WzetzBykY3fl41dEJjoYa

In [26]:
# Audio features for "In the Waiting" (Chad Lawson); listed genre: classical.
chad_lawson = {
    'danceability': 0.337,
    'energy': 0.0111,
    'key': 0,
    'loudness': -27.273,
    'mode': 1,
    'speechiness': 0.0456,
    'acousticness': 0.991,
    'instrumentalness': 0.958,
    'liveness': 0.0886,
    'valence': 0.148,
    'tempo': 75.69,
    'duration_ms': 159208,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [27]:
# Listed genre: classical. The returned label is shown as the cell output.
predictGenreDict(test_dict=chad_lawson)

Test track has a 0.9713071584701538 probability of being classical


'classical'

### Country / Rascal Flatts

- Track Title: How They Remember You
- Spotify Link: https://open.spotify.com/track/3qwPYg8xTWPafQD3YUNaTU

In [28]:
# Audio features for "How They Remember You" (Rascal Flatts); listed genre: country.
# The variable name misspells the band; kept because later cells reference it.
rascall_flats = {
    'danceability': 0.442,
    'energy': 0.8,
    'key': 2,
    'loudness': -5.12,
    'mode': 1,
    'speechiness': 0.0368,
    'acousticness': 0.0124,
    'instrumentalness': 0,
    'liveness': 0.0785,
    'valence': 0.498,
    'tempo': 152.004,
    'duration_ms': 211707,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [29]:
# Listed genre: country. The returned label is shown as the cell output.
predictGenreDict(test_dict=rascall_flats)

Test track has a 0.32943424582481384 probability of being rock


'rock'

### Country / Tenille Townes

- Track Title: Holding Out for the One
- Spotify Link: https://open.spotify.com/track/7IN4uBTZZK2KJ4tjBL44xU

In [30]:
# Audio features for "Holding Out for the One" (Tenille Townes); listed genre: country.
tenille_townes = {
    'danceability': 0.6,
    'energy': 0.742,
    'key': 6,
    'loudness': -4.135,
    'mode': 1,
    'speechiness': 0.0379,
    'acousticness': 0.00716,
    'instrumentalness': 0,
    'liveness': 0.363,
    'valence': 0.796,
    'tempo': 85.008,
    'duration_ms': 195187,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [31]:
# Listed genre: country. The returned label is shown as the cell output.
predictGenreDict(test_dict=tenille_townes)

Test track has a 0.5605005621910095 probability of being country


'country'

### Electronic / David Guetta ft Sia 

- Track Title: Let's Love
- Spotify Link: https://open.spotify.com/track/6lhZLbb0czULpjb2kFryPS?si=1kIvoYZ_R7is26bsG9gVyA

In [32]:
# Audio features for "Let's Love" (David Guetta ft Sia); listed genre: electronic.
# The variable name misspells "Guetta"; kept because later cells reference it.
david_guerra = {
    'danceability': 0.662,
    'energy': 0.868,
    'key': 5,
    'loudness': -4.025,
    'mode': 1,
    'speechiness': 0.0341,
    'acousticness': 0.00882,
    'instrumentalness': 0.00244,
    'liveness': 0.584,
    'valence': 0.353,
    'tempo': 92.998,
    'duration_ms': 200645,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [33]:
# Listed genre: electronic. The returned label is shown as the cell output.
predictGenreDict(test_dict=david_guerra)

Test track has a 0.25189173221588135 probability of being pop


'pop'

### Indie / Giant Rooks

- Track Title: Misinterpretations
- Spotify Link: https://open.spotify.com/track/7a4iLgRkkk6Xb7M6yc9Rh2

In [34]:
# Audio features for "Misinterpretations" (Giant Rooks); listed genre: indie.
# The variable name misspells "Rooks"; kept because later cells reference it.
giant_rocks = {
    'danceability': 0.53,
    'energy': 0.869,
    'key': 10,
    'loudness': -3.542,
    'mode': 0,
    'speechiness': 0.0425,
    'acousticness': 0.0199,
    'instrumentalness': 0.000365,
    'liveness': 0.13,
    'valence': 0.531,
    'tempo': 153.06,
    'duration_ms': 292733,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [35]:
# Listed genre: indie. The returned label is shown as the cell output.
predictGenreDict(test_dict=giant_rocks)

Test track has a 0.33850231766700745 probability of being rock


'rock'

### Jazz / Instrumental Jazz Music Guys

- Track Title: Just Smile
- Spotify Link: https://open.spotify.com/track/4IG1XgvlkeWmTK00mdNGB0

In [36]:
# Audio features for "Just Smile" (Instrumental Jazz Music Guys); listed genre: jazz.
instrumental_Jazz_music_guys = {
    'danceability': 0.599,
    'energy': 0.177,
    'key': 0,
    'loudness': -14.621,
    'mode': 1,
    'speechiness': 0.0509,
    'acousticness': 0.98,
    'instrumentalness': 0.577,
    'liveness': 0.138,
    'valence': 0.507,
    'tempo': 115.325,
    'duration_ms': 258341,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [37]:
# Listed genre: jazz. The returned label is shown as the cell output.
predictGenreDict(test_dict=instrumental_Jazz_music_guys)

Test track has a 0.3057435154914856 probability of being pop


'pop'

### Latin / Christian Nodal

- Track Title: Nace Un Borracho
- Spotify Link: https://open.spotify.com/track/6SnO4yCCYtz2rffMpEAlro

In [38]:
# Audio features for "Nace Un Borracho" (Christian Nodal); listed genre: latin.
christian_nodal = {
    'danceability': 0.626,
    'energy': 0.47,
    'key': 0,
    'loudness': -2.984,
    'mode': 1,
    'speechiness': 0.0316,
    'acousticness': 0.522,
    'instrumentalness': 0,
    'liveness': 0.201,
    'valence': 0.825,
    'tempo': 140.046,
    'duration_ms': 176347,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [39]:
# Listed genre: latin. The returned label is shown as the cell output.
predictGenreDict(test_dict=christian_nodal)

Test track has a 0.9786537289619446 probability of being latin


'latin'

### Show Tunes / John Williams London Symphony Orchestra

- Track Title: Cantina Band
- Spotify Link: https://open.spotify.com/track/5ZSAdkQb23NPIcUGt6exdm

In [40]:
# Audio features for "Cantina Band" (John Williams, London Symphony Orchestra);
# listed genre: show_tunes.
lon_orchestra = {
    'danceability': 0.699,
    'energy': 0.419,
    'key': 0,
    'loudness': -15.133,
    'mode': 1,
    'speechiness': 0.134,
    'acousticness': 0.255,
    'instrumentalness': 0.0239,
    'liveness': 0.0471,
    'valence': 0.822,
    'tempo': 131.629,
    'duration_ms': 164307,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [41]:
# Listed genre: show_tunes. The returned label is shown as the cell output.
predictGenreDict(test_dict=lon_orchestra)

Test track has a 0.4520324766635895 probability of being rock


'rock'

### Show Tunes / Original Broadway Cast of Ain't Too Proud

- Track Title: Get Ready
- Spotify Link: https://open.spotify.com/track/7xSFdlmMq6Nbamxvfpjg3t

In [42]:
# Audio features for "Get Ready" (Original Broadway Cast); listed genre: show_tunes.
broadway = {
    'danceability': 0.674,
    'energy': 0.656,
    'key': 5,
    'loudness': -7.171,
    'mode': 1,
    'speechiness': 0.121,
    'acousticness': 0.107,
    'instrumentalness': 0,
    'liveness': 0.369,
    'valence': 0.903,
    'tempo': 139.992,
    'duration_ms': 101708,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [43]:
# Listed genre: show_tunes. The returned label is shown as the cell output.
predictGenreDict(test_dict=broadway)

Test track has a 0.23078803718090057 probability of being show_tunes


'show_tunes'

### Worship / Kirk Franklin

- Track Title: Love Theory
- Spotify Link: https://open.spotify.com/track/08XnGAqrSwqr08vJ0t4py8

In [44]:
# Audio features for "Love Theory" (Kirk Franklin); listed genre: worship.
kirk_franklin = {
    'danceability': 0.745,
    'energy': 0.677,
    'key': 1,
    'loudness': -5.355,
    'mode': 1,
    'speechiness': 0.215,
    'acousticness': 0.181,
    'instrumentalness': 0,
    'liveness': 0.0895,
    'valence': 0.72,
    'tempo': 97.092,
    'duration_ms': 251258,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [45]:
# Listed genre: worship. The returned label is shown as the cell output.
predictGenreDict(test_dict=kirk_franklin)

Test track has a 0.2966955006122589 probability of being pop


'pop'

### Worship / Shane & Shane The Worship Initiative

- Track Title: "Behold The Lamb"
- Spotify Link: https://open.spotify.com/track/2L6oInj5KFDnxQs9cUIS6Q

In [46]:
# Audio features for "Behold The Lamb" (Shane & Shane, The Worship Initiative);
# listed genre: worship.
shane_shane = {
    'danceability': 0.426,
    'energy': 0.388,
    'key': 0,
    'loudness': -12.084,
    'mode': 1,
    'speechiness': 0.0279,
    'acousticness': 0.166,
    'instrumentalness': 0,
    'liveness': 0.102,
    'valence': 0.268,
    'tempo': 75.021,
    'duration_ms': 305055,
    'popularity': 56,  # NOTE(review): identical on every test track here -- placeholder?
}

In [47]:
# Listed genre: worship. The returned label is shown as the cell output.
predictGenreDict(test_dict=shane_shane)

Test track has a 0.34584841132164 probability of being pop


'pop'

# Summary of Predicted Genres for Tracks Outside the Original Dataset

In [48]:
# (row label, feature dictionary) pairs in presentation order. Keeping each
# label next to its dictionary prevents the index and the data from drifting
# apart, which two parallel lists allowed.
labelled_tracks = [
    ('billie_elilish', billie_elilish),
    ('alanis_morissette', alanis_morissette),
    ('taylor_swift', taylor_swift),
    ('justin_bieber', justin_bieber),
    ('john_legend', john_legend),
    ('partynextdoor', partynextdoor),
    ('dj_khaled', dj_khaled),
    ('drake', drake),
    ('bon_jovi', bon_jovi),
    ('elton_john', elton_john),
    ('chad_lawson', chad_lawson),
    ('rascall_flats', rascall_flats),
    ('tenille_townes', tenille_townes),
    ('david_guerra', david_guerra),
    ('giant_rocks', giant_rocks),
    ('instrumental_Jazz_music_guys', instrumental_Jazz_music_guys),
    ('christian_nodal', christian_nodal),
    ('lon_orchestra', lon_orchestra),
    ('broadway', broadway),
    ('kirk_franklin', kirk_franklin),
    ('shane_shane', shane_shane),
]
feature_data = [features for label, features in labelled_tracks]
summary_df = pd.DataFrame(feature_data, index=[label for label, features in labelled_tracks])

# Genre each track is listed under in its section heading, in the same order.
listed_genre = ['pop','pop','pop','pop','r&b','r&b','rap','rap','rock','rock','classical','country',
                'country','electronic','indie','jazz','latin','show_tunes','show_tunes','worship','worship']
assert len(listed_genre) == len(feature_data), 'listed_genre needs exactly one entry per track'

# Ask the model rather than hand-copying earlier cell outputs, so the summary
# cannot go stale when the model is retrained. predictGenreDict prints one
# line per track as a side effect.
predicted_genre = [predictGenreDict(features) for features in feature_data]

#Insert Listed Genre
summary_df['Listed_Genre'] = listed_genre
#Insert Genre Predicted by ML Model
summary_df['Predicted_Genre'] = predicted_genre
#Rearrange Columns
summary_df = summary_df[['Listed_Genre', 'Predicted_Genre','danceability', 'energy', 'key', 'loudness', 
                         'mode','speechiness','acousticness', 'instrumentalness', 'liveness', 'valence', 
                         'tempo', 'duration_ms', 'popularity']]
summary_df

Unnamed: 0,Listed_Genre,Predicted_Genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,popularity
billie_elilish,pop,indie,0.444,0.309,8,-10.956,1,0.062,0.795,0.132,0.352,0.0875,104.745,208155,56
alanis_morissette,pop,pop,0.31,0.627,10,-6.679,0,0.0429,0.621,9.5e-05,0.13,0.158,118.319,205400,56
taylor_swift,pop,latin,0.613,0.581,0,-8.588,0,0.0424,0.537,0.000345,0.25,0.551,130.033,239560,56
justin_bieber,pop,latin,0.811,0.553,9,-6.644,1,0.0552,0.317,0.0,0.105,0.86,148.014,212869,56
john_legend,r&b,pop,0.557,0.585,8,-6.201,1,0.0431,0.036,0.0,0.176,0.305,63.392,196907,56
partynextdoor,r&b,pop,0.43,0.7,6,-5.35,1,0.264,0.0902,0.0,0.216,0.731,158.373,183219,56
dj_khaled,rap,rap,0.8,0.56,5,-4.818,0,0.261,0.057,0.0,0.134,0.45,163.071,200221,56
drake,rap,rap,0.761,0.518,0,-8.871,1,0.134,0.244,3.5e-05,0.107,0.522,133.976,261493,56
bon_jovi,rock,rock,0.438,0.826,10,-5.454,1,0.075,0.00586,9e-06,0.362,0.378,144.149,221947,56
elton_john,rock,pop,0.428,0.599,4,-2.135,1,0.0286,0.0665,1e-05,0.137,0.134,125.83,301730,56


# Export Prediction Results

In [49]:
# Write the summary table to disk; the path is relative to the notebook's
# working directory and the DataFrame index is written as the first column.
results_path = r'DataSource/prediction_results.csv'
summary_df.to_csv(results_path)