In [10]:
import os 
# Switch into ../src/ (resolved relative to the current working directory) and report where we ended up.
# NOTE(review): presumably this is what lets the d00_utils / d01_data_processing imports below resolve — confirm.
os.chdir('../src/')
print("Current working directory is now: ", os.getcwd())

import pandas as pd 
import numpy as np
from IPython.display import display

from d00_utils.load_confs import load_credentials, load_paths
from d00_utils.data_loader_sql import DataLoaderSQL
# NOTE(review): the wildcard import hides where names come from. Later cells call
# compute_track_features, compute_artists_features and compute_features_all, which are
# presumably supplied by this module — confirm and consider importing them by name.
from d01_data_processing.compute_features import * 

# Credentials and paths that are handed to DataLoaderSQL when the loader is built.
creds = load_credentials()
paths = load_paths()

Current working directory is now:  /Users/Selen/Downloads/Project-master 2/src


In this notebook we pull data from the VM database and process it into features.

## Queried Data Into Features 

### Compute Features for One Individual

In [11]:
# SQL data loader used by the queries in this notebook.
dl = DataLoaderSQL(paths=paths, creds=creds)

# Listener whose features are computed in the single-individual section.
listener_id = "ninjakichi"

In [12]:
# Print every listener's display name, one per line.
# NOTE(review): the captured output holds real display names — consider clearing it before sharing.
listeners = dl.select_from_table("select display_name from listeners")
for record in np.array(listeners):
    print(record[0])


Caroline Wang
ninjakichi
Elise Brown
1214600613
Martha
bnativi17


#### Track related features 

In [13]:
# Audio features of the tracks that appear in this listener's long-term top tracks.
track_cmd = f"""
        select track_pop, acousticness, danceability, energy, valence, 
        loudness, tempo, instrumentalness, speechiness, 
        mode, time_signature, liveness
        from tracks as t1
        where t1.track_id in (
                     select t2.track_id 
                      from toptracks as t2 
                      where listener_id='{listener_id}'and time_span='long_term')                  
            """

track_info = dl.select_from_table(sql=track_cmd)

# Coerce every returned column to a numeric dtype; values that cannot be parsed become NaN.
cols = track_info.columns
numeric_tracks = track_info.loc[:, cols].apply(pd.to_numeric, errors='coerce')
track_info[cols] = numeric_tracks
track_info.head()

Unnamed: 0,track_pop,acousticness,danceability,energy,valence,loudness,tempo,instrumentalness,speechiness,mode,time_signature,liveness
0,27,0.675,0.197,0.32,0.108,-16.696,83.893,0.748,0.0387,1,4,0.803
1,74,0.228,0.653,0.816,0.816,-4.353,178.085,0.0,0.167,1,4,0.0967
2,84,0.257,0.671,0.373,0.732,-18.064,92.717,7.9e-05,0.0323,1,4,0.0481
3,73,0.0901,0.446,0.952,0.624,-5.321,125.303,0.0857,0.0523,1,4,0.112
4,75,0.649,0.571,0.307,0.108,-10.958,83.72,0.0,0.0499,0,4,0.178


In [14]:
# Collapse the per-track rows for this listener into a single dict of summary
# features (the avg_* entries and '%_major' shown in the output below).
compute_track_features(track_info)

{'avg_acousticness': 0.3929256378809256,
 'avg_danceability': 0.48916036132256435,
 'avg_energy': 0.5486410965428605,
 'avg_valence': 0.4542311683244458,
 'avg_loudness': -11.638589651649696,
 'avg_tempo': 113.71194288184658,
 'avg_instrumentalness': 0.6085666645012218,
 'avg_speechiness': 0.06338867750876605,
 'avg_time_signature': 3.790978698752096,
 'avg_liveness': 0.16642599708315903,
 '%_major': 0.65}

#### Artist related features

In [15]:
# Genre, popularity and follower count for each artist in this listener's
# long-term top artists. The listener is taken from `listener_id` (as in the
# track query) rather than being hard-coded, so changing `listener_id` updates
# both queries consistently.
artist_cmd = f"""
            select * 
            from artisthasgenre 
            natural join 
                (select a1.artist_id, a1.artist_pop, a1.num_followers 
                 from artists as a1 
                 where a1.artist_id in 
                    (select a2.artist_id 
                     from topartists as a2
                     where listener_id='{listener_id}' and time_span='long_term' ))
                 as artist_info
            """
artist_info = dl.select_from_table(sql=artist_cmd)
artist_info.head()

Unnamed: 0,artist_id,genre_name,artist_pop,num_followers
0,6qqNVTkY8uBg9cP3Jd7DAH,electropop,95,24535752
1,6qqNVTkY8uBg9cP3Jd7DAH,pop,95,24535752
2,3dRfiJ2650SZu6GbydcHNb,soundtrack,76,856056
3,1hCkSJcXREhrodeIHQdav8,german soundtrack,76,404611
4,1hCkSJcXREhrodeIHQdav8,scorecore,76,404611


In [16]:
# Turn the artist/genre rows into per-genre 'is_*' values plus 'avg_artist_pop'
# (see the dict in the output below).
# NOTE(review): genre_map and num_tracks are not defined in any visible cell of this
# notebook — presumably they come from the wildcard import of
# d01_data_processing.compute_features; confirm this cell survives Restart & Run All.
features_artists_dict = compute_artists_features(artist_info=artist_info, 
                         genre_map=genre_map, 
                         num_tracks=num_tracks)
features_artists_dict

{'is_alternative': 0.1,
 'is_experimental': 0.0,
 'is_country': 0.0,
 'is_rock': 0.05,
 'is_metal': 0.05,
 'is_hip_hop': 0.0,
 'is_pop': 0.25,
 'is_r&b_soul': 0.0,
 'is_jazz': 0.0,
 'is_christian': 0.0,
 'is_latin': 0.0,
 'is_soundtrack': 0.6,
 'is_classical': 0.2,
 'is_contemporary': 0.0,
 'is_vocal': 0.0,
 'is_edm': 0.05,
 'is_chill': 0.05,
 'avg_artist_pop': 0.5918181818181818}

### Compute features for all individuals

In [17]:
# NOTE: track_cmd and artist_cmd are rebound here, replacing the single-listener
# queries of the same names defined earlier in the notebook.

# Audio features of every listener's long-term top tracks, one row per
# (listener, track) pair, with listener_id kept so rows can be grouped per listener.
# NOTE(review): a natural join matches on every column name the two sides share —
# presumably only track_id here; verify against the tracks schema.
track_cmd = """
          select toptrack_info.listener_id, 
               track_pop, acousticness, 
               danceability, energy, valence, 
               loudness, tempo, 
               instrumentalness, speechiness, 
               mode, time_signature, liveness
          from tracks as t1
          natural join 
            (select t2.track_id, t2.listener_id
             from toptracks as t2
             where time_span='long_term'
            )
          as toptrack_info
         """

top_track_info = dl.select_from_table(sql=track_cmd)

# Genre, popularity and follower count for every listener's long-term top artists.
# The inner natural join attaches listener_id to each artist; the outer one attaches
# that artist's genres.
# NOTE(review): both joins presumably match on artist_id only — verify against the schema.
artist_cmd = """
           select * 
           from artisthasgenre 
           natural join 
              (select topartist_info.listener_id, a1.artist_id, a1.artist_pop, a1.num_followers 
               from artists as a1
               natural join 
                  (select topart.listener_id, topart.artist_id
                   from topartists as topart 
                   where time_span='long_term')
               as topartist_info) 
           as artist_info
           """

top_artist_info = dl.select_from_table(sql=artist_cmd)

In [20]:
# Lift the column cap so the wide feature table is shown in full.
# This changes the pandas display option for the rest of the session.
pd.set_option("display.max_columns", None)

# Display the combined track + artist feature table, one row per listener.
compute_features_all(
    top_artist_info=top_artist_info,
    top_track_info=top_track_info,
)

Unnamed: 0,%_major,avg_acousticness,avg_artist_pop,avg_danceability,avg_energy,avg_instrumentalness,avg_liveness,avg_loudness,avg_speechiness,avg_tempo,avg_time_signature,avg_valence,is_alternative,is_chill,is_christian,is_classical,is_contemporary,is_country,is_edm,is_experimental,is_hip_hop,is_jazz,is_latin,is_metal,is_pop,is_r&b_soul,is_rock,is_soundtrack,is_vocal,listener_id
0,0.75,0.475126,0.75254,0.524572,0.527363,0.002321,0.235782,-10.368643,0.171171,109.830473,3.939717,0.421719,0.2,0.0,0.25,0.0,0.05,0.0,0.25,0.0,0.35,0.0,0.0,0.0,0.3,0.25,0.15,0.05,0.0,elise_brown212
1,0.65,0.392926,0.591818,0.48916,0.548641,0.608567,0.166426,-11.63859,0.063389,113.711943,3.790979,0.454231,0.1,0.05,0.0,0.2,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.05,0.25,0.0,0.05,0.6,0.0,ninjakichi
2,0.7,0.47853,0.713452,0.432756,0.454417,0.278716,0.17372,-11.735498,0.070725,105.311053,3.615786,0.313477,0.3,0.0,0.0,0.25,0.05,0.05,0.3,0.05,0.15,0.0,0.0,0.0,0.65,0.15,0.15,0.0,0.0,314xcqarki42gnkosjtfplluumya
3,0.8,0.544269,0.774706,0.622776,0.48524,0.045821,0.175266,-9.43688,0.14916,103.136027,3.966982,0.416774,0.35,0.0,0.0,0.05,0.2,0.15,0.5,0.0,0.5,0.1,0.0,0.0,0.75,0.4,0.1,0.0,0.0,zan6n9pb4njsd6dm20zd5ptt4
4,0.5,0.024128,0.687041,0.584195,0.890237,0.285152,0.359696,-4.825964,0.096599,139.804837,4.0,0.287791,0.25,0.0,0.0,0.0,0.0,0.0,0.7,0.0,0.2,0.0,0.0,0.15,0.35,0.05,0.25,0.0,0.0,bnativi17
5,0.4,0.061465,0.809405,0.659335,0.77877,0.009379,0.21099,-4.92718,0.062699,123.459215,3.972206,0.456777,0.15,0.0,0.0,0.0,0.05,0.0,0.65,0.0,0.15,0.0,0.05,0.0,0.95,0.15,0.05,0.0,0.0,1214600613


In [21]:
# Keep the combined feature table for the plotting cells below.
# This is the same compute_features_all call, with the same arguments, as the previous cell.
all_features = compute_features_all(top_track_info=top_track_info, 
                     top_artist_info=top_artist_info)

In [22]:
# Names of every column in the feature table, as a plain list.
all_features.columns.tolist()

['%_major',
 'avg_acousticness',
 'avg_artist_pop',
 'avg_danceability',
 'avg_energy',
 'avg_instrumentalness',
 'avg_liveness',
 'avg_loudness',
 'avg_speechiness',
 'avg_tempo',
 'avg_time_signature',
 'avg_valence',
 'is_alternative',
 'is_chill',
 'is_christian',
 'is_classical',
 'is_contemporary',
 'is_country',
 'is_edm',
 'is_experimental',
 'is_hip_hop',
 'is_jazz',
 'is_latin',
 'is_metal',
 'is_pop',
 'is_r&b_soul',
 'is_rock',
 'is_soundtrack',
 'is_vocal',
 'listener_id']

 Genre values for the radar charts (each value ranges from 0 to 1):

 1 'is_alternative'
 
 2 'is_christian'
 
 3 'is_classical'
 
 4 'is_contemporary'
 
 5 'is_country'
 
 6 'is_edm'
 
 7 'is_experimental'
 
 8 'is_hip_hop'
 
 9 'is_jazz'
 
 10 'is_latin'
 
 11 'is_metal'
 
 12 'is_pop'
 
 13 'is_r&b_soul'
 
 14 'is_rock'


 Need two radar charts for genre
 
 pop, rock, classical, country, edm, hip-hop, r&b
 
 alternative, christian, contemporary, experimental, jazz, metal, latin

 Song quality values- values range from 0 to 1

 'avg_acousticness',
 'avg_danceability',
 'avg_energy',
 'avg_instrumentalness'

 Standalone values- maybe display in a different way
 
 'avg_artist_pop'
 'avg_tempo'

In [32]:
import plotly.express as px

af = all_features

# Feature columns shown on each radar chart. Genres are split over two charts so
# that each one stays readable.
genre_categories_1 = ['is_pop', 'is_hip_hop', 'is_rock', 'is_classical', 'is_country', 'is_edm', 'is_r&b_soul']
genre_categories_2 = ['is_alternative', 'is_christian', 'is_contemporary', 'is_experimental', 'is_jazz', 'is_metal', 'is_latin']
song_quality_categories = ['avg_acousticness', 'avg_danceability', 'avg_energy', 'avg_instrumentalness']

# Human-readable axis text for each feature column.
# Passing these dicts as `labels=` to px.line_polar has no visible effect here:
# that argument renames *column names* used in axis titles, legends and hovers,
# not the individual category values plotted around the circle. The category
# names are therefore translated explicitly before plotting (see make_radar_chart).
genre_labels_1 = {
    'is_pop': 'Pop',
    'is_hip_hop': 'Hip-Hop',
    'is_rock': 'Rock',
    'is_classical': 'Classical',
    'is_country': 'Country',
    'is_edm': 'EDM',
    'is_r&b_soul': 'R&B'
}
genre_labels_2 = {
    'is_alternative': 'Alternative',
    'is_christian': 'Christian',
    'is_contemporary': 'Contemporary',
    'is_experimental': 'Experimental',
    'is_jazz': 'Jazz',
    'is_metal': 'Metal',
    'is_latin': 'Latin'
}
song_quality_labels = {
    'avg_acousticness': 'Acousticness',
    'avg_danceability': 'Danceability',
    'avg_energy': 'Energy',
    'avg_instrumentalness': 'Instrumentalness'
}


def make_radar_chart(values, categories, labels, title=None):
    """Build a filled radar (closed polar line) chart with a fixed 0-1 radial axis.

    Parameters
    ----------
    values : sequence of numbers
        One value per entry of ``categories``, in the same order.
    categories : sequence of str
        Feature column names being plotted.
    labels : dict
        Maps a column name to the text shown for it around the chart. A column
        without an entry is shown under its own name.
    title : str, optional
        Figure title.

    Returns
    -------
    The plotly figure; the caller decides when to show it.
    """
    axis_names = [labels.get(column, column) for column in categories]
    # No data_frame is passed: r and theta are standalone sequences of equal length.
    fig = px.line_polar(r=values, theta=axis_names, line_close=True, range_r=[0, 1], title=title)
    fig.update_traces(fill='toself')
    return fig


# Three charts per listener: two genre charts and one song-quality chart.
for row in af.index:
    listener = af.loc[row]
    listener_name = listener['listener_id']

    genre_values_1 = [listener[column] for column in genre_categories_1]
    genre_values_2 = [listener[column] for column in genre_categories_2]
    song_quality_values = [listener[column] for column in song_quality_categories]

    print(listener_name)

    fig1 = make_radar_chart(genre_values_1, genre_categories_1, genre_labels_1,
                            title=f"{listener_name}: genres (1 of 2)")
    fig1.show()
    fig2 = make_radar_chart(genre_values_2, genre_categories_2, genre_labels_2,
                            title=f"{listener_name}: genres (2 of 2)")
    fig2.show()
    fig3 = make_radar_chart(song_quality_values, song_quality_categories, song_quality_labels,
                            title=f"{listener_name}: song qualities")
    fig3.show()
    

elise_brown212


ninjakichi


314xcqarki42gnkosjtfplluumya


zan6n9pb4njsd6dm20zd5ptt4


bnativi17


1214600613
