In [1]:
pip install spotipy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import inspect
import os
import random
from copy import deepcopy
from itertools import combinations

import altair as alt
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo
import pyvis
import requests
import spotipy
from community import community_louvain
from pyvis import network as net
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# Spotify API credentials are read from the environment so that no secret is
# stored in the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel; a missing variable raises KeyError here.
# NOTE(review): the client secret that used to be hardcoded in this cell has
# been exposed and should be rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ["SPOTIPY_CLIENT_ID"]
CLIENT_SECRET = os.environ["SPOTIPY_CLIENT_SECRET"]
my_username = "4uxd1wpjsxqvlpni1ixj09ipn"
# instantiating the client
# source: Max Hilsdorf (https://towardsdatascience.com/how-to-create-large-music-datasets-using-spotipy-40e7242cc6a6)
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
# this function is created based on Max Hilsdorf's
# source: https://towardsdatascience.com/how-to-create-large-music-datasets-using-spotipy-40e7242cc6a6

def get_audio_features_df(playlist, client=None):
    """Build a DataFrame of track metadata and audio features for a playlist.

    Parameters
    ----------
    playlist : mapping or DataFrame
        Anything where ``playlist["items"]`` iterates over playlist entries,
        each a dict with a ``"track"`` key (the shape this notebook gets from
        the Spotify playlist endpoints, optionally wrapped in ``pd.DataFrame``).
    client : optional
        Object exposing ``audio_features(list_of_track_ids)``. Defaults to the
        notebook-level ``sp`` client.

    Returns
    -------
    pd.DataFrame
        One row per usable track with the columns ``artist``, ``album``,
        ``track_name``, ``track_id`` followed by the audio-feature columns.
        Entries without a track object or without a track ID are skipped.
        Tracks for which no audio features are returned keep their metadata
        and have missing values in the feature columns.
    """
    # Fall back to the notebook-wide client only when none was passed in.
    spotify_client = sp if client is None else client

    metadata_columns = ["artist", "album", "track_name", "track_id"]
    audio_feature_columns = ["danceability", "energy", "key", "loudness", "mode",
                             "speechiness", "instrumentalness", "liveness", "valence",
                             "tempo", "duration_ms", "time_signature"]
    playlist_features_list = metadata_columns + audio_feature_columns

    # Collect the metadata first so the audio features can be fetched in batches.
    records = []
    for item in playlist["items"]:
        track = item.get("track") if isinstance(item, dict) else None
        if not track or not track.get("id"):
            # Nothing to look up without a track object / track ID.
            continue
        records.append({
            # Note: this is the first *album* artist, not the track artist.
            "artist": track["album"]["artists"][0]["name"],
            "album": track["album"]["name"],
            "track_name": track["name"],
            "track_id": track["id"],
        })

    # One request per batch of IDs instead of one request per track.
    batch_size = 100
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        features_batch = spotify_client.audio_features([record["track_id"] for record in batch]) or []
        for record, audio_features in zip(batch, features_batch):
            if not audio_features:
                # No features for this track; leave the feature columns missing.
                continue
            for feature in audio_feature_columns:
                record[feature] = audio_features.get(feature)

    # Build the frame once; `columns` fixes the order and fills absent keys with NaN.
    return pd.DataFrame(records, columns=playlist_features_list)

In [5]:
# Fetch both playlists by ID with `playlist_items`, the call the rest of this
# notebook already uses; `user_playlist_tracks` is deprecated in spotipy and
# does not need the owner's username to resolve a playlist.
# additional_types=("track",) keeps the request restricted to tracks, as before.
meditation_tracks = pd.DataFrame(sp.playlist_items("4hL4BxaNoBHiPfoDgQeqUA", additional_types=("track",)))
basement_gym_tracks = pd.DataFrame(sp.playlist_items("7hIYT6eLgtTgFeEwaGqY6l", additional_types=("track",)))

In [6]:
# Audio features for each playlist, labelled with the playlist they came from
med_audio_features = get_audio_features_df(meditation_tracks).assign(
    **{"Playlist Name": "Meditation & Reflection"}
)
gym_audio_features = get_audio_features_df(basement_gym_tracks).assign(
    **{"Playlist Name": "Dad's Basement Gym"}
)

# Stack the two playlists into one frame with a fresh 0..n-1 index
med_plus_gym = pd.concat(
    [med_audio_features, gym_audio_features],
    ignore_index=True,
)

In [7]:
# Preview only the first rows instead of dumping the whole frame
med_plus_gym.head(10)

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Playlist Name
0,((( O ))),((( 2 ))),iFeel,7ilx6MENwjH6tUEt9kOH4S,0.608,0.411,3,-10.07,1,0.0344,0.912,0.154,0.104,149.979,269638,4,Meditation & Reflection
1,KAYTRANADA,NOTHIN LIKE U / CHANCES,IT WAS MEANT 2 B,1nDhL6yVejczIo4SwzUyvO,0.827,0.577,1,-8.119,0,0.202,0.259,0.154,0.489,94.908,240147,4,Meditation & Reflection
2,FKJ,French Kiwi Juice,Blessed,6VE2189Tetcqmaxrrf4if3,0.5,0.601,6,-8.87,1,0.0435,0.48,0.0901,0.384,97.971,265392,4,Meditation & Reflection
3,Medasin,Irene,Home,42NB7w0tDCag0mQpCtZe7Y,0.66,0.45,4,-11.011,1,0.0451,0.714,0.374,0.242,145.011,194547,4,Meditation & Reflection
4,((( O ))),((( 3 ))),fall in me,6gidVEFLd5oX25RL6n579O,0.464,0.0582,5,-17.912,0,0.0293,0.0563,0.105,0.174,87.032,133174,4,Meditation & Reflection
5,((( O ))),((( 1 ))),Nature's Joint,7hq5EkXfgUcIuxSpwx3hV1,0.377,0.24,4,-11.468,1,0.0389,0.031,0.0975,0.088,101.152,285545,3,Meditation & Reflection
6,Cookin Soul,Boa Bunda,Sushi Masta,7J6C7HMZS0qYN4XWie6lnA,0.603,0.556,6,-7.297,0,0.0373,0.92,0.171,0.617,80.055,156000,4,Meditation & Reflection
7,Kenny Beats,LOUIE,Hooper,2QK35TtLvYlOpV2zjw6vtt,0.635,0.786,1,-7.5,0,0.237,0.00567,0.773,0.356,135.12,87093,4,Meditation & Reflection
8,Alabama Shakes,Sound & Color,This Feeling,3IBhpCAFOJxynTfrw0t2LN,0.628,0.329,7,-11.424,1,0.036,0.00335,0.11,0.305,130.808,268853,4,Meditation & Reflection
9,Masego,The Pink Polo EP,Sunday Vibes,3l75jB2gKi4VpgklWIbTOz,0.691,0.67,1,-4.471,1,0.0644,0.764,0.117,0.22,170.066,226168,4,Meditation & Reflection


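As a brief note, each of us (Elise and Jacob) primarily documented different sections of this notebook; the author is noted at the start of the relevant sections.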

# **Comparing Disparate Playlists** 

(These parts were primarily documented by Jacob) With this, we wanted to see how a particular audio feature may be correlated with the overall mood of a playlist. In particular, we looked at the playlists "Dad's Basement Gym" and "Meditation and Reflection." The names alone make it clear that the two are going for quite different moods. Since "Dad's Basement Gym" is meant to be pump up music for working out and "Meditation and Reflection" is meant to be relaxing, we thought that energy would be a good variable to look at. The resulting graph shows clearly that "Dad's Basement Gym" in general is more energetic than "Meditation and Reflection".

The coding was not all that interesting, as it was identical to one of the examples in the Spotify API notebook.

In [8]:
# Energy of every track (kept in playlist order), coloured by playlist
alt.Chart(med_plus_gym).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("energy"),
    alt.Color("Playlist Name"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name")],
).properties(width=500)

**Tempo Between Playlists**

Here we wanted to see if there was a meaningful difference in tempo between the two playlists. Interestingly, there is not a significant difference. "Meditation and Reflection" is only very slightly slower than "Dad's Basement Gym" on average.

In [9]:
# Tempo of every track (kept in playlist order), coloured by playlist
alt.Chart(med_plus_gym, width=500).mark_point().encode(
    x=alt.X("track_name", sort=None),
    y=alt.Y("tempo"),
    color=alt.Color("Playlist Name"),
    tooltip=["artist", "track_name"],
)

**Energy vs. Loudness**

Finally, we looked at energy vs. loudness for each playlist. The graph shows a clear, strong positive correlation, confirmed by the calculation below, which gives r ≈ 0.85. This means that loudness is a good predictor of energy.

In [10]:
# Scatter of energy against loudness, one point per track, coloured by playlist
alt.Chart(med_plus_gym).mark_point().encode(
    alt.X("loudness", sort=None),
    alt.Y("energy"),
    alt.Color("Playlist Name"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name")],
).properties(width=500)

In [11]:
# Pearson correlation coefficient between energy and loudness across both playlists
med_plus_gym.loc[:, "energy"].corr(
    med_plus_gym.loc[:, "loudness"],
    method="pearson",
)

0.8474741919806698

# **Song Order for Constructing Narratives**

With this section, we wanted to look at how the order of a playlist conveys meaning. In particular, we chose the playlist I had made from the initial 40 song playlist. The overarching idea I went with was something representing going through and recovering from difficult times.

One way I went about doing this was to have a sort of escalation in the first half, representing things feeling worse and worse, before dropping and building up again, but in a more positive mood. To see this represented visually, we simply graphed each song by its energy. As seen in the first graph below, this idea of escalation is indeed reflected in the energy of each track.

In [12]:
# Fetch the playlist by ID with `playlist_items` (used elsewhere in this
# notebook); `user_playlist_tracks` is deprecated in spotipy.
# additional_types=("track",) keeps the request restricted to tracks, as before.
subset_tracks = pd.DataFrame(sp.playlist_items("5YS9zh4bCOpJqhenIJfFyw", additional_types=("track",)))

In [13]:
# Audio features for the single narrative playlist; no playlist-name column is
# added because only one playlist is charted from this frame.
subset_audio_features = get_audio_features_df(subset_tracks)


In [14]:
# Energy of each track in playlist order; no colour channel for this chart
alt.Chart(subset_audio_features).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("energy"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name")],
).properties(width=500)

# **Creating a Community from Similar Playlists**

In this part, we wanted to create a network of songs from the playlists based on the 40 songs provided. In particular, we took playlists with descriptions referencing some prior knowledge or familiarity with the songs included, whether by the individual who made the playlist or somebody they know (e.g. parents).

When generating the Louvain network, we noticed that despite using 5 playlists, only 3 communities were created. Naturally, since all the playlists started from the same 40 songs, we should expect some overlap, but it was interesting nonetheless that there was so much overlap that only 3 distinct communities were generated.

In [45]:
# IDs of the "familiar" playlists, in the order they are concatenated below
familiar_playlists_ids_list = [
    "2hfOGugGPsjfPTYKlZojom",
    "3cfp7fWCHVXPl6JiBnmkLQ",
    "5QxWG2oKSylpTg9qS5rPRr",
    "5LVjFO57XKpRNV5vzzDmP5",
    "0QZa8PuiIKpSRDXFcbc2y2",
]

# One audio-features DataFrame per playlist, tagged with that playlist's name
familiar_playlists_dfs_list = [
    get_audio_features_df(pd.DataFrame(sp.playlist_items(playlist_id))).assign(
        playlist_name=sp.playlist(playlist_id)["name"]
    )
    for playlist_id in familiar_playlists_ids_list
]

# Stack the per-playlist frames and count the distinct track names
familiar_playlists_df = pd.concat(familiar_playlists_dfs_list)
familiar_playlists_df["track_name"].nunique()

30

In [46]:
# Second name for the same DataFrame object (no copy is made); this is the
# name the network-building code refers to.
input_data_familiar_df = familiar_playlists_df

In [47]:
# Creating an HTML node
def create_node_html(node: str, source_df: pd.DataFrame, node_col: str):
    """Return the HTML list used as a node's tooltip.

    Every row of ``source_df`` whose ``node_col`` value equals ``node``
    contributes one ``<li>`` entry showing that row's ``artist`` and
    ``playlist_name``. With no matching rows the result is ``<ul></ul>``.
    """
    matching_rows = source_df[source_df[node_col] == node]
    list_items = [
        f"""<li>Artist: {artist}<br>
                                Playlist: {playlist}<br>"""
        for artist, playlist in zip(matching_rows["artist"], matching_rows["playlist_name"])
    ]
    return "<ul>" + "".join(list_items) + "</ul>"

In [48]:
# Adding nodes from an Edgelist
def add_nodes_from_edgelist(edge_list: list, 
                               source_df: pd.DataFrame, 
                               graph: nx.Graph,
                               node_col: str):
    """Return a copy of ``graph`` with one node per distinct edge endpoint.

    Parameters
    ----------
    edge_list : iterable of pairs
        Edges given as ``(node_a, node_b)`` pairs.
    source_df : pd.DataFrame
        Frame passed to ``create_node_html`` to build each node's tooltip.
    graph : nx.Graph
        Graph to extend; it is deep-copied, so the argument is not modified.
    node_col : str
        Column of ``source_df`` that holds the node names.

    Returns
    -------
    nx.Graph
        The copy, with each node carrying a ``title`` (tooltip HTML) and a
        ``spring_length`` attribute. An empty ``edge_list`` returns the copy
        unchanged.
    """
    graph = deepcopy(graph)
    # Flatten the pairs directly; dict.fromkeys de-duplicates while keeping
    # first-seen order. Missing endpoints (None/NaN) are skipped.
    node_names = dict.fromkeys(
        endpoint
        for edge in edge_list
        for endpoint in edge
        if not pd.isna(endpoint)
    )
    for node_name in node_names:
        graph.add_node(node_name, title=create_node_html(node_name, source_df, node_col), spring_length=1000)
    return graph

In [49]:
# Adding Louvain Communities
def add_communities(G, random_state=None):
    """Return a copy of ``G`` whose nodes carry their Louvain community as ``group``.

    Parameters
    ----------
    G : nx.Graph
        Graph to partition; it is deep-copied, so the argument is not modified.
    random_state : optional
        Seed forwarded to ``community_louvain.best_partition`` so the
        partition can be reproduced between runs. When left as ``None`` the
        call is made exactly as before, without a seed.

    Returns
    -------
    nx.Graph
        The copy, with a ``"group"`` node attribute set from the partition.
    """
    G = deepcopy(G)
    # Only forward the seed when one was requested, so the default call is unchanged.
    louvain_kwargs = {} if random_state is None else {"random_state": random_state}
    partition = community_louvain.best_partition(G, **louvain_kwargs)
    nx.set_node_attributes(G, partition, "group")
    return G

In [50]:
def choose_network(df, chosen_word, file_name):
    """Build a pyvis network linking values that share a playlist.

    Parameters
    ----------
    df : pd.DataFrame
        Needs a ``playlist_name`` column, the ``chosen_word`` column, and the
        ``artist`` column read when node tooltips are built.
    chosen_word : str
        Column whose values become the nodes (e.g. ``'track_name'``).
    file_name : str
        Not used inside this function; kept so existing calls keep working.
        Pass the file name to ``.show()`` on the returned network.

    Returns
    -------
    pyvis network
        Network with one edge per pair of values that appear together in a
        playlist, and each node's Louvain community stored as ``group``.
    """
    # creating unique pairs: every 2-combination of values within each playlist
    output_grouped = df.groupby(['playlist_name'])[chosen_word].apply(list).reset_index()
    pairs = output_grouped[chosen_word].apply(lambda x: list(combinations(x, 2)))
    unique_pairs = pairs.explode().dropna().unique()
    
    # creating a new Graph
    pyvis_graph = net.Network(notebook=True, width="1000", height="1000", bgcolor="black", font_color="white")
    G = nx.Graph()
    
    try:
        # Tooltips are built from the frame that was passed in, not from a
        # notebook-level global, so each network describes its own data.
        G = add_nodes_from_edgelist(edge_list=unique_pairs, source_df=df, graph=G, node_col=chosen_word)
    except Exception as e:
        # Best effort: report the problem and continue; the nodes are still
        # created by add_edges_from below, just without tooltip attributes.
        print(e)
    
    # add edges and find communities
    G.add_edges_from(unique_pairs)
    G = add_communities(G)
    pyvis_graph.from_nx(G)
    return pyvis_graph

In [53]:
# Build the track-level network for the familiar playlists and render it to HTML
familiar_network_file = 'modified_familiar.html'
louvain_network = choose_network(
    input_data_familiar_df,
    'track_name',
    familiar_network_file,
)
louvain_network.show(familiar_network_file)

In [51]:
# Audio features used as the axes of each radar chart. "danceability" appears
# at both ends of the list - presumably so every line-mode trace returns to
# its first axis and draws a closed shape (TODO confirm before de-duplicating).
feature_columns = ["danceability", "energy", "speechiness", "liveness", "instrumentalness", "valence", "danceability"]

def createRadarElement(row, feature_cols):
    """Return a line-mode polar trace for one track, named by its track name.

    ``row`` is one row of an audio-features frame; the values of
    ``feature_cols`` become the radial coordinates and the column names
    become the angular axis labels.
    """
    radial_values = row[feature_cols].values.tolist()
    trace_options = {
        "r": radial_values,
        "theta": feature_cols,
        "mode": 'lines',
        "name": row['track_name'],
    }
    return go.Scatterpolar(**trace_options)

def get_radar_plot(playlist_id, features_list):
    """Show and save a radar chart with one trace per track of a playlist.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID; also used as the stem of the saved PNG.
    features_list : list of str
        Audio-feature columns used as the radar axes.

    Side effects: requests the playlist and its audio features, displays the
    figure with the ``iframe`` renderer, and writes ``<playlist_id>.png`` to
    the working directory (overwriting a file of the same name).
    """
    current_playlist_audio_df = get_audio_features_df(pd.DataFrame(sp.playlist_items(playlist_id)))
    # Build the traces with an explicit row loop so that a playlist with no
    # rows gives an empty list of traces (and therefore an empty figure).
    current_data = [
        createRadarElement(row, features_list)
        for _, row in current_playlist_audio_df.iterrows()
    ]
    fig = go.Figure(current_data)
    fig.show(renderer = 'iframe')
    fig.write_image(playlist_id + '.png', width=1200, height=800)
    
def get_radar_plots(playlist_id_list, features_list):
    """Draw one radar chart per playlist ID, in the order given."""
    for playlist_id in playlist_id_list:
        get_radar_plot(playlist_id, features_list)

In [52]:
# Artist-labelled counterpart of createRadarElement
def createRadarElementArtist(row, feature_cols):
    """Return a line-mode polar trace for one track, named by its artist.

    ``row`` is one row of an audio-features frame; the values of
    ``feature_cols`` become the radial coordinates and the column names
    become the angular axis labels.
    """
    radial_values = row[feature_cols].values.tolist()
    trace_options = {
        "r": radial_values,
        "theta": feature_cols,
        "mode": 'lines',
        "name": row['artist'],
    }
    return go.Scatterpolar(**trace_options)

def get_radar_plot_artist(playlist_id, features_list):
    """Show and save a radar chart whose traces are labelled by artist.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID; also used as the stem of the saved PNG.
    features_list : list of str
        Audio-feature columns used as the radar axes.

    Side effects: requests the playlist and its audio features, displays the
    figure with the ``iframe`` renderer, and writes ``<playlist_id>.png`` to
    the working directory (overwriting a file of the same name).
    """
    current_playlist_audio_df = get_audio_features_df(pd.DataFrame(sp.playlist_items(playlist_id)))
    # Use the artist-labelled trace builder; the track-name builder was being
    # called here, so the legend never showed artists.
    current_data = [
        createRadarElementArtist(row, features_list)
        for _, row in current_playlist_audio_df.iterrows()
    ]
    fig = go.Figure(current_data)
    fig.show(renderer = 'iframe')
    fig.write_image(playlist_id + '.png', width=1200, height=800)

In [24]:
# Artist-labelled radar chart for a single playlist
get_radar_plot_artist(
    playlist_id="4sWTtloXKghMS1cz48f2qI",
    features_list=feature_columns,
)

In [25]:
# Despite its name, this is a flat list of playlist IDs (one radar chart each)
list_of_lists = [
    "1jPGGXaawH9yUzTZBei5Hi",
    "1ECGflprVeWBWgJKqh2We5",
    "6t1GwwI6JvES6I3PWrmgIr",
    "5MPTim2q6agXKcYyEGeJjc",
]

get_radar_plots(playlist_id_list=list_of_lists, features_list=feature_columns)

# **Comparing Physical Activity Playlists**

(The following documentation/comments come from Elise.) In this section, we wanted to look at how different playlists centered around physical activity compare to each other.

We first compared the playlist "Dad's Basement Gym" with "Songs to Punch Drywall To". We will then be comparing all of the physical activity playlists with each other. 

In [26]:
# playlist tracks for Dad's Basement Gym, Songs to Punch Drywall To
# Fetched by playlist ID with `playlist_items` (used elsewhere in this
# notebook); `user_playlist_tracks` is deprecated in spotipy.
# additional_types=("track",) keeps the request restricted to tracks, as before.
dads_basement_gym = pd.DataFrame(sp.playlist_items("7hIYT6eLgtTgFeEwaGqY6l", additional_types=("track",)))
punch_drywall_to = pd.DataFrame(sp.playlist_items("0opioCiJiL23i467NPjq5O", additional_types=("track",)))

In [27]:
# Audio features for each playlist, fetched in the same order as before
gym_audio_features_df, drywall_audio_features_df = (
    get_audio_features_df(playlist_tracks)
    for playlist_tracks in (dads_basement_gym, punch_drywall_to)
)

In [28]:
# Radar chart for Dad's Basement Gym
dads_basement_gym_id = "7hIYT6eLgtTgFeEwaGqY6l"
get_radar_plot(
    playlist_id=dads_basement_gym_id,
    features_list=feature_columns,
)

In [29]:
# Radar chart for Songs to Punch Drywall To
punch_drywall_to_id = "0opioCiJiL23i467NPjq5O"
get_radar_plot(
    playlist_id=punch_drywall_to_id,
    features_list=feature_columns,
)

We initially adjusted our feature columns in the radar plots to only include energy, valence, and loudness, but felt that there was not enough information to formulate a conclusion for either of these playlists. We omitted loudness from the radar plots and included the original columns: speechiness, energy, liveness, instrumentalness, valence, and danceability, which gave a clearer depiction of the similarities and differences between these two playlists.

- More songs in Dad's Basement Gym have higher valence levels.
- More songs in Songs to Punch Drywall To have higher levels of instrumentalness.
- Both playlists have high energy levels.

In [30]:
# Tag every Dad's Basement Gym track with its playlist author, then display the frame
gym_audio_features_df = gym_audio_features_df.assign(Author="Dad")
gym_audio_features_df

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Author
0,Duran Duran,Rio (Collector's Edition),Hungry Like the Wolf - 2009 Remaster,39lSeqnyjZJejRuaREfyLL,0.691,0.812,0,-7.772,1,0.0558,1e-06,0.299,0.685,127.568,220627,4,Dad
1,Peter Schilling,The Different Story (World Of Lust And Crime),Major Tom - Coming Home,709ffFK4o4MtG1NKoLvOma,0.787,0.42,4,-14.32,0,0.0645,0.164,0.169,0.838,161.247,301373,4,Dad
2,Michael Jackson,Thriller 25 Super Deluxe Edition,Thriller,3S2R0EVwBSAVMd5UMgKTL0,0.773,0.859,11,-4.913,1,0.0747,0.000187,0.914,0.813,118.459,357267,4,Dad
3,Bee Gees,How Can You Mend A Broken Heart,Stayin Alive,5ubvP9oKmxLUVq506fgLhk,0.702,0.773,10,-9.256,0,0.0343,0.00871,0.147,0.953,103.564,285373,4,Dad
4,Guns N' Roses,Appetite For Destruction,Welcome To The Jungle,0G21yYKMZoHa30cYVi1iA8,0.453,0.987,8,-4.485,1,0.0889,0.283,0.268,0.316,123.544,273480,4,Dad
5,Bon Jovi,Slippery When Wet,Wanted Dead Or Alive,0oerlffJSzhRVvtDfLcp3N,0.252,0.802,7,-3.976,1,0.0391,0.0146,0.309,0.253,150.056,308667,4,Dad
6,AC/DC,Back In Black,Back In Black,08mG3Y1vljYA6bvDt4Wqkj,0.31,0.7,9,-5.678,1,0.047,0.00965,0.0828,0.763,188.386,255493,4,Dad
7,Van Halen,1984 (Remastered),Jump - 2015 Remaster,7N3PAbqfTjSEU1edb2tY8j,0.572,0.835,0,-6.219,1,0.0317,0.000377,0.0702,0.795,129.981,241600,4,Dad
8,Loverboy,We Are The '80s,Working for the Weekend,0sgDEFq9oeC8ueOK0mR5IA,0.526,0.924,7,-5.591,1,0.0391,0.000728,0.598,0.793,147.28,220707,4,Dad
9,Pat Benatar,Crimes Of Passion,Hit Me With Your Best Shot,0vOkmmJEtjuFZDzrQSFzEE,0.741,0.58,4,-9.05,1,0.0304,3.3e-05,0.212,0.944,127.402,171267,4,Dad


In [31]:
# drywall features
# Fetched by playlist ID with `playlist_items` (used elsewhere in this
# notebook); `user_playlist_tracks` is deprecated in spotipy.
# additional_types=("track",) keeps the request restricted to tracks, as before.
drywall_playlist_tracks = pd.DataFrame(sp.playlist_items("0opioCiJiL23i467NPjq5O", additional_types=("track",)))
drywall_playlist_tracks_audio_df = get_audio_features_df(drywall_playlist_tracks)
drywall_playlist_tracks_audio_df["Author"] = "Kyle"

# combining songs to punch drywall to and dad's basement gym
# (gym_audio_features_df must already carry its "Author" column)
two_playlists_combined = pd.concat([drywall_playlist_tracks_audio_df, gym_audio_features_df], ignore_index=True)
two_playlists_combined

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Author
0,KMFDM,WWIII,WWIII,5jHYEOabEw2RSwOzf8nQPn,0.534,0.966,7,-6.597,1,0.0688,0.104,0.436,0.171,139.987,298707,4,Kyle
1,King Gizzard & The Lizard Wizard,Infest The Rats' Nest,Mars For The Rich,4lV1tx0EdlIChOw7Kz7fP0,0.269,0.885,11,-5.938,0,0.0948,0.795,0.132,0.325,129.191,251720,4,Kyle
2,Led Zeppelin,Led Zeppelin III (1994 Remaster),Immigrant Song - 1990 Remaster,6kjlvJLh2DBsSQtqVzFh8I,0.574,0.926,11,-10.79,1,0.0562,0.478,0.386,0.609,113.097,145067,4,Kyle
3,A Day To Remember,Homesick,Mr. Highway's Thinking About The End,3VnBhHJdT7p05Wtenu4fmt,0.301,0.916,10,-6.218,0,0.0919,1.8e-05,0.293,0.451,150.091,255827,4,Kyle
4,The Vines,Future Primitive,Black Dragon,2Z8EqEtv0CxT6Jfzfdhtbf,0.191,0.807,0,-4.726,1,0.0426,0.897,0.158,0.246,138.723,208747,4,Kyle
5,Pantera,Vulgar Display of Power,Fucking Hostile,5RU50TPTgtxJLFNunUS2p5,0.479,0.96,6,-9.551,0,0.0991,4.6e-05,0.208,0.506,93.867,168600,4,Kyle
6,Mastodon,Leviathan,Blood and Thunder,3jagGO7eHHuaD53ibehkux,0.364,0.935,2,-6.548,1,0.0724,0.729,0.0317,0.478,92.717,228587,4,Kyle
7,Slayer,Reign In Blood,Raining Blood,01Mpj13vURSO3cCLprPt5T,0.294,0.995,1,-4.383,0,0.168,0.331,0.128,0.0379,88.777,254400,4,Kyle
8,Pantera,Cowboys from Hell,Domination,769cLRTw2y6KRdkFWFkxtu,0.372,0.888,2,-5.88,1,0.0943,0.00442,0.115,0.555,141.374,305293,4,Kyle
9,Judas Priest,Painkiller,Painkiller,0L7zm6afBEtrNKo6C6Gj08,0.435,0.987,1,-4.667,0,0.16,0.0847,0.268,0.101,103.16,365827,4,Kyle


In [54]:
# Energy per track for the two playlists, coloured by playlist author
alt.Chart(two_playlists_combined).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("energy"),
    alt.Color("Author"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name")],
).properties(width=1000)

In [55]:
# Summary statistics of energy, split by playlist author
energy_values = two_playlists_combined["energy"]
author_labels = two_playlists_combined["Author"]
print("Dad's data: \n", energy_values[author_labels == "Dad"].describe(), "\n")
print("Kyle's data: \n", energy_values[author_labels == "Kyle"].describe())

Dad's data: 
 count    13.000000
mean      0.773077
std       0.171702
min       0.420000
25%       0.700000
50%       0.812000
75%       0.859000
max       0.987000
Name: energy, dtype: float64 

Kyle's data: 
 count    11.000000
mean      0.933000
std       0.057749
min       0.807000
25%       0.902000
50%       0.935000
75%       0.976500
max       0.998000
Name: energy, dtype: float64


In [56]:
# Loudness per track for the two playlists, coloured by playlist author
alt.Chart(two_playlists_combined, width=1000).mark_point().encode(
    x=alt.X("track_name", sort=None),
    y=alt.Y("loudness"),
    color=alt.Color("Author"),
    tooltip=["artist", "track_name"],
)

In [57]:
# Summary statistics of loudness, split by playlist author
loudness_values = two_playlists_combined["loudness"]
author_labels = two_playlists_combined["Author"]
print("Dad's data: \n", loudness_values[author_labels == "Dad"].describe(), "\n")
print("Kyle's data: \n", loudness_values[author_labels == "Kyle"].describe())

Dad's data: 
 count    13.000000
mean     -7.491308
std       3.224840
min     -14.320000
25%      -9.050000
50%      -6.219000
75%      -5.591000
max      -3.976000
Name: loudness, dtype: float64 

Kyle's data: 
 count    11.000000
mean     -6.448909
std       2.005087
min     -10.790000
25%      -6.572500
50%      -5.938000
75%      -5.183000
max      -4.383000
Name: loudness, dtype: float64


In [58]:
# Valence per track for the two playlists, coloured by playlist author
alt.Chart(two_playlists_combined).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("valence"),
    alt.Color("Author"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name")],
).properties(width=1000)

Interestingly enough, Dad's Basement Gym has a higher average valence (0.669154) than Songs to Punch Drywall To (0.332718). This is the reverse of the pattern for energy, where Songs to Punch Drywall To had the higher average (0.933 vs. 0.773).

In [37]:
# Summary statistics of valence, split by playlist author
valence_values = two_playlists_combined["valence"]
author_labels = two_playlists_combined["Author"]
print("Dad's data: \n", valence_values[author_labels == "Dad"].describe(), "\n")
print("Kyle's data: \n", valence_values[author_labels == "Kyle"].describe())

Dad's data: 
 count    13.000000
mean      0.669154
std       0.279353
min       0.188000
25%       0.395000
50%       0.793000
75%       0.838000
max       0.963000
Name: valence, dtype: float64 

Kyle's data: 
 count    11.000000
mean      0.332718
std       0.197112
min       0.037900
25%       0.175500
50%       0.325000
75%       0.492000
max       0.609000
Name: valence, dtype: float64


Now, we will be comparing all of the physical activity playlists created by the class:
Songs to Punch Drywall To,
Running Until I Pass Out,
Dad's Basement Gym,
British High Schooler's Lift Session

In [38]:
# Physical-activity playlists compared in this section, in this order:
#   Songs to Punch Drywall To
#   Running Until I Pass Out
#   Dad's Basement Gym
#   British High Schooler's Lift Session
physical_export_playlists_list = [
    "0opioCiJiL23i467NPjq5O",
    "6JaCaTSXvLCEXlPXswEIH8",
    "7hIYT6eLgtTgFeEwaGqY6l",
    "7I076Vit9Fd65C9fVNlPy0",
]

# One audio-features frame per playlist, tagged with its name and a shared author label
physical_activity_playlists = [
    get_audio_features_df(pd.DataFrame(sp.playlist_items(playlist_id))).assign(
        playlist_name=sp.playlist(playlist_id)["name"],
        Author="Everyone",
    )
    for playlist_id in physical_export_playlists_list
]

four_activity_playlists = pd.concat(physical_activity_playlists)

In [39]:
# Energy per track across the four physical-activity playlists
alt.Chart(four_activity_playlists).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("energy"),
    alt.Color("playlist_name"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name"), alt.Tooltip("playlist_name")],
).properties(width=1200)

In [40]:
# Loudness per track across the four physical-activity playlists
alt.Chart(four_activity_playlists, width=1200).mark_point().encode(
    x=alt.X("track_name", sort=None),
    y=alt.Y("loudness"),
    color=alt.Color("playlist_name"),
    tooltip=["artist", "track_name", "playlist_name"],
)

In [41]:
# Valence per track across the four physical-activity playlists
alt.Chart(four_activity_playlists).mark_point().encode(
    alt.X("track_name", sort=None),
    alt.Y("valence"),
    alt.Color("playlist_name"),
    tooltip=[alt.Tooltip("artist"), alt.Tooltip("track_name"), alt.Tooltip("playlist_name")],
).properties(width=1200)

In [42]:
# Louvain Communities
# The same four playlists were already downloaded into
# `four_activity_playlists` by the comparison cell earlier in this section,
# so reuse that frame instead of repeating every Spotify request. Dropping
# the constant "Author" column leaves the same columns and rows that a fresh
# download would produce.
# `physical_export_playlists_list` and `physical_activity_playlists` keep the
# values assigned in that earlier cell.
physical_activity_playlists_df = four_activity_playlists.drop(columns=["Author"])

In [43]:
# The concatenated frame repeats each playlist's own row labels; reset_index()
# gives it a fresh 0..n-1 index. Because drop=True is not passed, the old
# labels are kept as an extra "index" column.
input_data_physical_df = physical_activity_playlists_df.reset_index()

In [44]:
# Build the artist-level network for the physical-activity playlists and render it to HTML
physical_network_file = 'modified_physical.html'
louvain_network = choose_network(
    input_data_physical_df,
    'artist',
    physical_network_file,
)
louvain_network.show(physical_network_file)

Unsure of what we would find (or if we would find anything at all), we generated Louvain communities for all of the physical activity playlists and got three communities. Notably, there is a single edge connecting The Smashing Pumpkins to Led Zeppelin and the rest of the community. This is due to a playlist (Running Until I Pass Out) consisting only of songs from The Smashing Pumpkins and Led Zeppelin. This was put in a community with artists from Songs to Punch Drywall To.