# Is Punk Dead?
Looking at how Spotify's punk playlists portray punk music in the streaming era.

Find the project at https://sawyerclick.github.io/punk

### Importing Our Tools

In [1]:
import requests, zipfile, io
import pandas as pd
from bs4 import BeautifulSoup
import re
import datetime as dt  
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import base64
import os
import requests
import json
# Show full cell contents; None is the supported "no limit" value (negative values are no longer accepted).
pd.set_option('display.max_colwidth', None)
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams['pdf.fonttype'] = 42
import seaborn as sns
import nltk
# nltk.download()

%matplotlib inline
plt.style.use('fivethirtyeight')

genius_api_key = os.getenv("GENIUS_ACCESS")

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import sys

# spotify_client_id = os.getenv("SPOTIFY_CLIENT_ID")
# spotify_client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
# token = os.getenv("SPOTIFY_ACCESS_TOKEN")

In [2]:
# Load the word-level NRC emotion lexicon (tab separated, 45 leading lines skipped)
# and keep only the words positively associated with sadness or fear.
filepath = "NRC-Emotion-Lexicon/NRC-Emotion-Lexicon-v0.92/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
emolex_df = pd.read_csv(
    filepath,
    sep='\t',
    skiprows=45,
    names=["word", "emotion", "association"],
)
emo_df = emolex_df[
    emolex_df['emotion'].isin(['sadness', 'fear'])
    & emolex_df['association'].eq(1)
]

# This one uses <code>Spotify's API</code>
- retains special info
- refresh my access token every damn hour (https://developer.spotify.com/console/get-track/?id=3n3Ppam7vgaVa1iaRUc9Lp)

### Grabbing the playlists

In [None]:
# Build the authorised request for Spotify's punk category playlists.
# The bearer token comes from the environment (see the note above about
# refreshing it); fail early with a clear message instead of sending
# "Bearer None" and tripping over the error payload in a later cell.
access_token = os.getenv("SPOTIFY_ACCESS_TOKEN")
if not access_token:
    raise RuntimeError("SPOTIFY_ACCESS_TOKEN is not set; export a fresh Spotify access token first")
token = f"Bearer {access_token}"
base_url = "https://api.spotify.com/v1"
headers = {'Authorization': token}
search_url = "https://api.spotify.com/v1/browse/categories/punk/playlists" # Right now just looking for punk playlists

params = {'limit':50}
playlists_request = requests.get(search_url, params=params, headers=headers)
# Surface HTTP errors (e.g. an expired token) here rather than parsing the error body as data.
playlists_request.raise_for_status()
response = playlists_request.json()

### Turning it into a dataframe

In [None]:
# Build one flat record per (playlist, track) for every Spotify-owned playlist,
# enriched with the track's audio features and its lead album artist's stats.
rows = []
for playlist in response['playlists']['items']:
    # Only playlists owned by the "Spotify" account are analysed; empty entries are skipped.
    if not playlist or playlist['owner']['display_name'] != "Spotify":
        continue
    print(playlist['name'])

    # The tracks listing is paginated: keep following `next` so playlists
    # longer than a single page are not silently truncated.
    page_url = playlist['tracks']['href']
    while page_url:
        page_request = requests.get(page_url, headers=headers)
        page_request.raise_for_status()
        playlist_response = page_request.json()

        for song in playlist_response['items']:
            track = song['track']
            # Entries without a track (or without a track id) cannot be looked up; skip them.
            if not track or not track.get('id'):
                continue

            album = track['album']
            lead_artist = album['artists'][0]

            row = {}

            row['playlist_name'] = playlist['name']
            row['playlist_link'] = playlist['external_urls']['spotify']
            row['playlist_api'] = playlist['tracks']['href']
            row['song_name'] = track['name']
            row['song_album'] = album['name']
            row['artist_name'] = lead_artist['name']
            # Release dates look like YYYY[-MM[-DD]]; keep only the year part.
            row['song_release_date'] = album['release_date'].split("-")[0]
            # Duration in minutes, formatted with two decimals.
            row['song_length'] = "{:.2f}".format((track['duration_ms'] / 60000))
            row['song_preview'] = track['preview_url']
            row['song_id'] = track['id']
            row['song_explicit'] = track['explicit']
            row['song_popularity'] = track['popularity']

            # Copy every field of the audio-features payload into the row.
            # Not status-checked: a track whose lookup fails still yields a
            # row (with missing feature columns) that later cleaning can drop.
            features_url = base_url + "/audio-features/" + track['id']
            response_track_features = requests.get(features_url, headers=headers).json()
            row.update(response_track_features)

            artist_request = requests.get(lead_artist['href'], headers=headers)
            artist_request.raise_for_status()
            artist_response = artist_request.json()
            row['artist_genres'] = artist_response['genres']
            row['artist_followers'] = artist_response['followers']['total']
            row['artist_popularity'] = artist_response['popularity']

            rows.append(row)

        page_url = playlist_response.get('next')

### Kicking out the dirty data

In [None]:
# Turn the collected records into a frame and persist only the complete ones.
df = pd.DataFrame(rows)
df.song_release_date = df.song_release_date.astype(int)
# Missing values are NaN, not the string 'Nan', so a string comparison never
# filters anything; dropna removes rows lacking any of these measurements.
df.dropna(subset=['valence', 'energy', 'loudness', 'song_popularity']).to_csv('spotify_punk_playlists.csv', index=False)

### Reading in the csv here
There are two of them:
<code>spotify_punk_playlists.csv</code> covers all decades
<code>spotify_punk_playlists_post90s.csv</code> covers the '90s onward

In [3]:
# Reload the full data set and export the "'90s onward" subset.
df = pd.read_csv('spotify_punk_playlists.csv')
# 1990 belongs to the '90s (the decade buckets in this notebook use >= 1990), so include it.
df[df.song_release_date >= 1990].to_csv('spotify_punk_playlists_post90s.csv') ## post-90s

### Grouping things by decades and setting up a column so that I can order things

In [4]:
def get_decade(x):
    """Return the decade label for a song row based on ``x.song_release_date``.

    2019 gets its own bucket ("'19"). Every other year from 2010 upward
    (including years after 2019) is labelled "'10s", then "'00s", "'90s",
    "'80s", "'70s" and "'60s". Years before 1960 yield None.
    """
    year = x.song_release_date
    if year == 2019:
        return "'19"
    # Newest first: the first floor the year reaches decides the label.
    decade_floors = (
        (2010, "'10s"),
        (2000, "'00s"),
        (1990, "'90s"),
        (1980, "'80s"),
        (1970, "'70s"),
        (1960, "'60s"),
    )
    for floor, label in decade_floors:
        if year >= floor:
            return label
    return None
    
# Label every song row with its decade bucket (get_decade is applied row by row).
df['decade'] = df.apply(get_decade, axis=1)

def get_order(x):
    """Return a sort rank for a song row based on ``x.song_release_date``.

    Rank 1 is 2019 on its own; ranks 2 through 7 cover 2010+, 2000+, 1990+,
    1980+, 1970+ and 1960+ respectively (newest first). Years before 1960
    yield None.
    """
    year = x.song_release_date
    if year == 2019:
        return 1
    # Ranks start at 2 because rank 1 is reserved for 2019.
    for rank, floor in enumerate((2010, 2000, 1990, 1980, 1970, 1960), start=2):
        if year >= floor:
            return rank
    return None
    
# Give every song row its numeric rank (1 = newest bucket) so decades can be sorted.
df['order'] = df.apply(get_order, axis=1)

In [5]:
# Mean energy per decade rank, as a two-column frame (order, energy).
decade_energy_df = (
    df.groupby('order')['energy']
    .mean()
    .reset_index()
)

def get_decade(x):
    """Translate a decade rank (1-7) back into its label.

    1 -> "'19", 2 -> "'10s", 3 -> "'00s", 4 -> "'90s", 5 -> "'80s",
    6 -> "'70s", 7 -> "'60s"; any other value yields None.

    Note: this reuses the name ``get_decade`` already defined earlier in the
    notebook for the row-based version, replacing it from here on.
    """
    labels_newest_first = ("'19", "'10s", "'00s", "'90s", "'80s", "'70s", "'60s")
    for rank, label in enumerate(labels_newest_first, start=1):
        if x == rank:
            return label
    return None
# Attach the readable decade label to each rank, then export label + mean energy only.
decade_energy_df['decade'] = decade_energy_df['order'].apply(get_decade)
decade_energy_df[['decade', 'energy']].to_csv('decades_energy.csv', index=False)

In [6]:
# Song count and median artist following per decade.
# Both statistics come from the same groupby, so they are matched on the decade
# label itself instead of on row position (positional assignment is only right
# when the count ranking happens to coincide with the decade ordering). This
# also avoids relying on the column names value_counts().reset_index()
# produces, which differ between pandas versions.
songs_by_decade = df.groupby('decade')
df_decade_counts = (
    pd.DataFrame({
        'count': songs_by_decade.size(),
        'artist_followers': songs_by_decade['artist_followers'].median(),
    })
    .reset_index()
    # Most frequent decade first, matching value_counts() ordering.
    .sort_values('count', ascending=False)
    .reset_index(drop=True)
)[['decade', 'count', 'artist_followers']]

def get_decade(x):
    """Translate a decade label into its sort rank.

    "'19" -> 1, "'10s" -> 2, "'00s" -> 3, "'90s" -> 4, "'80s" -> 5,
    "'70s" -> 6, "'60s" -> 7; any other value yields None.

    Note: despite its name this returns the rank, and it reuses the name
    ``get_decade`` already defined earlier in the notebook.
    """
    labels_newest_first = ("'19", "'10s", "'00s", "'90s", "'80s", "'70s", "'60s")
    for rank, label in enumerate(labels_newest_first, start=1):
        if x == label:
            return rank
    return None
    
# Rank each decade label so the export can be written newest-first.
df_decade_counts['order'] = df_decade_counts['decade'].apply(get_decade)

(
    df_decade_counts
    .sort_values('order')
    .to_csv('decades_in_playlists_counts.csv', index=False)
)

df_decade_counts

Unnamed: 0,decade,count,artist_followers,order
0,'19,682,6819.5,1
1,'10s,469,27225.0,2
2,'00s,262,127324.0,3
3,'90s,122,60314.5,4
4,'80s,50,105852.5,5
5,'70s,39,130573.0,6
6,'60s,21,49029.0,7


### Grabbing each artist's genres so that I can figure out the most popular ones

In [16]:
# Strip the list punctuation (square brackets and single quotes) from the genre
# text, leaving a plain comma-separated string. Raw string: the pattern contains
# backslashes that are not valid Python string escapes.
df['artist_genres'] = df['artist_genres'].str.replace(r"[\[\]']", '', regex=True)

In [31]:
genreRows = []
def get_genres(x):
    """Append one record per genre of song row ``x`` to the global ``genreRows``.

    ``x.artist_genres`` is split on ", "; each piece becomes a dict with the
    keys genre, playlist, popularity and artist (taken from ``x.playlist_name``,
    ``x.song_popularity`` and ``x.artist_name``). Returns None.
    """
    genreRows.extend(
        {
            'genre': genre,
            'playlist': x.playlist_name,
            'popularity': x.song_popularity,
            'artist': x.artist_name,
        }
        for genre in x.artist_genres.split(', ')
    )
        
# Run get_genres over every song row for its side effect of filling genreRows
# (the value returned by apply is not used), then export the collected records.
df.apply(get_genres, axis=1)
pd.DataFrame(genreRows).to_csv('genres_dict.csv', index=False)

In [32]:
# Display the collected per-genre records as a table.
pd.DataFrame(genreRows)

Unnamed: 0,artist,genre,playlist,popularity
0,blink-182,pop punk,New Punk Tracks,0
1,blink-182,punk,New Punk Tracks,0
2,blink-182,socal pop punk,New Punk Tracks,0
3,Stand Atlantic,anthem emo,New Punk Tracks,38
4,Stand Atlantic,pixie,New Punk Tracks,38
5,Stand Atlantic,pop emo,New Punk Tracks,38
6,Stand Atlantic,pop punk,New Punk Tracks,38
7,Bayside,alternative emo,New Punk Tracks,0
8,Bayside,dreamo,New Punk Tracks,0
9,Bayside,emo,New Punk Tracks,0


In [39]:
# Flatten every song's comma-separated genre string into one long list of genres.
# Raw string: the pattern contains backslashes that are not valid string escapes.
genresList = df['artist_genres'].str.replace(r"[\[\]']", '', regex=True)

# Rows without genre text would come through as NaN, which cannot be iterated, so drop them first.
all_genres = [
    genre
    for genres in genresList.dropna().str.split(', ')
    for genre in genres
]

In [34]:
# pd.DataFrame(all_genres).rename(columns={0:'genre'}).to_csv('genres_list.csv', index=False)

In [24]:
# Frequency table of genres across all songs, most common first.
# The columns are named explicitly rather than renamed from whatever
# value_counts()/reset_index() call them, because those default names
# differ between pandas versions.
genre_counts = (
    pd.Series(all_genres)
    .value_counts()
    .rename_axis('genre')
    .reset_index(name='counts')
)
# Songs with no genres contribute an empty string; leave it out of the table.
genre_counts = genre_counts[genre_counts.genre != '']

genre_counts.to_csv('spotify_genre_counts.csv')

genre_counts.head()

Unnamed: 0,genre,counts
0,pop punk,321
2,punk,251
3,emo,234
4,anthem emo,226
5,melodic hardcore,191


In [None]:
# Number of songs each artist has in each playlist, one row per (artist, playlist).
# reset_index(name=...) names the count column directly, so it does not depend
# on the default name value_counts() gives its result in a given pandas version.
playlists_df = (
    df.groupby('artist_name')['playlist_name']
    .value_counts()
    .reset_index(name='playlists_in')
)

def get_list(x):
    """Return ``[x.playlist_name]``, or an empty list for a missing artist.

    The self-comparison of ``x.artist_name`` is only falsy for values that are
    not equal to themselves (such as NaN); in that case nothing is collected.
    """
    artist_is_present = x.artist_name == x.artist_name
    return [x.playlist_name] if artist_is_present else []

# NOTE(review): the result of this apply is not assigned anywhere, so it does
# not change playlists_df; only the head() below is shown.
playlists_df.apply(get_list, axis=1)
playlists_df.head()

In [None]:
# (Disabled) scaling of three audio features by 100:
# df['valence'] = df.valence * 100
# df['danceability'] = df.danceability * 100
# df['energy'] = df.energy * 100
# Drop rows missing energy, valence, popularity or danceability and write the result to disk.
# NOTE(review): this overwrites the same CSV that `df` is read from earlier in
# the notebook, and `df` now carries the columns added since it was loaded.
# Confirm that rewriting the input file is intended.
df.dropna(subset=['energy','valence','song_popularity','danceability']).to_csv('spotify_punk_playlists.csv', index=False)

In [None]:
# Per-decade averages of popularity and audio features, one row per decade label.
# Every column comes from the same grouping, so the rows line up by position.
songs_by_decade = df.groupby('decade')
df_features = pd.DataFrame({
    'decade': songs_by_decade['danceability'].mean().reset_index()['decade'],
    'popularity': songs_by_decade['song_popularity'].mean().reset_index(drop=True),
    'energy': songs_by_decade['energy'].mean().reset_index(drop=True),
    # duration_ms divided by 60000 gives minutes
    'length': songs_by_decade['duration_ms'].mean().reset_index(drop=True) / 60000,
    'danceability': songs_by_decade['danceability'].mean().reset_index(drop=True),
    'valence': songs_by_decade['valence'].mean().reset_index(drop=True),
    'loudness': songs_by_decade['loudness'].mean().reset_index(drop=True),
})

def get_decade(x):
    """Translate a decade label into its sort rank.

    "'19" -> 1, "'10s" -> 2, "'00s" -> 3, "'90s" -> 4, "'80s" -> 5,
    "'70s" -> 6, "'60s" -> 7; any other value yields None.

    Note: despite its name this returns the rank, and it duplicates a
    ``get_decade`` definition that appears earlier in the notebook.
    """
    labels_newest_first = ("'19", "'10s", "'00s", "'90s", "'80s", "'70s", "'60s")
    for rank, label in enumerate(labels_newest_first, start=1):
        if x == label:
            return rank
    return None
    
# Rank each decade label, order the table newest-first and export it.
df_features = (
    df_features
    .assign(order=df_features['decade'].apply(get_decade))
    .sort_values('order')
)

df_features.to_csv('song_features.csv', index=False)