# Get average feature stats for each period

In [1]:
import json
from pprint import pprint
import pandas as pd

In [2]:
# Read in data: the parsed JSON is iterated below as a sequence of periods.
json_path = 'Outputs/features.json'
# Name the encoding explicitly: JSON text is UTF-8, and the platform default
# used by open() is locale-dependent (e.g. cp1252 on Windows), which would
# garble or reject non-ASCII text. The output files here are written as UTF-8.
with open(json_path, encoding='utf-8') as features_file:
    # A separate name for the handle avoids rebinding it to the parsed data.
    data = json.load(features_file)

In [2]:
def getAvg(list):
    """Return the arithmetic mean of the values in *list*.

    The argument must support both ``sum()`` and ``len()``. An empty
    argument raises ``ZeroDivisionError`` because the length is used as the
    divisor.
    """
    total = sum(list)
    count = len(list)
    return total / count

In [9]:
# Build one entry per period holding the mean of each audio feature across
# that period's top tracks.
#
# Each pair is (key written under 'stats', key read from track['features']).
# The order of this table fixes the key order of every 'stats' dict.
feature_table = (
    ('avg_danceability', 'danceability'),
    ('avg_energy', 'energy'),
    ('avg_loudness', 'loudness'),
    ('avg_mode', 'mode'),
    ('avg_speechiness', 'speechiness'),
    ('avg_acousticness', 'acousticness'),
    ('avg_instrumentalness', 'instrumentalness'),
    ('avg_liveness', 'liveness'),
    ('avg_valence', 'valence'),
    ('avg_tempo', 'tempo'),
    ('avg_duration', 'duration_ms'),
)

avg_stats = []

for period in data:
    # Raw values per output key, gathered over every track in the period.
    samples_by_stat = {stat_key: [] for stat_key, _ in feature_table}

    for track in period['top_tracks']:
        features = track['features']
        for stat_key, feature_key in feature_table:
            samples_by_stat[stat_key].append(features[feature_key])

    avg_stats.append({
        'date': period['date'],
        'stats': {
            # getAvg divides by the sample count, so a period with no
            # tracks raises ZeroDivisionError here.
            **{stat_key: getAvg(samples)
               for stat_key, samples in samples_by_stat.items()},
            # Written as an empty list; nothing in this cell fills it in.
            'top_genres': [],
        },
    })

In [11]:
# Write the per-period averages to disk as indented UTF-8 JSON, leaving
# non-ASCII characters unescaped.
with open('./Outputs/period_stats.json', mode='w', encoding='utf-8') as stats_file:
    json.dump(avg_stats, stats_file, indent=4, ensure_ascii=False)

## Get Genre information

In [26]:
# Genres to track
genre_list = [
    'pop',
    'rock',
    'country',
    'r&b',
    'disco',
    'funk',
    'rap',
    'metal',
]

In [30]:
# Check whether a target genre appears in a track's genre list
def check_genre(track_genres, target_genre):
    """Return 1 if *target_genre* occurs inside any entry of *track_genres*, else 0.

    The comparison is the ``in`` operator applied to each entry, so for
    string entries it is a substring match: 'pop' matches 'dance pop'.
    An empty *track_genres* yields 0.
    """
    matched = any(target_genre in entry for entry in track_genres)
    return int(matched)

In [38]:
# Create a list with, for each period, the number of top tracks matching
# each tracked genre.
#
# The genres come from genre_list, so the counters and the output keys stay
# in sync with that selection rather than being repeated by hand here. The
# order of genre_list fixes the key order of every 'genres' dict.
genre_stats = []
for period in data:
    tracks = period['top_tracks']

    counts = {}
    for genre in genre_list:
        # check_genre returns 1 or 0 per track, so the sum is the number of
        # tracks in this period that match the genre.
        counts[genre] = sum(check_genre(track['genres'], genre) for track in tracks)

    genre_stats.append({'date': period['date'], 'genres': counts})

In [39]:
# Write the per-period genre counts to disk as indented UTF-8 JSON, leaving
# non-ASCII characters unescaped.
with open('./Outputs/genre_stats.json', mode='w', encoding='utf-8') as genre_file:
    json.dump(genre_stats, genre_file, indent=4, ensure_ascii=False)