# Decades
This notebook plots, decade by decade, the cluster data created in the Training-Model notebook.

## Setup
First, the notebook sets up its dependencies and imports the data to be used.

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd

import library as lib

In [2]:
# Root folder for every figure this notebook saves; bar charts are written to
# its "bar" subfolder and line charts to its "line" subfolder.
image_filepath = "../images/plots/decades"

# plt.savefig does not create missing folders, so make sure both output
# folders exist before any plotting cell tries to write into them.
for plot_kind in ("bar", "line"):
    os.makedirs(os.path.join(image_filepath, plot_kind), exist_ok=True)

In [4]:
# Load the track table through the project's `library` module.
# The preview below shows that the returned frame already carries the
# attribute columns (danceability, energy, ...) together with the "Cluster"
# and "Decade" columns.
df = lib.import_music_df_with_model()
df.head()

Unnamed: 0,Track,Artist,Placement,Year,Month,Day,danceability,energy,key,loudness,speechiness,acousticness,liveness,valence,tempo,Cluster,Decade
0,El Paso,Marty Robbins,1,1960,1,4,0.654,0.452,2.0,-9.709,0.03,0.835,0.16,0.691,106.662,1,1960s
4,Why,Frankie Avalon,2,1960,1,4,0.51,0.349,5.0,-8.677,0.0264,0.762,0.13,0.586,94.267,1,1960s
7,The Big Hurt,Miss Toni Fisher,3,1960,1,4,0.624,0.557,0.0,-8.309,0.0299,0.783,0.173,0.676,123.919,2,1960s
14,Heartaches By The Number,Guy Mitchell,6,1960,1,4,0.584,0.589,5.0,-11.203,0.0718,0.463,0.069,0.842,172.484,0,1960s
16,It's Time To Cry,Paul Anka,7,1960,1,4,0.311,0.314,3.0,-12.81,0.0325,0.731,0.335,0.565,109.983,2,1960s


In [None]:
# Label every track with the decade of its chart year, e.g. 1960 -> "1960s"
decade_start = (df['Year'] // 10) * 10
df['Decade'] = decade_start.astype(str) + 's'
df.head()

## Plots
With the data prepared, the notebook now draws a series of plots to look for trends. Each plot shows the average of one musical attribute per cluster and decade.

In [None]:
# Group the tracks by decade and cluster. The same GroupBy object is reused
# further down for the hit counts.
decade_gb = df.groupby(by=["Decade", "Cluster"])

# Average only the numeric columns. The frame also holds text columns
# (Track, Artist), and recent pandas versions raise a TypeError when asked to
# average those instead of silently dropping them.
decade_mean_df = decade_gb.mean(numeric_only=True)
decade_mean_df

In [None]:
# Columns of the per-decade mean table that get their own chart:
# the chart placement first, then the musical attributes.
columns = [
    "Placement",
    "danceability", "energy", "key", "loudness", "speechiness",
    "acousticness", "liveness", "valence", "tempo",
]

# Legend entries, one per cluster label (0, 1 and 2)
legend_labels = ["Cluster 0", "Cluster 1", "Cluster 2"]

### Bar Charts

In [None]:
# Create Bar Charts
def decade_bar_chart(df, column, legend_labels=None):
    """Draw a grouped bar chart of one averaged column, split by cluster.

    Parameters
    ----------
    df : DataFrame
        Table whose index contains a level named "Cluster"; that level is
        unstacked so every cluster becomes its own bar series.
        Presumably the remaining index level is the decade (TODO confirm
        against the caller).
    column : str
        Name of the column to plot. Its capitalized form is used in the
        chart title and as the y-axis label.
    legend_labels : sequence of str, optional
        Legend entries, in the order the clusters are plotted. Defaults to
        "Cluster 0", "Cluster 1", "Cluster 2".

    Returns
    -------
    The axes object produced by the pandas plot call, so callers can keep
    customizing the chart.
    """
    # Build the default per call instead of sharing one mutable list between
    # calls through the signature.
    if legend_labels is None:
        legend_labels = ["Cluster 0", "Cluster 1", "Cluster 2"]

    label = column.capitalize()
    axis = df[[column]].unstack('Cluster').plot(
        kind="bar",
        title=f"Average {label} through the Decades",
    )
    axis.set_ylabel(label)
    # Anchor the legend to the right of the plot area.
    axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
    return axis
    
# Preview a single bar chart (danceability) before the next cell renders and
# saves one chart per column.
decade_bar_chart(decade_mean_df, "danceability", legend_labels)
plt.show()

In [None]:
# Draw, save and display one bar chart for every entry in `columns`
for column in columns:
    chart_path = f"{image_filepath}/bar/{column.lower()}.png"
    decade_bar_chart(decade_mean_df, column, legend_labels)
    # Write the current figure to disk, then display it
    plt.savefig(chart_path, bbox_inches='tight')
    plt.show()

### Line Charts

In [None]:
def decade_line_chart(df, column, legend_labels=None):
    """Draw a line chart of averaged values, split by cluster.

    Parameters
    ----------
    df : DataFrame
        Table whose index contains a level named "Cluster"; that level is
        unstacked so every cluster becomes its own line.
        Presumably the remaining index level is the decade (TODO confirm
        against the caller).
    column : str or None
        Name of the column to plot. Its capitalized form is used in the
        chart title and as the y-axis label. When it is None or empty,
        every column of ``df`` is plotted under a generic title.
    legend_labels : sequence of str, optional
        Legend entries, in the order the clusters are plotted. Defaults to
        "Cluster 0", "Cluster 1", "Cluster 2". Only applied when a single
        column is plotted; with several columns there is one line per
        column/cluster pair, so the automatic labels are kept.

    Returns
    -------
    The axes object produced by the pandas plot call, so callers can keep
    customizing the chart.
    """
    # Build the default per call instead of sharing one mutable list between
    # calls through the signature.
    if legend_labels is None:
        legend_labels = ["Cluster 0", "Cluster 1", "Cluster 2"]

    # Decide title and label inside the guard: calling column.capitalize()
    # up front would fail for column=None before the guard is ever reached.
    if column:
        data = df[[column]]
        label = column.capitalize()
        title = f"Average {label} through the Decades"
    else:
        data = df
        label = "Average"
        title = "Averages through the Decades"

    axis = data.unstack('Cluster').plot(kind="line", title=title)
    axis.set_ylabel(label)

    # Anchor the legend to the right of the plot area.
    if column:
        axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
    else:
        axis.legend(loc='center right', bbox_to_anchor=(1.275, .5))
    return axis
    
# Preview a single line chart (danceability) before the next cell renders and
# saves one chart per column.
decade_line_chart(decade_mean_df, "danceability", legend_labels)
plt.show()

In [None]:
# Draw, save and display one line chart for every entry in `columns`
for column in columns:
    chart_path = f"{image_filepath}/line/{column.lower()}.png"
    decade_line_chart(decade_mean_df, column, legend_labels)
    # Write the current figure to disk, then display it
    plt.savefig(chart_path, bbox_inches='tight')
    plt.show()

### Count of Hits
Let's look at the number of hits per cluster per decade.

In [None]:
# Number of tracks in every decade/cluster group, counted on the Track column
hits_counts_df = decade_gb["Track"].count()

In [None]:
# Spread the clusters into columns so each one is drawn as its own line
hits_by_cluster = hits_counts_df.unstack('Cluster')

axis = hits_by_cluster.plot(kind="line", title="Number of Hits throughout the Decades")
axis.set_ylabel("Number of Hits")
axis.legend(legend_labels, bbox_to_anchor=(1.275, .5), loc='center right')

# Write the current figure to disk, then display it
plt.savefig(f"{image_filepath}/line/number_of_hits.png", bbox_inches='tight')
plt.show()

### Count of Number 1 Hits
One additional aspect to look at is how many number 1 hits each cluster produced within each decade.

In [None]:
# Keep only the rows whose chart placement is 1
is_number_one = df["Placement"] == 1
number_one_df = df[is_number_one]
number_one_df.head()

In [None]:
# Group the placement-1 tracks by decade and cluster; the next cell counts
# the tracks in each group.
number_one_gb = number_one_df.groupby(["Decade", "Cluster"])
# NOTE: on a GroupBy, head() returns the first five rows of *each* group, so
# this preview grows with the number of decade/cluster combinations.
number_one_gb.head()

In [None]:
# Count the placement-1 tracks per decade/cluster group, then spread the
# clusters into columns so each one is drawn as its own line
number_one_count_df = number_one_gb["Track"].count()
number_ones_by_cluster = number_one_count_df.unstack('Cluster')

axis = number_ones_by_cluster.plot(kind="line", title="Number of #1 Hits throughout the Decades")
axis.set_ylabel("Number of #1 Hits")
axis.legend(legend_labels, bbox_to_anchor=(1.275, .5), loc='center right')

# Write the current figure to disk, then display it
plt.savefig(f"{image_filepath}/line/number_ones.png", bbox_inches='tight')
plt.show()