# Decades
This notebook plots, decade by decade, the cluster data created in the Training-Model notebook.

## Setup
First, the notebook sets up its dependencies and imports the data to be used.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import library as lib
import seaborn as sns

In [None]:
import os

# Root directory for the figures saved by this notebook. Bar charts are
# written to "<root>/bar" and line charts to "<root>/line".
image_filepath = "../images/plots/decades"

# plt.savefig() does not create missing directories, so make sure both output
# folders exist before any chart is saved.
for _subdir in ("bar", "line"):
    os.makedirs(f"{image_filepath}/{_subdir}", exist_ok=True)

In [None]:
# Load the music DataFrame through the project's `library` helper module.
# Later cells read its "Year", "Cluster", "Placement" and "Track" columns.
df = lib.import_music_df_with_model()
# Preview the first rows to sanity-check the import.
df.head()

In [None]:
# Derive a decade label for every row: floor the year to a multiple of ten,
# turn it into text and append "s" (e.g. "1990s" for integer years).
df['Decade'] = df['Year'].floordiv(10).mul(10).astype(str).add('s')
df.head()

In [None]:
from collections import Counter
# Tally how many rows of df carry each "Cluster" value.
Counter(df["Cluster"])

## Plots
With the data prepared, the notebook now produces a series of plots to see what trends there are. For each decade and cluster, it looks at the average (median) value of each musical attribute.

In [None]:
# Group by decade and cluster, then take the median of every numeric column.
# NOTE: the result keeps its established name `decade_mean_df`, but the
# statistic computed here is the median, not the arithmetic mean.
decade_gb = df.groupby(by=["Decade", "Cluster"])
# numeric_only=True skips non-numeric columns (presumably text such as
# "Track"); pandas >= 2.0 otherwise raises a TypeError when asked for the
# median of non-numeric data.
decade_mean_df = decade_gb.median(numeric_only=True)
decade_mean_df

In [None]:
# Columns of the per-decade summary that each get their own chart: the chart
# placement first, followed by the musical attributes.
columns = [
    "Placement",
    "danceability", "energy", "key", "loudness", "speechiness",
    "acousticness", "liveness", "valence", "tempo",
]

# Legend entries handed to every chart below.
legend_labels = ["Cluster 0", "Cluster 1", "Cluster 2"]

### Bar Charts

In [None]:
# Create Bar Charts
def decade_bar_chart(df, column, legend_labels=None):
    """Draw a grouped bar chart of one column, with one bar per cluster.

    Args:
        df: DataFrame whose index is a MultiIndex containing a "Cluster"
            level; the remaining index level becomes the x axis.
        column: Name of the column in ``df`` to plot. It is also used
            (capitalized) in the chart title and as the y-axis label.
        legend_labels: Legend entries, one per plotted cluster. When omitted,
            labels of the form "Cluster <id>" are derived from the data so the
            legend always matches the clusters actually present.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    pretty_name = column.capitalize()
    # Pivot the "Cluster" index level into columns so each cluster gets its own bar.
    per_cluster = df[[column]].unstack('Cluster')
    axis = per_cluster.plot(kind="bar", title=f"Average {pretty_name} through the Decades")
    axis.set_ylabel(pretty_name)
    if legend_labels is None:
        cluster_ids = per_cluster.columns.get_level_values('Cluster')
        legend_labels = [f"Cluster {cluster_id}" for cluster_id in cluster_ids]
    # Anchor the legend to the right of the axes so it does not cover the bars.
    axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
    return axis
    
# Apply the seaborn theme before plotting: set_theme() changes matplotlib's
# defaults, which only influence charts drawn afterwards.
sns.set_theme()
decade_bar_chart(decade_mean_df, "danceability", legend_labels)
plt.show()

In [None]:
# Theme once, up front: the call is loop-invariant and has to be active
# before a chart is drawn in order to influence it.
sns.set_theme()
# Render, save and display one bar chart per column.
for column in columns:
    decade_bar_chart(decade_mean_df, column, legend_labels)
    # bbox_inches='tight' keeps the legend, which sits beside the axes, inside the saved image.
    plt.savefig(f"{image_filepath}/bar/{column.lower()}.png", bbox_inches='tight')
    plt.show()

### Line Charts

In [None]:
def decade_line_chart(df, column, legend_labels=None):
    """Draw a line chart of one column (or of every column), one line per cluster.

    Args:
        df: DataFrame whose index is a MultiIndex containing a "Cluster"
            level; the remaining index level becomes the x axis.
        column: Name of the column in ``df`` to plot. A falsy value (``None``
            or ``""``) plots every column of ``df`` instead; the title and
            y-axis label then carry no column name.
        legend_labels: Legend entries, one per plotted line. When omitted and
            a single column is plotted, labels of the form "Cluster <id>" are
            derived from the data; when omitted and every column is plotted,
            the labels generated by pandas are kept.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    # Resolve the display name before using it, so a falsy column no longer
    # fails on `.capitalize()` ahead of the "plot everything" branch.
    pretty_name = column.capitalize() if column else ""
    data = df[[column]] if column else df
    # Pivot the "Cluster" index level into columns so each cluster gets its own line.
    per_cluster = data.unstack('Cluster')
    axis = per_cluster.plot(kind="line", title=f"Average {pretty_name} through the Decades")
    axis.set_ylabel(pretty_name)
    if legend_labels is None and column:
        cluster_ids = per_cluster.columns.get_level_values('Cluster')
        legend_labels = [f"Cluster {cluster_id}" for cluster_id in cluster_ids]
    # Anchor the legend to the right of the axes so it does not cover the lines.
    if legend_labels is None:
        axis.legend(loc='center right', bbox_to_anchor=(1.275, .5))
    else:
        axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
    return axis
    
# Apply the seaborn theme before plotting: set_theme() changes matplotlib's
# defaults, which only influence charts drawn afterwards.
sns.set_theme()
decade_line_chart(decade_mean_df, "danceability", legend_labels)
plt.show()

In [None]:
# Theme once, up front: the call is loop-invariant and has to be active
# before a chart is drawn in order to influence it.
sns.set_theme()
# Render, save and display one line chart per column.
for column in columns:
    decade_line_chart(decade_mean_df, column, legend_labels)
    # bbox_inches='tight' keeps the legend, which sits beside the axes, inside the saved image.
    plt.savefig(f"{image_filepath}/line/{column.lower()}.png", bbox_inches='tight')
    plt.show()

### Count of Hits
Let's look at the number of hits per cluster per decade.

In [None]:
# Number of non-null "Track" entries in every (decade, cluster) group.
# Selecting the column before counting avoids counting every other column
# only to throw the results away.
hits_counts_df = decade_gb["Track"].count()

In [None]:
# Apply the seaborn theme before plotting; it only affects charts drawn afterwards.
sns.set_theme()
# A (decade, cluster) pair that is absent from the counts had no hits, so it
# is plotted as 0 rather than leaving a NaN gap in the line.
axis = hits_counts_df.unstack('Cluster', fill_value=0).plot(kind="line", title="Number of Hits throughout the Decades")
axis.set_ylabel("Number of Hits")
# Anchor the legend to the right of the axes so it does not cover the lines.
axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
# bbox_inches='tight' keeps the legend inside the saved image.
plt.savefig(f"{image_filepath}/line/number_of_hits.png", bbox_inches='tight')
plt.show()

### Count of Number 1 Hits
One additional aspect to look at is how many number 1 hits each cluster had within each decade.

In [None]:
# Keep only the rows whose placement equals 1, i.e. the songs that reached
# the number 1 position.
number_one_df = df.loc[df["Placement"].eq(1)]
number_one_df.head()

In [None]:
# Bucket the number 1 songs by decade and cluster.
number_one_gb = number_one_df.groupby(by=["Decade", "Cluster"])
# On a GroupBy, head() returns the first rows of every group (five by default).
number_one_gb.head(n=5)

In [None]:
# Apply the seaborn theme before plotting; it only affects charts drawn afterwards.
sns.set_theme()
# Number of non-null "Track" entries per (decade, cluster) among the number 1 songs.
number_one_count_df = number_one_gb["Track"].count()

# A (decade, cluster) pair that is absent from the counts had no number 1
# hit, so it is plotted as 0 rather than leaving a NaN gap in the line.
axis = number_one_count_df.unstack('Cluster', fill_value=0).plot(kind="line", title="Number of #1 Hits throughout the Decades")
axis.set_ylabel("Number of #1 Hits")
# Anchor the legend to the right of the axes so it does not cover the lines.
axis.legend(legend_labels, loc='center right', bbox_to_anchor=(1.275, .5))
# bbox_inches='tight' keeps the legend inside the saved image.
plt.savefig(f"{image_filepath}/line/number_ones.png", bbox_inches='tight')
plt.show()