In [1]:
import pandas as pd
import numpy as np
from custom_plots import plot_categorical_comparison, plot_correlation_matrix
from sklearn.preprocessing import MinMaxScaler

In [2]:
COLOR_DICT = {
                'cdn': '#B32E29',
                'us': '#3279B3',
                'uk': '#DE7012',
                'int': '#FACD14'
             }

In [3]:
df = pd.read_csv('data/clean_data.csv')

In [4]:
df.columns

Index(['Unnamed: 0', 'week_of', 'station', 'station_city', 'station_province',
       'station_latitude', 'station_longitude', 'chart_position',
       'artist_name(s)', 'artist_country', 'artist_home_city',
       'artist_home_latitude', 'artist_home_longitude',
       'km_distance_(home_station)', 'album_name', 'label_name', 'label_type',
       'language_of_music', 'visible_ethnic_minority',
       'census_race_classification', 'artist_gender', 'm_music', 'a_artist',
       'p_performance', 'l_lyrics', 'artist_is_group', 'canadian_content',
       'city_population'],
      dtype='object')

In [5]:
plot_categorical_comparison(
                                        categories = df['census_race_classification'], 
                                        compare_by = df['artist_country'], 
                                        comparison_filter=['us','cdn','uk','int'], 
                                        color_dict=COLOR_DICT )

In [6]:
plot_categorical_comparison(
                                        categories = df['artist_gender'], 
                                        compare_by = df['artist_country'], 
                                        comparison_filter=['us','cdn','uk','int'], 
                                        color_dict=COLOR_DICT )

In [7]:
plot_categorical_comparison(
                                        categories = df['visible_ethnic_minority'], 
                                        compare_by = df['station_province'], 
                                        category_filter=['yes','no'], 
                                        # color_dict=COLOR_DICT )
)

In [8]:
ontario_df = df[df['station_province'] == 'ontario']
plot_categorical_comparison(
                                        categories = ontario_df['visible_ethnic_minority'], 
                                        compare_by = ontario_df['station_city'], 
                                        # comparison_filter=['us','cdn','uk','int'], 
                                        # color_dict=COLOR_DICT )
                            )   

In [9]:
plot_categorical_comparison(
                                        categories = df['visible_ethnic_minority'], 
                                        compare_by = df['station'], 
                                        category_filter=['yes','no'], 
                                        # color_dict=COLOR_DICT )
)

In [10]:
station_df = pd.read_csv('data/clean_data_stations.csv', index_col=0)
station_df.head(1)

Unnamed: 0,station,station_city,city_population,station_province,station_latitude,station_longitude,total_plays,total_artists,artists_to_plays_ratio,artists_to_population_ratio,bipoc_artists,canadian_artists,male_artists,english_plays
0,cjsr,edmonton,932550,alberta,53.55,-113.5,1920,425,0.221354,0.000456,0.651042,0.432292,0.851562,0.986458


In [64]:
import plotly.graph_objects as go
DEFAULT_COLOR_SCALE =   [
                        [0.0, "#84145C"],
                        [0.5, "rgb(255,255,255)"],
                        [1.0, "#276B36"]
                        ]
def plot_correlation_matrix(df:pd.DataFrame, color_scale=DEFAULT_COLOR_SCALE):
    corr = df.corr()
    mask = np.triu(np.ones_like(corr, dtype=bool))
    df_mask = corr.mask(mask)

    fig = go.Figure()

    trace = go.Heatmap(
                        z=df_mask.to_numpy(),
                        x=df_mask.columns.tolist(),
                        y=df_mask.columns.tolist(),
                        zmin=-1,
                        zmax=1,
                        colorscale= color_scale,
                        text=df_mask.to_numpy(),
                        # textfont_color='white', 
                        # texttemplate='%{text:.0%}', 
                        # textposition='inside'
                        )
    fig.add_trace(trace)
    axis_template = dict(showgrid = False, zeroline = False)    
    fig.update_layout(
        title_text='Heatmap', 
        title_x=0.5, 
        width=1000, 
        height=1000,
        yaxis = axis_template,
        xaxis = axis_template,
        yaxis_autorange = 'reversed',
        template='plotly_white',
    )
    fig.show()

In [63]:
station_numeric = station_df[[
                                'total_plays',
                                'total_artists',
                                'bipoc_artists',
                                'canadian_artists',
                                'male_artists',
                                'english_plays'
                            ]]
plot_correlation_matrix(station_numeric)