In [2]:
import pandas as pd
import numpy as np
from custom_plots import plot_categorical_comparison, plot_correlation_matrix
from sklearn.preprocessing import MinMaxScaler

In [3]:
COLOR_DICT = {
                'cdn': '#B32E29',
                'us': '#3279B3',
                'uk': '#DE7012',
                'int': '#FACD14',
                'alberta':'#587AFF',
                'british columbia':'#37BD54',
                'manitoba': '#FFD140',
                'nova scotia' : '#FF9A30',
                'ontario' : '#BD2F24',
                'quebec' : '#60C3FF',
                'new brunswick' : '#9552FF'
             }

In [4]:
df = pd.read_csv('data/clean_data.csv')

In [5]:
df.columns

Index(['Unnamed: 0', 'week_of', 'station', 'station_city', 'station_province',
       'station_latitude', 'station_longitude', 'chart_position',
       'artist_name(s)', 'artist_country', 'artist_home_city',
       'artist_home_latitude', 'artist_home_longitude',
       'km_distance_(home_station)', 'album_name', 'label_name', 'label_type',
       'language_of_music', 'visible_ethnic_minority',
       'census_race_classification', 'artist_gender', 'm_music', 'a_artist',
       'p_performance', 'l_lyrics', 'artist_is_group', 'canadian_content',
       'city_population'],
      dtype='object')

In [6]:
plot_categorical_comparison(
                                        categories = df['census_race_classification'], 
                                        compare_by = df['artist_country'], 
                                        comparison_filter=['us','cdn','uk','int'], 
                                        color_dict=COLOR_DICT )

In [7]:
plot_categorical_comparison(
                                        categories = df['artist_gender'], 
                                        compare_by = df['artist_country'], 
                                        comparison_filter=['us','cdn','uk','int'], 
                                        color_dict=COLOR_DICT )

In [8]:
plot_categorical_comparison(
                                        categories = df['visible_ethnic_minority'], 
                                        compare_by = df['station_province'], 
                                        category_filter=['yes','no'], 
                                        color_dict=COLOR_DICT )


In [9]:
plot_categorical_comparison(
                                        categories = df['canadian_content'], 
                                        compare_by = df['station_province'], 
                                        # category_filter=['yes','no'], 
                                        color_dict=COLOR_DICT )

In [10]:
station_df = pd.read_csv('data/clean_data_stations.csv')
station_df[station_df['station_province'] == 'nova scotia']

Unnamed: 0.1,Unnamed: 0,station,station_city,city_population,station_province,station_latitude,station_longitude,total_plays,total_artists,artists_to_plays_ratio,artists_to_population_ratio,bipoc_artists,canadian_artists,male_artists,english_plays
6,6,ckdu,halifax,403130,nova scotia,44.65,-63.6,780,169,0.216667,0.000419,0.520513,0.75641,0.916667,0.979487


In [11]:
station_df['total_plays'].sum()

35858

In [12]:
ontario_df = df[df['station_province'] == 'ontario']
plot_categorical_comparison(
                            categories = ontario_df['visible_ethnic_minority'], 
                            category_filter=['yes','no'], 
                            compare_by = ontario_df['station_city'], 
                            color_dict=COLOR_DICT 
                            )

In [13]:
station_df = pd.read_csv('data/clean_data_stations.csv', index_col=0)
station_df.head(1)

Unnamed: 0,station,station_city,city_population,station_province,station_latitude,station_longitude,total_plays,total_artists,artists_to_plays_ratio,artists_to_population_ratio,bipoc_artists,canadian_artists,male_artists,english_plays
0,cjsr,edmonton,932550,alberta,53.55,-113.5,1920,425,0.221354,0.000456,0.651042,0.432292,0.851562,0.986458


In [36]:
station_correlations = station_df[[
                                'total_plays',
                                'total_artists',
                                'bipoc_artists',
                                'canadian_artists',
                                'male_artists',
                                'english_plays',
                                'city_population'
                            ]]
plot_correlation_matrix(station_correlations, title='Correlation Map of Station Data')

In [17]:
artist_df = pd.read_csv('data/clean_data_artists.csv',index_col=0)

In [32]:
artist_df.columns

Index(['artist_name(s)', 'artist_country', 'artist_home_city',
       'visible_ethnic_minority', 'census_race_classification',
       'artist_gender', 'canadian_content', 'artist_is_group', 'total_plays',
       'artist_country_cdn', 'artist_country_int', 'artist_country_uk',
       'artist_country_unknown', 'artist_country_us',
       'visible_ethnic_minority_no', 'visible_ethnic_minority_unknown',
       'visible_ethnic_minority_yes', 'artist_gender_female',
       'artist_gender_male', 'artist_gender_mixed', 'artist_gender_unknown'],
      dtype='object')

In [35]:
artist_correlations = artist_df[[
                                'total_plays',
                                'canadian_content',
                                'visible_ethnic_minority_yes',
                                'artist_gender_male',
                                'artist_is_group'
                                ]]
plot_correlation_matrix(artist_correlations._get_numeric_data())