In [147]:
# Mateusz Janusz (mjanu001@gold.ac.uk) survey code submission

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pandas.api.types import CategoricalDtype

# Load only the three survey questions analysed below (picked by column
# position in the CSV) and give them short, code-friendly names.
df = pd.read_csv('./music_survey.csv', usecols=[1, 2, 8])
df.columns = ['way', 'platform', 'age_group']

# Store every answer column as a pandas categorical.
df = df.astype({name: CategoricalDtype() for name in df.columns})

# Summary table (count / unique / top / freq) shown as the cell output.
df.describe()

Unnamed: 0,way,platform,age_group
count,106,106,106
unique,45,50,5
top,Streaming,Spotify;Youtube,16-23
freq,25,20,69


In [154]:
plt.close('all')

# Count respondents per age group. value_counts() returns the groups ordered
# by frequency (largest first), so the slice labels must be taken from the
# same Series; an independently sorted list of group names would attach the
# wrong label to a slice.
age_count = df['age_group'].value_counts()
age_groups = list(age_count.index)  # age group options, aligned with age_count

# "Explode" only the first (largest) slice, however many groups there are.
explode = [0.1 if i == 0 else 0 for i in range(len(age_count))]

age_fig, age_ax = plt.subplots(figsize=[10,10])

patches, texts, autotexts = age_ax.pie(
    age_count,
    explode=explode,
    labels=age_groups,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
    pctdistance=0.9
)

# Enlarge the group labels and the percentage labels drawn on the pie.
for label_text, pct_text in zip(texts, autotexts):
    label_text.set_fontsize(13)
    pct_text.set_fontsize(12)

age_ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
age_ax.set_title('Age groups in the survey')

age_fig.savefig('age_groups.png')

In [152]:
def autolabel(rects, ax):
    """Write each bar's height above it as a whole-number percentage.

    rects -- iterable of bars; each must offer get_x(), get_width() and
             get_height()
    ax    -- object whose text() method draws the labels (the bars' axes)
    """
    for bar in rects:
        bar_height = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2.
        label_y = bar_height + 1.5  # sit slightly above the top of the bar
        caption = str(int(bar_height)) + '%'  # fractional part is truncated
        ax.text(centre_x, label_y, caption, ha='center', va='bottom')


# Split multiple-choice answers (';'-separated) into one entry per choice.
ways = df['way'].str.split(';', expand=True)
ways = ways.stack()
ways_count = ways.value_counts()

way_options = list(ways_count.keys())
num_ways = len(ways_count)
rects = []

ways_fig, ways_ax = plt.subplots(figsize=[16,10])

# Create one bar per listening way. Iterating over items() pairs each label
# with its count directly; indexing the label-indexed Series with an integer
# (ways_count[i]) relies on a positional fallback that pandas has deprecated.
for i, (option, count) in enumerate(ways_count.items()):
    x = num_ways * i + 1
    # Percentage of all respondents (rows of df) who picked this option;
    # respondents could pick several, so the bars need not sum to 100.
    percentage = round((count / len(df)) * 100)
    rect = ways_ax.bar(x, percentage, 6, label=option, align='edge', yerr=1)
    rects.append(rect)

ways_ax.legend() # show legend
ways_ax.set_ylim(ymin=0) # start bar charts at pos 0
ways_ax.set_xticklabels([]) # hide x tick labels; the legend identifies the bars

ways_ax.set_title("How people listen to music")
ways_ax.set_ylabel('Percentage of participants who listen this way')

for rect in rects:
    autolabel(rect, ways_ax) # create a value label at the top of each bar

ways_fig.savefig('ways.png')

In [153]:
plt.close('all')

# Split multiple-choice answers (';'-separated) into one entry per choice.
platform = df['platform'].str.split(';', expand=True)
platform = platform.stack()
platform_count = platform.value_counts()

num_plats = len(platform_count)
plat_options = list(platform_count.keys())

plat_rects = []
plat_fig, plat_ax = plt.subplots(figsize=[16,10])
plat_ax.set_ymargin(0.2)

# One bar per platform. Iterating over items() pairs each label with its
# count directly; indexing the label-indexed Series with an integer
# (platform_count[i]) relies on a positional fallback that pandas has
# deprecated.
for i, (option, count) in enumerate(platform_count.items()):
    x = num_plats * i + 1
    # Percentage of all respondents (rows of df) who picked this platform.
    percentage = round((count / len(df)) * 100)
    rect = plat_ax.bar(x, percentage, 10, label=option, align='edge', yerr=1)
    plat_rects.append(rect)

plat_ax.legend()
plat_ax.set_xticklabels([])  # hide x tick labels; the legend identifies the bars
plat_ax.set_title("How people discover music everyday")
plat_ax.set_ylabel('Percentage of participants')

# Write the percentage value above every bar.
for rect in plat_rects:
    autolabel(rect, plat_ax)

plat_fig.savefig('platforms.png')

In [151]:
plt.close('all')

import matplotlib.cm as cm

# Load the genre question (picked by column position) from the second survey file.
genre_df = pd.read_csv('./music_survey2.csv', usecols=[4])
genre_df.columns = ['genre']
genre_df['genre'] = genre_df['genre'].astype(CategoricalDtype())

# Split multiple-choice answers (';'-separated) into one entry per choice.
genres = genre_df['genre'].str.split(';', expand=True)
genres = genres.stack()

genres_count = genres.value_counts()

num_genres = len(genres_count)
genres_options = list(genres_count.keys())

genre_rects = []
genre_fig, genre_ax = plt.subplots(figsize=[16,10])
genre_ax.set_ymargin(0.2)

# One evenly spaced colour from the rainbow colormap per genre.
colors = cm.rainbow(np.linspace(0, 1, num_genres))

# One bar per genre. Iterating over items() pairs each label with its count
# directly; indexing the label-indexed Series with an integer
# (genres_count[i]) relies on a positional fallback that pandas has deprecated.
for i, (option, count) in enumerate(genres_count.items()):
    x = num_genres * i + 1
    # Percentage of all respondents (rows of genre_df) who picked this genre.
    percentage = round((count / len(genre_df)) * 100)
    rect = genre_ax.bar(x, percentage, 10, label=option, align='edge', yerr=1, color=colors[i])
    genre_rects.append(rect)

genre_ax.legend()

genre_ax.set_xticklabels([])  # hide x tick labels; the legend identifies the bars

genre_ax.set_title("Which music genres people mostly listened to in the last week")
genre_ax.set_ylabel('Percentage of participants')

# Write the percentage value above every bar.
for rect in genre_rects:
    autolabel(rect, genre_ax)

genre_fig.savefig('genre_fig.png')