In [8]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib import rc
import matplotlib as mpl

plt.style.use('ggplot')
pd.options.display.width = 0

sns.set_style("whitegrid", {"grid.color": "#cbd2d9"})

mpl.rcParams['font.size'] = 12
mpl.rcParams['axes.facecolor'] = '#eeede9'
mpl.rcParams['figure.facecolor'] = '#eeede9'

rc('font',**{'family':'sans-serif','sans-serif':['DM Sans']})
rc('text', usetex=True)

In [4]:
freq_read_df = pd.read_csv('reading-data/clean/frequency-of-read.csv')

In [20]:
freq_read_df

Unnamed: 0,age,region,frequencies,male_read,female_read
0,6 - 14,Bangkok,Daily,168272,178217.0
1,15 - 24,Bangkok,Daily,399372,388568.0
2,25 - 39,Bangkok,Daily,943768,1054825.0
3,40 - 59,Bangkok,Daily,932599,934143.0
4,60 and over,Bangkok,Daily,241798,269489.0
5,6 - 14,Bangkok,4 - 6 days/week,96940,69225.0
6,15 - 24,Bangkok,4 - 6 days/week,49932,50044.0
7,25 - 39,Bangkok,4 - 6 days/week,150570,154079.0
8,40 - 59,Bangkok,4 - 6 days/week,159777,187699.0
9,60 and over,Bangkok,4 - 6 days/week,47049,53064.0


In [5]:
melted_df = pd.melt(freq_read_df, id_vars=['age', 'region', 'frequencies'], 
                    value_vars=['male_read', 'female_read'], 
                    var_name='gender', value_name='read_amount')

# Replace 'male_read' and 'female_read' with 'Male' and 'Female' in the 'gender' column
melted_df['gender'] = melted_df['gender'].replace({'male_read': 'Male', 'female_read': 'Female'})

In [38]:
melted_df

Unnamed: 0,age,region,frequencies,gender,read_amount
0,6 - 14,Bangkok,Daily,Male,168272.0
1,15 - 24,Bangkok,Daily,Male,399372.0
2,25 - 39,Bangkok,Daily,Male,943768.0
3,40 - 59,Bangkok,Daily,Male,932599.0
4,60 and over,Bangkok,Daily,Male,241798.0
5,6 - 14,Bangkok,4 - 6 days/week,Male,96940.0
6,15 - 24,Bangkok,4 - 6 days/week,Male,49932.0
7,25 - 39,Bangkok,4 - 6 days/week,Male,150570.0
8,40 - 59,Bangkok,4 - 6 days/week,Male,159777.0
9,60 and over,Bangkok,4 - 6 days/week,Male,47049.0


In [6]:
melted_df.fillna(method='ffill', inplace=True)

In [11]:
grouped_data = melted_df.groupby(['age', 'gender', 'frequencies'])['read_amount'].sum().reset_index()

In [17]:
pivot_table = melted_df.pivot_table(index=['age', 'gender', 'region'], columns='frequencies', values='read_amount', fill_value=0)

In [18]:
pivot_table

Unnamed: 0_level_0,Unnamed: 1_level_0,frequencies,1 day/week,2 - 3 days/week,4 - 6 days/week,Daily,Occasionally
age,gender,region,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
15 - 24,Female,Bangkok,1745,16039,50044,388568,7669
15 - 24,Male,Bangkok,1311,19605,49932,399372,2001
25 - 39,Female,Bangkok,3632,106697,154079,1054825,19195
25 - 39,Male,Bangkok,5348,125009,150570,943768,20630
40 - 59,Female,Bangkok,14959,146201,187699,934143,67321
40 - 59,Male,Bangkok,13870,132823,159777,932599,51603
6 - 14,Female,Bangkok,1745,30259,69225,178217,17437
6 - 14,Male,Bangkok,704,33388,96940,168272,3576
60 and over,Female,Bangkok,17437,83237,53064,269489,43544
60 and over,Male,Bangkok,10408,59253,47049,241798,47159


In [1]:
def format_thousands(x, pos):
    if x >= 1000:
        return f'{x / 1000:.0f}k'
    else:
        return int(x)

In [None]:
pivot_table.plot(kind='bar', stacked=True)
plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(format_thousands))

# Customize the plot
plt.xlabel('Age Group')
plt.ylabel('Frequencies')
plt.title('Male and Female Frequencies by Age Group')
plt.legend()

# Show the plot
plt.show()