In [2]:
import pandas as pd, numpy as np
import plotly.express as px

In [5]:
# Load the yearly averages CSV and preview its first rows
avg_yearly_csv = './data/avg_yearly.csv'
df = pd.read_csv(avg_yearly_csv)
display(df.head(n=5))

Unnamed: 0,year_,women_perc,RM5,decade,year
0,1945,0.032222,,40's,5
1,1946,0.035185,,40's,6
2,1947,0.022727,,40's,7
3,1948,0.032632,,40's,8
4,1949,0.041053,0.032764,40's,9


In [6]:
# Line chart over the full period with two series: the yearly value (women_perc)
# and RM5 (presumably a 5-year rolling mean: its first value shows up on the
# fifth row of the data -- TODO confirm against the script that built the CSV).
fig = px.line(
    df,
    x='year_',
    y=['women_perc', 'RM5'],
    markers=True,
    title='Women Representation in Parliament (Global Average) from 1945 to 2018',
    labels={'year_': 'Year', 'women_perc': 'Participation (%)'},
)

# Typography and a fixed 800x500 canvas
layout_style = dict(
    font_family="Courier New",
    font_color="grey",
    title_font_family="Courier New",
    title_font_color="#1f77b4",
    legend_title_font_color="grey",
    autosize=False,
    width=800,
    height=500,
)
fig.update_layout(**layout_style)

# Explicit y-axis title, about 10 x ticks, and a range slider under the x axis
fig.update_yaxes(title_text='Women (%)')
fig.update_xaxes(nticks=10, rangeslider_visible=True)

fig.show()

In [7]:
# One line per decade; x is the `year` column (the last digit of the year in
# the preview), so decades are overlaid on a common axis.
# Only rows after 1949 are plotted: the 40's start at 1945 in the data and
# would otherwise show up as a partial line.
temp = df.query("year_ > 1949")

# Plot the filtered frame (previously the filter was computed but `df` was plotted).
fig = px.line(temp, x="year", y="women_perc", color='decade', markers=True,
              title='Women Representation in Parliament (Global Average) by Decade',
              labels=dict(year='Year', women_perc='Women (%)', decade='Decade')
)
fig.update_layout(
        font_family="Courier New",
        font_color="grey",
        title_font_family="Courier New",
        title_font_color="#1f77b4",
        legend_title_font_color="grey",
        autosize=False,
        width=800,
        height=400
        )
fig.update_xaxes(nticks=20)
fig.show()

In [9]:
# Distribution of the yearly values within each decade.
# The title announces the 50's through the 10's, so rows up to 1949 (the
# partial 40's) are excluded to keep the plot consistent with its title.
df_from_1950 = df.query("year_ > 1949")

fig = px.box(df_from_1950, x="decade", y="women_perc", color="decade",
             title="Women Representation in Parliament (Global Average) from 50's to 10's ",
             labels=dict(women_perc='Women (%)', decade='Decade')
)

fig.update_layout(
        font_family="Courier New",
        font_color="grey",
        title_font_family="Courier New",
        title_font_color="#1f77b4",
        legend_title_font_color="grey",
        autosize=False,
        width=800,
        height=400
        )

fig.show()

In many countries, women have been underrepresented in government and in different institutions. As shown in the graphs above, this historical tendency still persists, although the percentage of women in national parliaments is increasing. In the 50's and 60's, women accounted for only 5% or 6% of parliament members around the world, with the late 70's showing growth in participation, reaching almost double figures in certain years. The 80's and 90's followed the same upward trend, with the proportion of women reaching or exceeding 10% of the total. Undoubtedly, from the early 2000s onwards there was an impressive rise of women in national parliaments, with the global average nearly doubling and reaching almost 25% in recent years.

In [11]:
import plotly.graph_objects as go

# Decade-over-decade relative change of the per-decade median of women_perc
df_dec_avg = pd.DataFrame(df.groupby(['decade'])['women_perc'].median())

# Put decades in chronological order using the first year seen in each one.
# Sorting by the median itself only works while every decade is higher than
# the previous one, and would attach changes to the wrong decades otherwise.
decade_order = df.groupby('decade')['year_'].min().sort_values().index
df_dec_avg = df_dec_avg.loc[decade_order]

# pct_change returns a fraction of the previous row (0.25 means +25 %)
df_dec_avg['perc_change'] = df_dec_avg['women_perc'].pct_change().round(2)

# The first decade has no predecessor (NaN change), so it is left out of the table
fig = go.Figure(data=[go.Table(header=dict(values=['Decade', 'Percentage Increase']),
                 cells=dict(values=[df_dec_avg.index[1:], df_dec_avg['perc_change'].iloc[1:]]))
                     ])
fig.show()

In [69]:
# read population data
# Only the listed column positions are loaded (presumably the area-name column
# plus one column per reference year -- TODO confirm against the workbook).
df_pop = pd.read_excel('./data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx',usecols=[2,7,17,27,37,47,57,67,75])
# Skip the first 15 rows, then promote the next row to be the header
df_pop = df_pop.iloc[15:]
df_pop.columns = df_pop.iloc[0]
df_pop = df_pop.iloc[1:]
# The area-name column is renamed to 'decade' because, once transposed below,
# it becomes the header row and therefore the name of the index column.
df_pop = df_pop.rename(columns={'Region, subregion, country or area *':'decade'})

# reshape population data: one row per year column, one column per area name
df_pop_ = df_pop.T.reset_index()
df_pop_.columns = df_pop_.iloc[0]
df_pop_ = df_pop_.iloc[1:]
df_pop_ = df_pop_.set_index('decade')

# read historical data
data = pd.read_csv('./data/women_in_parliament-historical_database-1945_to_2018_cleaned.csv')

# exclude from election results 'upper' chamber type
data = data.query("chamber_type != 'upper' ")

# get average percentage participation by country and decade
df_avg_dc = pd.DataFrame(data.groupby(['country', 'decade'])['women_perc'].mean().reset_index())


# reshape population df to long form: one row per (decade, country) with its population
countries = list(df_avg_dc['country'].unique())
pop_frames = []
countries_without_pop = []
for country in countries:
    # Country names that cannot be matched to exactly one population column are
    # skipped (best effort), but recorded instead of being silently swallowed.
    if country not in df_pop_.columns:
        countries_without_pop.append(country)
        continue
    country_pop = df_pop_[country]
    if not isinstance(country_pop, pd.Series):
        # duplicated column label in the population sheet -> ambiguous match
        countries_without_pop.append(country)
        continue
    country_pop = country_pop.reset_index().rename(columns={country: 'pop'})
    country_pop['country'] = country
    pop_frames.append(country_pop)

if countries_without_pop:
    print(f"No population column matched for {len(countries_without_pop)} countries: {countries_without_pop}")

# Concatenate once rather than growing the frame inside the loop
df_pop_c_d = pd.concat(pop_frames, ignore_index=True)

# Cast to int64 so the merge below joins on 'decade' with an integer key
df_pop_c_d['decade'] = df_pop_c_d['decade'].astype('int64')

# merge population to participation by decade, country data;
# drop incomplete rows, make population numeric and order by decade
df_pop_participation = (
    df_avg_dc
    .merge(df_pop_c_d, on=['country', 'decade'])
    .dropna()
    .astype({'pop': 'float'})
    .sort_values(by='decade')
)

In [72]:
# Preview the first rows of the merged participation / population table
df_pop_participation.head(n=5)

Unnamed: 0,country,decade,women_perc,pop
1044,Indonesia,1950,0.045,69543.321
1280,New Zealand,1950,0.046667,1908.011
1273,Netherlands,1950,0.085,10042.051
1266,Nepal,1950,0.07,8483.322
1230,Mongolia,1950,0.17,780.199


In [74]:
# Faceted bubble chart: one panel per decade (three panels per row), one marker
# per country, coloured by country and sized by the 'pop' column.
fig = px.scatter(
    df_pop_participation,
    x='country',
    y='women_perc',
    size='pop',
    color='country',
    facet_col='decade',
    facet_col_wrap=3,
    title='Women Participation in Parliaments by Decade - World',
    labels={
        'women_perc': 'Women (%)',
        'decade': 'Decade',
        'country': 'Country',
        'pop': 'Population (thousands)',
    },
)

# y ticks start at 0 and are spaced every 0.2
fig.update_yaxes(tick0=0, dtick=0.2)

# Typography only; the figure keeps its automatic size here
text_style = dict(
    font_family="Courier New",
    font_color="grey",
    title_font_family="Courier New",
    title_font_color="#1f77b4",
    legend_title_font_color="grey",
)
fig.update_layout(**text_style)

fig.show()