In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

import plotly.express as px

def plot_perc_years(df, title):
    """Show a line chart of 'women_perc' against 'year', one line per country.

    Parameters
    ----------
    df : DataFrame
        Must provide the 'year', 'women_perc' and 'country' columns.
    title : str
        Text used as the chart title.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    axis_labels = {"women_perc": "Women in Parliament(%)", "year": "Year"}
    # Shared text styling: one font family, separate colours for body, title and legend title.
    text_style = {
        "font_family": "Courier New",
        "font_color": "blue",
        "title_font_family": "Courier New",
        "title_font_color": "red",
        "legend_title_font_color": "green",
    }

    chart = px.line(
        df,
        x='year',
        y='women_perc',
        color='country',
        markers=True,
        title=title,
        labels=axis_labels,
    )
    chart.update_layout(**text_style)
    chart.show()

# Read the women-in-parliament historical data file.
data = pd.read_csv('./data/women_in_parliament-historical_database-1945_to_2018_cleaned.csv')

# Work on a copy so `data` keeps the contents exactly as read from disk.
df = data.copy()

# DataFrame.info() prints its summary itself and returns None, so it is called
# bare: wrapping it in display() only appends a stray "None" to the cell output.
df.info()

# Exclude election results whose chamber type is 'upper'.
df = df.query("chamber_type != 'upper' ")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3815 entries, 0 to 3814
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   country              3815 non-null   object 
 1   region               3814 non-null   object 
 2   election_renewal     3815 non-null   object 
 3   year                 3815 non-null   int64  
 4   month                1018 non-null   object 
 5   chamber_type         3814 non-null   object 
 6   chamber_total_seats  3815 non-null   object 
 7   women_total          3753 non-null   float64
 8   women_perc           3740 non-null   float64
 9   notes                1330 non-null   object 
dtypes: float64(2), int64(1), object(7)
memory usage: 298.2+ KB


None

### 2018 Numbers

In [2]:
# Population lookup: read four columns (selected by position) from the WPP2019
# workbook and give the resulting 'Unnamed: N' headers readable names.
pop_18 = (
    pd.read_excel(
        './data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx',
        usecols=[2, 4, 5, 75],
    )
    .rename(columns={
        'Unnamed: 2': 'country',
        'Unnamed: 4': 'code',
        'Unnamed: 5': 'region_type',
        'Unnamed: 75': 'pop_2018',
    })
)

# 2018 election rows, each joined to its population row by country name.
# The left join keeps every election row even when no population row matches.
df_18 = (
    df.query("year == 2018")
      .merge(pop_18, on='country', how='left')
)

In [3]:
# Colour midpoint for the diverging scale: the population-weighted mean of
# women_perc, matching the population-sized tiles. The original weighted
# women_perc by itself, which skews the midpoint towards high percentages, and
# any missing women_perc turned the whole average into NaN.
perc_18 = df_18['women_perc']
# Coerce to numbers in case the spreadsheet column was not read as numeric.
weights_18 = pd.to_numeric(df_18['pop_2018'], errors='coerce')
# Only rows where both the percentage and the population are known can contribute.
known_18 = perc_18.notna() & weights_18.notna()
# With no usable weights, fall back to None so plotly picks its default colour range.
midpoint_18 = (
    np.average(perc_18[known_18], weights=weights_18[known_18])
    if weights_18[known_18].sum() > 0
    else None
)

# Treemap nested World > region > country: tile area is the 2018 population,
# tile colour is the share of women in parliament.
fig = px.treemap(df_18, path=[px.Constant("World"), 'region', 'country'], values='pop_2018',
                  color='women_perc',
                  color_continuous_scale='RdBu',
                  title='Women Representation in Parliament (2018) - Population',
                  color_continuous_midpoint=midpoint_18)
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), font_family="Courier New")
fig.show()

In [96]:
# Distinct region codes, in order of first appearance in df.
df['region'].unique().tolist()

['ASIA', 'EUR', 'MENA', 'SUB-SAHARAN', 'AME', 'PAC']

In [100]:
# Bar chart of the 2018 percentages. Colouring by region makes plotly express
# emit one trace per region, which is what lets the dropdown show or hide them.
fig = px.bar(df_18, x='country', y='women_perc', color='region',
            labels=dict(country="Country", women_perc="Representation (%)", region="Region"),
            title='Women Representation in Parliament (2018) ')

# Read the region names back from the traces so each button's visibility mask
# lines up with the trace order actually produced, and no region is left out.
region_names = [trace.name for trace in fig.data]

# The original buttons used method="relayout" with args=["Region", <name>];
# "Region" is not a layout attribute, so selecting an entry changed nothing.
# method="update" with a per-trace `visible` list filters the bars instead.
region_buttons = [
    dict(
        label="All",
        method="update",
        args=[{"visible": [True] * len(region_names)}]
    )
]
region_buttons += [
    dict(
        label=name,
        method="update",
        args=[{"visible": [other == name for other in region_names]}]
    )
    for name in region_names
]

# Add dropdown
fig.update_layout(
    updatemenus=[
        dict(
            buttons=region_buttons,
            direction="down",
            pad={"r": 2, "t": 2},
            showactive=True,
            x=0.0001,
            xanchor="center",
            y=1.2,
            yanchor="top"
        ),
    ]
)

fig.show()

### Through the Years

In [64]:
# Mean women_perc over all rows of each year, kept as a frame indexed by year.
avg_yearly = df.groupby('year')['women_perc'].mean().to_frame()
# RM5: rolling mean over 5 consecutive rows (NaN until 5 rows are available);
# 'year' repeats the index as an ordinary column so it can serve as the x axis.
avg_yearly = avg_yearly.assign(
    RM5=avg_yearly['women_perc'].rolling(window=5).mean(),
    year=avg_yearly.index,
)

# Plot the yearly mean and its rolling mean as two lines on one chart.
fig = px.line(
    avg_yearly,
    x='year',
    y=['women_perc', 'RM5'],
    title='Women Representation in Parliament from 1948 to 2018 (Average of all Countries)',
    markers=True,
    labels={'year': 'Year'},
)

# Text styling: one font family, separate colours for body, title and legend title.
fig.update_layout(
    font_family="Courier New",
    font_color="blue",
    title_font_family="Courier New",
    title_font_color="red",
    legend_title_font_color="green",
)

fig.show()

In [39]:
# plotly.express is already imported as `px` in the notebook's first cell, so
# the redundant re-import that used to open this cell has been dropped.

# Country whose history is plotted; change this value to inspect another one.
country = 'Greece'
# `@country` lets the query string read the Python variable above.
df_ = df.query("country == @country")

# Single line of women_perc over the years for the selected country.
fig = px.line(df_, x="year", y="women_perc",
            title=f'{country} - Women Representation in Parliament',
            labels=dict(women_perc='Women in Parliament(%)', year="Year")
            )
# Text styling: one font family, separate colours for body, title and legend title.
fig.update_layout(
        font_family="Courier New",
        font_color="blue",
        title_font_family="Courier New",
        title_font_color="red",
        legend_title_font_color="green"
    )
fig.show()


In [4]:
# Asia: rows from 2000 onwards whose chamber type is not 'upper'
df_asia = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'ASIA')]
# plot the percentage through the years, one line per country
plot_perc_years(df_asia, 'Asia - Women Representation in Parliament since 2000(%)')

In [5]:
# Europe: rows from 2000 onwards whose chamber type is not 'upper'
df_eur = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'EUR')]
# plot the percentage through the years, one line per country
plot_perc_years(df_eur, 'Europe - Women Representation in Parliament since 2000(%)')

In [10]:
# Middle East and North Africa: rows from 2000 onwards whose chamber type is not 'upper'
df_mena = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'MENA')]
# plot the percentage through the years, one line per country
plot_perc_years(df_mena, 'Middle East and North Africa - Women Representation in Parliament since 2000(%)')

In [7]:
# Sub-Saharan Africa: rows from 2000 onwards whose chamber type is not 'upper'
df_sahara = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'SUB-SAHARAN')]
# plot the percentage through the years, one line per country
plot_perc_years(df_sahara, 'Sub-Sahara Africa - Women Representation in Parliament since 2000(%)')

In [8]:
# Americas: rows from 2000 onwards whose chamber type is not 'upper'
df_ame = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'AME')]
# plot the percentage through the years, one line per country
plot_perc_years(df_ame, 'Americas - Women Representation in Parliament since 2000(%)')

In [9]:
# Pacific: rows from 2000 onwards whose chamber type is not 'upper'
df_pac = df[(df['chamber_type'] != 'upper') & (df['year'] >= 2000) & (df['region'] == 'PAC')]
# plot the percentage through the years, one line per country
plot_perc_years(df_pac, 'Pacific - Women Representation in Parliament since 2000(%)')

In [43]:
# Rows whose chamber type is not 'upper'.
df_ = df.query("chamber_type != 'upper'")


In [50]:
# 2018 rows only.
df_18 = df_[df_['year'] == 2018]

# Treemap nested world > region > country in which women_perc drives both the
# tile area and the tile colour; the colour midpoint is the women_perc average
# weighted by women_perc itself.
fig = px.treemap(
    df_18,
    path=[px.Constant("world"), 'region', 'country'],
    values='women_perc',
    color='women_perc',
    title='Women Representation in Parliament 2018',
    color_continuous_scale='RdBu',
    color_continuous_midpoint=np.average(df_18['women_perc'], weights=df_18['women_perc']),
)
fig.update_layout(margin={"t": 50, "l": 25, "r": 25, "b": 25})
fig.show()