# Women in Parliament

This notebook explores women's participation in national parliaments since 1948 using the [Plotly graphing library](https://plotly.com/python/).

The data were obtained from the [Inter-Parliamentary Union (IPU)](https://www.ipu.org/).

In [2]:
import pandas as pd, numpy as np
import plotly.express as px

In [1]:


def plot_perc_years(df, title, color='country', y_range=(-0.1, 0.8), nticks=20,
                    width=1200, height=600):
    """Draw and show a line chart of 'women_perc' against 'year'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'year', 'women_perc' and the column named by ``color``.
    title : str
        Figure title.
    color : str, default 'country'
        Column that splits the data into one coloured line per value.
    y_range : sequence of two numbers, default (-0.1, 0.8)
        Fixed lower and upper limits of the y axis.
    nticks : int, default 20
        Value passed to ``update_xaxes(nticks=...)``.
    width, height : int, default 1200 and 600
        Figure size in pixels (autosizing is switched off).

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``; nothing is returned, so
        calling this as the last statement of a cell displays the chart once.
    """
    fig = px.line(
        df, x='year', y='women_perc', color=color, markers=True,
        title=title,
        labels=dict(women_perc="Women in Parliament(%)", year="Year"),
    )
    fig.update_layout(
        font_family="Courier New",
        font_color="grey",
        title_font_family="Courier New",
        title_font_color="#1f77b4",
        legend_title_font_color="grey",
        autosize=False,
        width=width,
        height=height,
        # Same effect as fig.update(layout_yaxis_range=...), set with the rest of the layout.
        yaxis_range=list(y_range),
    )
    fig.update_xaxes(nticks=nticks)

    fig.show()

In [2]:
# Read historical data; `data` keeps the raw frame, `df` is the working copy
data = pd.read_csv('./data/women_in_parliament-historical_database-1945_to_2018_cleaned.csv')
df = data.copy()
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df.info()

# Exclude from election results 'upper' chamber type
df = df.query("chamber_type != 'upper' ")

avg_yearly = pd.read_csv('./data/avg_yearly.csv')
display(avg_yearly.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3815 entries, 0 to 3814
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   country              3815 non-null   object 
 1   region               3814 non-null   object 
 2   election_renewal     3815 non-null   object 
 3   year                 3815 non-null   int64  
 4   month                1018 non-null   object 
 5   chamber_type         3814 non-null   object 
 6   chamber_total_seats  3815 non-null   object 
 7   women_total          3753 non-null   float64
 8   women_perc           3740 non-null   float64
 9   notes                1330 non-null   object 
dtypes: float64(2), int64(1), object(7)
memory usage: 298.2+ KB


None

Unnamed: 0,year_,women_perc,RM5,decade,year
0,1945,0.032222,,40's,5
1,1946,0.035185,,40's,6
2,1947,0.022727,,40's,7
3,1948,0.032632,,40's,8
4,1949,0.041053,0.032764,40's,9


### 2018 Numbers

In [None]:
# Read population data for 2018.
# Columns are selected by position and renamed from the 'Unnamed: N' labels.
# NOTE(review): presumably the sheet's real header row sits below the first row,
# which is why pandas reports 'Unnamed: N' names — confirm against the workbook.
pop_18 = pd.read_excel(
    './data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx',
    usecols=[2, 4, 5, 75],
)
pop_18 = pop_18.rename(columns={
    'Unnamed: 2': 'country',
    'Unnamed: 4': 'code',
    'Unnamed: 5': 'region_type',
    'Unnamed: 75': 'pop_2018',
})

# Keep only the 2018 parliament records and attach population by country name.
# The left merge keeps every parliament row; countries absent from pop_18 get NaN.
df_18 = df.query("year == 2018").merge(pop_18, on='country', how='left')

# Make the population column numeric now that it exists (after the merge).
# Any non-numeric cell becomes NaN instead of leaving the column as object dtype.
df_18['pop_2018'] = pd.to_numeric(df_18['pop_2018'], errors='coerce')

In [None]:
# Neutral point of the diverging colour scale: the population-weighted mean of
# women_perc, i.e. weighted by the same quantity that sizes the tiles.
# Weighting women_perc by itself does not give a meaningful average, and rows
# missing either value must be left out or np.average returns NaN.
pop_weights = pd.to_numeric(df_18['pop_2018'], errors='coerce')
has_both = df_18['women_perc'].notna() & pop_weights.gt(0)
weighted_mean_perc = np.average(df_18.loc[has_both, 'women_perc'],
                                weights=pop_weights[has_both])

fig = px.treemap(df_18, path=[px.Constant("World"), 'region', 'country'], values='pop_2018',
                  color='women_perc',
                  color_continuous_scale='RdBu',
                  title='Women Representation in Parliament (2018) - Population',
                  color_continuous_midpoint=weighted_mean_perc)
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), font_family="Courier New")
fig.show()

In [None]:
import plotly.express as px

# Countries ranked from highest to lowest share of women, coloured by region.
ranked_18 = df_18.sort_values('women_perc', ascending=False)
fig = px.bar(ranked_18, x='country', y='women_perc',
            color='region',
            labels=dict(country="Country", women_perc="Representation (%)"),
            # Title spelling fixed ("Represetation") and trailing space dropped.
            title='Women Representation in Parliament (2018)')

fig.show()

In [None]:
import plotly.express as px

# Single-country view: women_perc per year for the country named below.
country = 'Greece'
df_ = df.query("country == @country")

axis_labels = {'women_perc': 'Women in Parliament(%)', 'year': "Year"}
fig = px.line(
    df_,
    x="year",
    y="women_perc",
    title=f'{country} - Women Representation in Parliament',
    labels=axis_labels,
)

# Fonts and colours for body text, title and legend title.
font_settings = dict(
    font_family="Courier New",
    font_color="blue",
    title_font_family="Courier New",
    title_font_color="red",
    legend_title_font_color="green",
)
fig.update_layout(**font_settings)
fig.show()


## Regional Trends


In [None]:
# Create one dataframe per region: non-upper-chamber records from 2000 onwards.
# The same query is built for each region code and the results are unpacked in order.
df_eur, df_mena, df_sahara, df_ame = (
    df.query(f"(chamber_type != 'upper') and (year >= 2000) and (region == '{region_code}')")
    for region_code in ('EUR', 'MENA', 'SUB-SAHARAN', 'AME')
)

In [None]:
# Spot check: all records for region 'AME' in 2013.
ame_2013 = df.query("year==2013 and region=='AME'")
ame_2013

In [None]:
# Arithmetic mean of five hand-typed values.
# NOTE(review): presumably women_perc figures copied from the AME 2013 spot check
# in the previous cell — confirm, or replace with a mean computed from the data.
(0.37+0.45+0.32+0.26+0.18)/5

In [None]:
# Mean women_perc per (year, region); groupby().mean() already yields a Series,
# so reset_index() alone gives the tidy frame.
region_yearly_df = df.groupby(['year', 'region'])['women_perc'].mean().reset_index()
temp = region_yearly_df.query("year>=1995")

fig = px.line(temp, x='year', y='women_perc', color='region', markers=True,
            # Descriptive title in place of the placeholder 'Kat'.
            title='Women Representation in Parliament by Region since 1995',
            labels=dict(women_perc="Women in Parliament(%)", year="Year")
            )
fig.update_layout(
    font_family="Courier New",
    font_color="grey",
    title_font_family="Courier New",
    title_font_color="#1f77b4",
    legend_title_font_color="grey",
    autosize=False,
    width=1200,
    height=600,
    # Fixed y-axis limits (same effect as fig.update(layout_yaxis_range=...)).
    yaxis_range=[-0.01, .4],
)
fig.update_xaxes(nticks=19)

fig.show()

In 2018, the Americas became the first region to reach the 30% threshold.

In [None]:
# Scatter of women_perc against population, one facet per decade (3 facets per row),
# with points coloured by region and sized by population.
# NOTE(review): this cell reads 'pop' and 'decade' columns from `temp`, but the `temp`
# assigned in the regional-trend cell above is a groupby result with only
# year / region / women_perc. As written it cannot run top-to-bottom on a fresh
# kernel — TODO restore the cell that builds the frame with population and decade.
fig = px.scatter(temp, x='pop', y='women_perc', color='region', size='pop',
                facet_col='decade', facet_col_wrap=3,
                title='Women Participation in Parliaments by Population - World',
                labels=dict(women_perc='Women (%)', decade='Decade', country='Country',pop='Population (thousands)'),

)

#fig.update_yaxes(title_text='Women (%)')
# y ticks start at 0 and are spaced 0.2 apart on every facet
fig.update_yaxes(tick0=0, dtick=0.2)


# Same fonts and colours as the other charts in this notebook
fig.update_layout(
        font_family="Courier New",
        font_color="grey",
        title_font_family="Courier New",
        title_font_color="#1f77b4",
        legend_title_font_color="grey"
        )

fig.show()

In [None]:
# Europe: women_perc through the years, one line per country
plot_perc_years(
    df=df_eur,
    title='Europe - Women Representation in Parliament since 2000(%)',
)

In [None]:
# Middle East and North Africa: women_perc through the years, one line per country
plot_perc_years(
    df=df_mena,
    title='Middle East and North Africa - Women Representation in Parliament since 2000(%)',
)

In [None]:
def perc_change(df, country_name):
    """Return one country's records ordered by year, with the change per record.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'country', 'year' and 'women_perc' columns.
    country_name : str
        Value of the 'country' column to select.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df`` itself is not modified) holding the selected rows
        sorted by 'year', plus a 'perc_change' column: each row's 'women_perc'
        minus the previous row's (NaN for the first row). Note this is an
        absolute difference, not a relative change. An unknown country yields
        an empty frame.
    """
    # Filter the frame that was passed in, not a notebook-level global.
    country = df[df['country'] == country_name].sort_values('year')
    return country.assign(perc_change=country['women_perc'].diff(1))

# Year-over-year change for Qatar within the MENA frame
perc_change(df=df_mena, country_name='Qatar')

In [None]:

# Sub-Saharan Africa: women_perc through the years, one line per country
plot_perc_years(
    df=df_sahara,
    title='Sub-Sahara Africa - Women Representation in Parliament since 2000(%)',
)

In [None]:

# Americas: women_perc through the years, one line per country
plot_perc_years(
    df=df_ame,
    title='Americas - Women Representation in Parliament since 2000(%)',
)

In [None]:
# Yearly mean of women_perc across the Americas frame, with the value column named 'AME'
temp = (
    df_ame
    .groupby(['year'])['women_perc']
    .mean()
    .reset_index()
    .rename(columns={'women_perc': 'AME'})
)
temp

In [None]:
# Asia: non-upper-chamber records from 2000 onwards
asia_filter = "(chamber_type != 'upper') and (year >= 2000) and (region == 'ASIA')"
df_asia = df.query(asia_filter)

# women_perc through the years, one line per country
plot_perc_years(
    df=df_asia,
    title='Asia - Women Representation in Parliament since 2000(%)',
)

In [None]:
# Pacific (region code 'PAC'): non-upper-chamber records from 2000 onwards
pac_filter = "(chamber_type != 'upper') and (year >= 2000) and (region == 'PAC')"
df_pac = df.query(pac_filter)

# women_perc through the years, one line per country
plot_perc_years(
    df=df_pac,
    title='Pacific - Women Representation in Parliament since 2000(%)',
)

In [None]:
# The names were bare, unquoted words, which is a SyntaxError in a code cell;
# they are kept here as a tuple of strings so the cell runs.
# NOTE(review): presumably the countries singled out from the Pacific chart
# above — confirm the intent, or move this list into a markdown cell.
pac_countries = (
    'Palau',
    'Solomon Islands',
    'Tuvalu',
    'Tonga',
    'Vanuatu',
    'Papua New Guinea',
    'Micronesia',
)
pac_countries