In [None]:
# Import libraries
import pandas as pd
import numpy as np

# The first 15 rows of the file carry information that is not needed here,
# so parsing starts right after them.
df=pd.read_csv(
    'Women Percentage in National Parliament.csv',
    skiprows=15,
)
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Elections,Seats,Women,% W,Elections.1,Seats.1,Women.1,% W.1
0,1,Rwanda,7.2024,80,51,63.8,09.2024,26,13,50.0
1,2,Cuba,3.2023,470,262,55.7,-,-,-,-
2,3,Nicaragua,11.2021,91,50,55.0,-,-,-,-
3,4,Bolivia (Plurinational State of),8.2025,130,66,50.8,08.2025,36,21,58.3
4,5,Mexico,6.2024,500,251,50.2,06.2024,128,64,50.0


In [None]:
# Give every column a descriptive name. The names are assigned by position:
# two identifying columns, then four columns for each chamber.
new_columns=[
    'Rank',
    'Country',
    # First chamber ('Election' is singular here; later cells rely on this exact spelling)
    'Chamber1-Election',
    'Chamber1-Seats',
    'Chamber1-Women',
    'Chamber1-Women Percentage',
    # Second chamber
    'Chamber2-Elections',
    'Chamber2-Seats',
    'Chamber2-Women',
    'Chamber2-Women Percentage',
]
df.columns=new_columns

In [None]:
# Inspect the dtype of each column after renaming and before any conversion
df.dtypes

Unnamed: 0,0
Rank,object
Country,object
Chamber1-Election,object
Chamber1-Seats,object
Chamber1-Women,object
Chamber1-Women Percentage,object
Chamber2-Elections,object
Chamber2-Seats,object
Chamber2-Women,object
Chamber2-Women Percentage,object


In [None]:
# Converting the count/percentage/rank columns to numerical data types
numeric_columns=['Rank','Chamber1-Seats','Chamber1-Women','Chamber1-Women Percentage',
                 'Chamber2-Seats','Chamber2-Women','Chamber2-Women Percentage']

# errors='coerce' turns every cell that is not a valid number into NaN. That covers
# the '-' placeholder and the 'suspended' rank marker, so no string pre-cleaning is needed.
# Stripping '-' with .str.replace was avoided on purpose:
#   * it removed every hyphen in a value (a minus sign included), silently changing data;
#   * the .str accessor fails once the columns are numeric, so the cell could not be re-run.
for col in numeric_columns:
    df[col]=pd.to_numeric(df[col],errors='coerce')

# The election-date columns are intentionally left as they are
df.dtypes

Unnamed: 0,0
Rank,float64
Country,object
Chamber1-Election,object
Chamber1-Seats,float64
Chamber1-Women,float64
Chamber1-Women Percentage,float64
Chamber2-Elections,object
Chamber2-Seats,float64
Chamber2-Women,float64
Chamber2-Women Percentage,float64


In [None]:
# Columns kept for single-chamber parliaments: identifiers plus the first-chamber figures
unicameral_cols=['Rank','Country','Chamber1-Election','Chamber1-Seats','Chamber1-Women','Chamber1-Women Percentage']

# With only one chamber left, the 'Chamber1-' prefix is dropped from the column names
new_names={
    'Chamber1-Election':'Elections',
    'Chamber1-Seats':'Seats',
    'Chamber1-Women':'Women',
    'Chamber1-Women Percentage':'Women Percentage',
}

# A country counts as unicameral when it has no seat count for a second chamber.
# Row filter, column selection and renaming are done in one expression that
# returns a new frame, leaving df untouched.
df_unicameral=df.loc[df['Chamber2-Seats'].isna(),unicameral_cols].rename(columns=new_names)



In [None]:
# Label every row whose Rank is missing as 'Not Ranked'. After this the column
# holds numbers and strings side by side (ranks show up as 2.0, 3.0, ...).
# NOTE(review): a missing Rank may not always mean "unranked" - in the preview
# below, United Arab Emirates (50.0%) sits between rank 6 and rank 8 with the same
# percentage as Andorra, which looks like a tie. Confirm against the raw file.
df_unicameral['Rank']=df_unicameral['Rank'].fillna('Not Ranked')


In [None]:
# Preview the first rows of df_unicameral to check the row selection,
# the renamed columns and the 'Not Ranked' labels
df_unicameral.head()

Unnamed: 0,Rank,Country,Elections,Seats,Women,Women Percentage
1,2.0,Cuba,3.2023,470.0,262.0,55.7
2,3.0,Nicaragua,11.2021,91.0,50.0,55.0
5,6.0,Andorra,4.2023,28.0,14.0,50.0
6,Not Ranked,United Arab Emirates,10.2023,40.0,20.0,50.0
7,8.0,Costa Rica,2.2022,57.0,28.0,49.1


In [None]:
# Write the unicameral table to a CSV file; index=False leaves the DataFrame index out
df_unicameral.to_csv('Unicameral_cleaned.csv',index=False)

In [None]:
# Per-chamber column names: the 'ChamberN-' prefix becomes a trailing 1 or 2
new_names={
    # First chamber
    'Chamber1-Election':'Elections1',
    'Chamber1-Seats':'Seats1',
    'Chamber1-Women':'Women1',
    'Chamber1-Women Percentage':'Women Percentage1',
    # Second chamber
    'Chamber2-Elections':'Elections2',
    'Chamber2-Seats':'Seats2',
    'Chamber2-Women':'Women2',
    'Chamber2-Women Percentage':'Women Percentage2',
}

# Selecting the countries that have two chambers, i.e. rows with a seat count
# for the second chamber. All columns are kept; the result is a new frame.
df_bicameral=df[df['Chamber2-Seats'].notna()].rename(columns=new_names)

In [None]:
# Label every row whose Rank is missing as 'Not Ranked'. After this the column
# holds numbers and strings side by side (ranks show up as 1.0, 4.0, ...).
# NOTE(review): confirm that every missing Rank really means "unranked" and is
# not, for example, a tie with the previous row in the raw file.
df_bicameral['Rank']=df_bicameral['Rank'].fillna('Not Ranked')

In [None]:
# Preview the first rows of df_bicameral to check the row selection,
# the renamed columns and the 'Not Ranked' labels
df_bicameral.head()

Unnamed: 0,Rank,Country,Elections1,Seats1,Women1,Women Percentage1,Elections2,Seats2,Women2,Women Percentage2
0,1.0,Rwanda,7.2024,80.0,51.0,63.8,9.2024,26.0,13.0,50.0
3,4.0,Bolivia (Plurinational State of),8.2025,130.0,66.0,50.8,8.2025,36.0,21.0,58.3
4,5.0,Mexico,6.2024,500.0,251.0,50.2,6.2024,128.0,64.0,50.0
8,9.0,Australia,5.2025,150.0,69.0,46.0,5.2025,76.0,43.0,56.6
15,16.0,South Africa,5.2024,398.0,178.0,44.7,6.2024,54.0,24.0,44.4


In [None]:
# Preview the last rows of df_bicameral, where the 'Not Ranked' entries and
# rows with missing figures appear.
# NOTE(review): Nepal shows Seats1 = 1.0 next to Seats2 = 59.0 - verify this
# value against the raw file.
df_bicameral.tail()

Unnamed: 0,Rank,Country,Elections1,Seats1,Women1,Women Percentage1,Elections2,Seats2,Women2,Women Percentage2
175,176.0,Nigeria,2.2023,356.0,15.0,4.2,2.2023,107.0,4.0,3.7
179,180.0,Oman,10.2023,90.0,0.0,0.0,11.2023,86.0,18.0,20.9
181,Not Ranked,Yemen,4.2003,245.0,0.0,0.0,4.2001,90.0,1.0,1.1
182,Not Ranked,Argentina,10.2025,257.0,,,10.2025,72.0,,
185,Not Ranked,Nepal,11.2022,1.0,,,1.2024,59.0,22.0,37.3


In [None]:
# Write the bicameral table to a CSV file; index=False leaves the DataFrame index out
df_bicameral.to_csv('Bicameral_cleaned.csv',index=False)