In [1]:
#| echo: false
#| include: false
import pandas as pd
import os
from datetime import datetime
import altair as alt
import datetime
import matplotlib as plt
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.offline as pyo
import plotly.tools as tls

import warnings
# Ignore every warning from here on (no category or module filter is given),
# so warnings do not show up in cell output.
warnings.filterwarnings('ignore')

# Default Plotly renderer; the '+' joins two renderer names into one setting.
pio.renderers.default = "plotly_mimetype+notebook_connected"

## Decision Making Data 

In [68]:
# Academics
# Load the academics decision-making extract.
df_ad = pd.read_csv("Data/Politics/WMID_Acdemics_1.csv")

# Keep only the 2021 rows whose unit code is 'NR'.
df_ad2 = df_ad[(df_ad['time'] == 2021) & (df_ad['_UNIT'] == 'NR')]

# Sum 'value' for every (country, sex) pair and call the result 'headcount'.
gender = df_ad2.groupby(['geo', 'sex'])['value'].sum().reset_index()
gender = gender.rename(columns={'value': 'headcount'})


# Distinct 'geo' labels present after aggregation (kept for inspection).
country = gender['geo'].unique()

# Aggregate (non-country) labels to drop.
# isin() matches exact strings, so a misspelled label silently leaves the
# aggregate row in the result. The Judiciary cell excludes the IPA aggregate as
# "... (IPA) beneficiaries" whereas this cell listed only "... (IPA) countries";
# both spellings are listed here so the row is removed whichever one the CSV uses.
# NOTE(review): confirm the exact label in the CSV and drop the unused spelling.
unwanted_countries = ["European Union - 27 countries (from 2020)",
                      "European Union - 28 countries (1993-2020)",
                      "Instrument for Pre-accession Assistance (IPA) countries",
                      "Instrument for Pre-accession Assistance (IPA) beneficiaries"]

gender_1 = gender[~gender['geo'].isin(unwanted_countries)]

# Wide format: one row per 'geo', one column per 'sex' value.
df_1 = gender_1.pivot_table(index='geo', columns='sex', values='headcount').reset_index()

# Tag the rows so they stay identifiable after the tables are concatenated.
df_1['category'] = 'Academics'


In [70]:
# Judiciary
# Read the judiciary extract and relabel its unnamed first column as "time".
df_jd = (
    pd.read_csv("Data/Politics/WMID_Judiciary_1.csv", encoding='UTF-8')
    .rename(columns={'Unnamed: 0': 'time'})
)

# Row masks: 2021 observations, and rows whose unit code is 'NR'.
is_2021 = df_jd['time'].eq(2021)
is_nr_unit = df_jd['_UNIT'].eq('NR')
df_jd2 = df_jd.loc[is_2021 & is_nr_unit]

# Total 'value' per (country, sex), exposed as 'headcount'.
gender_ju = (
    df_jd2.groupby(['geo', 'sex'])['value']
    .sum()
    .reset_index()
    .rename(columns={'value': 'headcount'})
)

# Aggregate labels that are not individual countries.
unwanted_countries = [
    "European Union - 27 countries (from 2020)",
    "European Union - 28 countries (1993-2020)",
    "Instrument for Pre-accession Assistance (IPA) beneficiaries",
]
keep_rows = ~gender_ju['geo'].isin(unwanted_countries)
gender_ju_1 = gender_ju.loc[keep_rows]

# Wide layout (one column per sex) plus a constant category tag.
df_2 = (
    pd.pivot_table(gender_ju_1, index='geo', columns='sex', values='headcount')
    .reset_index()
    .assign(category='Judiciary')
)

In [71]:
# Business
# Read the business extract and relabel its unnamed first column as "time".
df_bs = (
    pd.read_csv("Data/Politics/WMID_Business_1.csv", encoding='UTF-8')
    .rename(columns={'Unnamed: 0': 'time'})
)

# Row masks: the '2021-B2' period, and rows whose unit code is 'NR'.
is_2021_b2 = df_bs['time'].eq('2021-B2')
is_nr_unit = df_bs['_UNIT'].eq('NR')
df_bs2 = df_bs.loc[is_2021_b2 & is_nr_unit]

# Total 'value' per (sector, sex), exposed as 'headcount'.
gender_bs = (
    df_bs2.groupby(['NACE', 'sex'])['value']
    .sum()
    .reset_index()
    .rename(columns={'value': 'headcount'})
)

# Sector labels to exclude from the result.
unwanted_sectors = [
    "A, B, K-Q & Unknown - aggregate of sectors with few large companies",
    "C, E & F (Mining & quarrying; electricity, gas and water; construction)",
    "G, H & I (Wholesale/retail, vehicle repair; hotels/restaurants; transport, storage & communication)",
]
keep_rows = ~gender_bs['NACE'].isin(unwanted_sectors)

# The sector column is renamed to 'geo' so this table shares its column
# names with the Academics and Judiciary tables.
gender_bs_1 = gender_bs.loc[keep_rows].rename(columns={'NACE': 'geo'})

# Wide layout (one column per sex) plus a constant category tag.
df_3 = (
    pd.pivot_table(gender_bs_1, index='geo', columns='sex', values='headcount')
    .reset_index()
    .assign(category='Business')
)

In [84]:
# Show the distinct labels left in the 'geo' column of the Business table.
df_3.loc[:, 'geo'].unique()

array(['A Agriculture, hunting and forestry', 'B Fishing',
       'C Mining and quarrying', 'D Manufacturing',
       'E Electricity, gas and water supply', 'F Construction',
       'G Wholesale and retail trade; repair of motor vehicles, motorcycles and personal and household goods',
       'H Hotels and restaurants',
       'I Transport, storage and communication',
       'J Financial intermediation',
       'K Real estate, renting and business activities, consulting',
       'L Public administration and defence; compulsory social security',
       'N Health and social work',
       'O Other community, social and personal service activities'],
      dtype=object)

In [76]:
# Stack the three category tables row-wise and write them out as a single CSV
# (the row index is not written).
df = pd.concat([df_1, df_2, df_3])
df.to_csv("DM_clean.csv", index=False, encoding='UTF-8')

## Paid and Unpaid Data

In [3]:
# The CSV's first column is an unnamed running row number; read with defaults it
# appears as a spurious "Unnamed: 0" data column. Loading it as the index keeps
# the frame limited to the real fields (COU, Country, Indicator, Sex, Unit, Value).
df_time = pd.read_csv("Data/Employment/Gender_paid_unpaid.csv", index_col=0)

# Preview the first rows.
df_time.head()

Unnamed: 0,COU,Country,Indicator,Sex,Unit,Value
0,AUS,Australia,"Time spent in unpaid work, by sex",Men,Minutes per day,171.6
1,AUS,Australia,"Time spent in unpaid work, by sex",Women,Minutes per day,311.0
2,AUS,Australia,"Time spent in paid work, by sex",Men,Minutes per day,304.1
3,AUS,Australia,"Time spent in paid work, by sex",Women,Minutes per day,172.0
4,AUT,Austria,"Time spent in unpaid work, by sex",Men,Minutes per day,135.3


In [5]:
# Split the table into its two indicators: paid work and unpaid work.
paid_rows = df_time['Indicator'].eq('Time spent in paid work, by sex')
unpaid_rows = df_time['Indicator'].eq('Time spent in unpaid work, by sex')

df_paid = df_time.loc[paid_rows]
df_unpaid = df_time.loc[unpaid_rows]

In [12]:
# Reshape each indicator to one row per country with one column per sex,
# and tag the rows with the indicator they came from.
df_paid_1 = (
    pd.pivot_table(df_paid, index='Country', columns='Sex', values='Value')
    .reset_index()
    .assign(Category='Paid')
)

df_unpaid_1 = (
    pd.pivot_table(df_unpaid, index='Country', columns='Sex', values='Value')
    .reset_index()
    .assign(Category='Unpaid')
)

# Paid rows first, unpaid rows after (row-wise stack).
df_full = pd.concat([df_paid_1, df_unpaid_1])

df_full.head()

Sex,Country,Men,Women,Category
0,Australia,304.1,172.0,Paid
1,Austria,364.8,248.8,Paid
2,Belgium,273.7,199.2,Paid
3,Canada,340.5,268.3,Paid
4,China (People's Republic of),390.0,291.0,Paid


In [13]:
# Write the combined paid/unpaid table to disk without the row index.
df_full.to_csv(path_or_buf="work_clean.csv", index=False, encoding='UTF-8')