# Filter for sex per country

In [71]:
import os

import pandas as pd

# Relative path to the "data" folder in the parent directory; the pickle
# inputs and the exported output below are all resolved against it.
BASE = os.path.join(os.pardir, "data")

In [73]:
# Read the pickled teilm022 table from the "pickles" folder under BASE
# and preview its first rows.
filename = "teilm022.pkl"
df = pd.read_pickle(os.path.join(BASE, "pickles", filename))
df.head()

Unnamed: 0,s_adj,age,sex,unit,geo,2019m09,2019m10,2019m11,2019m12,2020m01,2020m02,2020m03,2020m04,2020m05,2020m06,2020m07,2020m08
0,SA,Y25-74,F,PC_ACT,AT,3.7,3.8,3.7,3.7,3.7,3.6,3.7,4.0,4.2,4.1,3.9,
1,SA,Y25-74,F,PC_ACT,BE,4.3,4.1,3.9,3.9,4.1,4.2,4.4,4.5,4.4,4.4,4.4,
2,SA,Y25-74,F,PC_ACT,BG,3.5,3.6,3.6,3.5,3.8,3.6,3.6,4.2,4.0,3.9,3.9,
3,SA,Y25-74,F,PC_ACT,CH,4.0,3.8,3.6,3.7,4.0,4.1,4.1,4.0,4.1,4.2,,
4,SA,Y25-74,F,PC_ACT,CY,7.0,6.8,6.6,6.3,6.0,5.6,5.9,7.3,7.3,6.1,5.3,


In [74]:
# Keep the first four fields of the first row for later reference.
# NOTE(review): per the preview above these are s_adj, age, sex and unit, so
# the row-specific `sex` value is captured too - confirm that is intended.
params = df.iloc[0, 0:4]
# Check which unique values the "sex" column contains
df["sex"].unique()

array(['F', 'M', 'T'], dtype=object)

In [75]:
# Remove the s_adj, age and unit columns, which are not used below.
df = df.drop(["s_adj", "age", "unit"], axis="columns")

In [76]:
# Row-wise average over every column from the third one onward
# (the monthly values that follow sex and geo).
df["mean"] = df.iloc[:, 2:].mean(axis=1)

In [77]:
# Transform the table: one row per country (geo), one column per sex value,
# holding the averaged value rounded to two decimals.
# The columns to discard are taken from `df` itself. The earlier reference to
# `t` is not defined in any cell above and raises NameError on a fresh kernel.
df = (
    df
    .drop(columns=df.columns[2:-1])  # drop the monthly columns; keep sex, geo, mean
    .pivot(index="geo", columns="sex", values="mean")
    .round(2)
)
df

sex,F,M,T
geo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AT,3.83,4.15,4.01
BE,4.24,4.83,4.56
BG,3.75,4.25,4.03
CH,3.96,3.71,3.81
CY,6.38,5.63,6.0
CZ,2.26,1.75,1.98
DE,3.16,3.67,3.44
DK,4.76,3.86,4.29
EA,7.06,6.28,6.66
EA18,7.07,6.28,6.66


## Absolute numbers
To compute each sex's share of the total, we need absolute numbers rather than rates. We repeat the process above for a second table (`teilm010`) and then calculate the fractions.

In [105]:
# Open the teilm010 pickle and immediately discard the s_adj, age and unit
# columns, mirroring what was done for the first table.
filename = "teilm010.pkl"
abs_df = (
    pd.read_pickle(os.path.join(BASE, "pickles", filename))
    .drop(columns=["s_adj", "age", "unit"])
)
abs_df.head()

Unnamed: 0,sex,geo,2019m09,2019m10,2019m11,2019m12,2020m01,2020m02,2020m03,2020m04,2020m05,2020m06,2020m07,2020m08
0,F,AT,88,89,88,90,92,91.0,93.0,101.0,104.0,102.0,96.0,
1,F,BE,117,113,110,110,115,117.0,120.0,125.0,124.0,123.0,121.0,
2,F,BG,56,58,58,57,61,59.0,59.0,72.0,68.0,64.0,64.0,
3,F,CH,106,100,97,98,102,102.0,104.0,103.0,105.0,106.0,,
4,F,CY,16,16,16,15,14,13.0,13.0,17.0,18.0,15.0,13.0,


In [106]:
# Average the monthly columns per row, then reshape to one row per country
# (geo) and one column per sex value, rounded to two decimals.
abs_df["mean"] = abs_df[abs_df.columns[2:]].mean(axis=1)
# The columns to discard are taken from `abs_df` itself. The earlier reference
# to `t` is not defined in any cell above and raises NameError on a fresh kernel.
abs_df = (
    abs_df
    .drop(columns=abs_df.columns[2:-1])  # drop the monthly columns; keep sex, geo, mean
    .pivot(index="geo", columns="sex", values="mean")
    .round(2)
)
abs_df

sex,F,M,T
geo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AT,94.0,119.09,213.09
BE,117.73,150.18,267.55
BG,61.45,83.18,144.45
CH,102.3,112.7,214.8
CY,15.09,15.64,30.45
CZ,59.64,58.91,118.45
DE,692.91,924.18,1617.0
DK,78.45,79.27,157.82
EA,5913.91,6262.64,12176.82
EA18,5868.73,6202.45,12071.18


In [107]:
# Calculate fractions
#
# NOTE: The fraction of the total unemployed is not completely clear,
# since the size of the female vs. male population might differ.
#
# Scale to percent first and round last, so the stored values are rounded to
# two decimals; rounding before multiplying by 100 can leave float noise.
abs_df["frac_f"] = (abs_df["F"] / abs_df["T"]).mul(100).round(2)
abs_df["frac_m"] = (abs_df["M"] / abs_df["T"]).mul(100).round(2)

In [108]:
# Keep only the total and the two fraction columns: remove the absolute
# female / male numbers and label the total column "abs_t".
abs_df = (
    abs_df
    .drop(columns=["F", "M"])
    .rename(columns={"T": "abs_t"})
)

In [109]:
# Join the per-sex means with the absolute total and the fractions on geo.
# Assign the result back to `df`: the export cell writes `df`, and without the
# assignment the merged columns were displayed but never saved.
df = df.merge(abs_df, how="outer", on="geo")
df

sex,F,M,T,abs_t,frac_f,frac_m
geo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AT,3.83,4.15,4.01,213.09,44.11,55.89
BE,4.24,4.83,4.56,267.55,44.0,56.13
BG,3.75,4.25,4.03,144.45,42.54,57.58
CH,3.96,3.71,3.81,214.8,47.63,52.47
CY,6.38,5.63,6.0,30.45,49.56,51.36
CZ,2.26,1.75,1.98,118.45,50.35,49.73
DE,3.16,3.67,3.44,1617.0,42.85,57.15
DK,4.76,3.86,4.29,157.82,49.71,50.23
EA,7.06,6.28,6.66,12176.82,48.57,51.43
EA18,7.07,6.28,6.66,12071.18,48.62,51.38


In [119]:
# Export: write `df` as a pickle into the "output" folder under BASE
output_path = os.path.join(BASE, "output", "filtered_sex_country.pkl")
df.to_pickle(output_path)