In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')

In [2]:
# Load the EU country list (presumably the 27 member states, going by the file name).
# NOTE(review): eu_df is not referenced by any later cell visible in this notebook.
eu_country_list_path = './additional_data/EU27_COUNTRY_LIST.csv'
eu_df = pd.read_csv(eu_country_list_path)

In [5]:
# Read the cleaned per-topic datasets from ./datasets_cleaned.
# Each file is loaded with pandas defaults, so any index column that was written
# out with the CSV comes back as an ordinary first column (later cells drop it).
# Only `violence` and `care` are used by the cells visible below.
# NOTE(review): the remaining frames are loaded but not referenced in this
# notebook as shown - confirm whether they are still needed.
violence = pd.read_csv('./datasets_cleaned/masterviolence_pht_df.csv')
care = pd.read_csv('./datasets_cleaned/master_care_df.csv')
eco_sector = pd.read_csv('./datasets_cleaned/Economic sector representation 2013-2022.csv')
employment = pd.read_csv('./datasets_cleaned/Employment by sex and age.csv')
pay = pd.read_csv('./datasets_cleaned/Gender Pay Gap 2009-2020.csv')
decision = pd.read_csv('./datasets_cleaned/Members of national parliaments.csv')
pension = pd.read_csv('./datasets_cleaned/Pension gap 2012-2021.csv')





In [3]:
# Restrict the care data to the chosen study window: 2008 up to and including 2020.
in_study_window = care['Year'].ge(2008) & care['Year'].lt(2021)
care_date_filter = (
    care.loc[in_study_window]
    # the first column is the old index that came back in when the CSV was read
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
    # order rows alphabetically by country, then chronologically
    .sort_values(['Country', 'Year'], ascending=True)
    # fresh 0..n-1 index so the frame can be combined with the other datasets
    .reset_index(drop=True)
)

NameError: name 'care' is not defined

In [6]:
# Show the filtered, sorted care frame (a bare last expression is rendered as the cell output).
care_date_filter

Unnamed: 0,Country,Year,care_gap_%_active_population,IndexValueCare
0,Austria,2008,32.2,0.678
1,Austria,2009,32.6,0.674
2,Austria,2010,28.5,0.715
3,Austria,2011,28.0,0.720
4,Austria,2012,26.5,0.735
...,...,...,...,...
346,Sweden,2016,0.0,0.000
347,Sweden,2017,0.0,0.000
348,Sweden,2018,0.0,0.000
349,Sweden,2019,0.0,0.000


In [7]:
# Prepare the violence data for merging: discard the first column (the secondary
# index read back from the CSV) and order rows by country, then by year.
violence = (
    violence
    .drop(columns=violence.columns[0])
    .sort_values(['Country', 'Year'], ascending=True)
)
# Separate copy with a fresh 0..n-1 index; `violence` keeps its original row labels.
violence_reset = violence.reset_index(drop=True)
violence_reset

Unnamed: 0,Country,Year,Homicide_female_victims,Rape_female_victims,SexualAssault_female_victims,Average_no_female_victims_per_100k,IndexValueViolence
0,Austria,2008,0.91,13.32,19.76,11.330000,0.909360
1,Austria,2009,1.31,14.12,18.44,11.290000,0.909680
2,Austria,2010,0.77,25.69,15.00,13.820000,0.889440
3,Austria,2011,0.81,26.33,17.29,14.810000,0.881520
4,Austria,2012,0.88,25.92,18.10,14.966667,0.880267
...,...,...,...,...,...,...,...
346,Sweden,2016,0.59,118.27,197.27,105.376667,0.156987
347,Sweden,2017,0.54,129.23,203.98,111.250000,0.110000
348,Sweden,2018,0.66,140.46,197.99,113.036667,0.095707
349,Sweden,2019,0.49,150.13,192.93,114.516667,0.083867


In [8]:
# Merge the care index value into the violence frame.
# The join is made on the Country/Year keys rather than on row position: a positional
# (index-to-index) join only pairs the right rows when both frames contain exactly the
# same country/year rows in exactly the same order, and silently mixes up countries or
# years otherwise.
# Only IndexValueCare is carried over from the care frame; the key columns are included
# solely to match the rows.
# validate='one_to_one' raises a MergeError if a (Country, Year) pair occurs more than
# once in either frame, instead of silently duplicating rows.
master_index = violence_reset.merge(
    care_date_filter[['Country', 'Year', 'IndexValueCare']],
    on=['Country', 'Year'],
    how='inner',
    validate='one_to_one',
)
master_index

Unnamed: 0,Country,Year,Homicide_female_victims,Rape_female_victims,SexualAssault_female_victims,Average_no_female_victims_per_100k,IndexValueViolence,IndexValueCare
0,Austria,2008,0.91,13.32,19.76,11.330000,0.909360,0.678
1,Austria,2009,1.31,14.12,18.44,11.290000,0.909680,0.674
2,Austria,2010,0.77,25.69,15.00,13.820000,0.889440,0.715
3,Austria,2011,0.81,26.33,17.29,14.810000,0.881520,0.720
4,Austria,2012,0.88,25.92,18.10,14.966667,0.880267,0.735
...,...,...,...,...,...,...,...,...
346,Sweden,2016,0.59,118.27,197.27,105.376667,0.156987,0.000
347,Sweden,2017,0.54,129.23,203.98,111.250000,0.110000,0.000
348,Sweden,2018,0.66,140.46,197.99,113.036667,0.095707,0.000
349,Sweden,2019,0.49,150.13,192.93,114.516667,0.083867,0.000


In [9]:
# Drop the raw violence measures; only the two index columns are needed from here on.
# Columns are removed by name (not by position) so the cell does not depend on column
# order, and errors='ignore' makes it safe to re-run: a second run no longer removes
# the index columns themselves.
raw_violence_columns = [
    'Homicide_female_victims',
    'Rape_female_victims',
    'SexualAssault_female_victims',
    'Average_no_female_victims_per_100k',
]
master_index = master_index.drop(columns=raw_violence_columns, errors='ignore')
# Number of component indices per row that are strictly greater than zero; used as the
# divisor when the components are averaged into the overall index.
master_index['NonZeroCount'] = (
    master_index[['IndexValueViolence', 'IndexValueCare']].gt(0).sum(axis=1)
)
master_index

Unnamed: 0,Country,Year,IndexValueViolence,IndexValueCare,NonZeroCount
0,Austria,2008,0.909360,0.678,2
1,Austria,2009,0.909680,0.674,2
2,Austria,2010,0.889440,0.715,2
3,Austria,2011,0.881520,0.720,2
4,Austria,2012,0.880267,0.735,2
...,...,...,...,...,...
346,Sweden,2016,0.156987,0.000,1
347,Sweden,2017,0.110000,0.000,1
348,Sweden,2018,0.095707,0.000,1
349,Sweden,2019,0.083867,0.000,1


In [10]:
# Overall index: for each row, the average of the component index values that are
# greater than zero, scaled to a 0-100 range.
# Scaling inspired by the Human Development Index:
# https://ourworldindata.org/human-development-index#:~:text=The%20HDI%20is%20calculated%20as,and%20expected%20years%20of%20schooling).
index_components = master_index[['IndexValueViolence', 'IndexValueCare']]
# Mask out components that are not positive (zero or missing) so they count in neither
# the sum nor the divisor; a missing value in one component therefore no longer wipes
# out the whole total.
positive_components = index_components.where(index_components.gt(0))
# Rows with no positive component have nothing to average and get an IndexTotal of 0.
master_index['IndexTotal'] = positive_components.mean(axis=1).mul(100).fillna(0)
# The helper count column is no longer needed; errors='ignore' keeps the cell re-runnable
# after the column has already been removed.
master_index = master_index.drop(columns='NonZeroCount', errors='ignore')
master_index

Unnamed: 0,Country,Year,IndexValueViolence,IndexValueCare,IndexTotal
0,Austria,2008,0.909360,0.678,79.368000
1,Austria,2009,0.909680,0.674,79.184000
2,Austria,2010,0.889440,0.715,80.222000
3,Austria,2011,0.881520,0.720,80.076000
4,Austria,2012,0.880267,0.735,80.763333
...,...,...,...,...,...
346,Sweden,2016,0.156987,0.000,15.698667
347,Sweden,2017,0.110000,0.000,11.000000
348,Sweden,2018,0.095707,0.000,9.570667
349,Sweden,2019,0.083867,0.000,8.386667


In [11]:
# Persist the combined index table. index=True (the pandas default) also writes the row
# index as the first CSV column, so readers of this file get it back as an extra column.
master_index.to_csv('./datasets_cleaned/master_index.csv', index=True)