# Extracting Survey Data
Creating Index out of European Social Survey Data, in order to feed the ABM with actual data

In [26]:
import pandas as pd
import numpy as np

In [27]:
# Load the ESS dataset
ess_data = pd.read_csv('ESS11/ESS11.csv')
ess_data.head()

  ess_data = pd.read_csv('ESS11/ESS11.csv')


Unnamed: 0,name,essround,edition,proddate,idno,cntry,dweight,pweight,nwspol,netusoft,...,rinwe,inwde,jinws,jinwe,inwtm,mode,domain,prob,stratum,psu
0,ESS11e01,11,1.0,20.06.2024,50014,AT,1.185115,0.330915,90,5,...,,2023-11-12 15:26:55,2023-11-12 15:21:28,2023-11-12 15:26:55,30.0,1,2.0,0.000579,107,317
1,ESS11e01,11,1.0,20.06.2024,50030,AT,0.609898,0.330915,90,5,...,,2023-10-18 10:44:18,2023-10-18 10:42:22,2023-10-18 10:44:18,40.0,1,1.0,0.001124,69,128
2,ESS11e01,11,1.0,20.06.2024,50057,AT,1.39233,0.330915,30,5,...,,2023-09-30 14:13:33,2023-09-30 14:08:31,2023-09-30 14:13:33,42.0,1,2.0,0.000493,18,418
3,ESS11e01,11,1.0,20.06.2024,50106,AT,0.556061,0.330915,15,1,...,,2023-06-30 15:11:21,2023-06-30 15:08:05,2023-06-30 15:11:21,34.0,1,1.0,0.001233,101,295
4,ESS11e01,11,1.0,20.06.2024,50145,AT,0.722795,0.330915,60,5,...,,2023-07-11 11:14:03,2023-07-11 11:10:02,2023-07-11 11:14:03,57.0,1,2.0,0.000949,115,344


In [28]:
# Filter rows where 'cntry' is 'DE' >>assuming talent and ses plays different roles in different countries
ess_data = ess_data.loc[ess_data['cntry'] == 'DE']

## Socio-Economic Index

In [29]:
# Filter income variable
ess_data['hinctnta'] = ess_data['hinctnta'].apply(lambda x: x if 1 <= x <= 10 else pd.NA)
ess_data = ess_data.dropna(subset=['hinctnta'])
print(ess_data['hinctnta'])

# Filter education
ess_data['eisced'] = ess_data['eisced'].apply(lambda x: x if 0 <= x <= 7 else pd.NA)
ess_data = ess_data.dropna(subset=['eisced'])
print(ess_data['eisced'])


3738     9
3739     3
3741     8
3742    10
3743     6
        ..
6152     8
6153    10
6154    10
6155     8
6156     5
Name: hinctnta, Length: 2169, dtype: object
3738    6
3739    7
3741    3
3742    7
3743    5
       ..
6152    3
6153    2
6154    7
6155    3
6156    6
Name: eisced, Length: 2161, dtype: object


In [30]:
# Normalize the variables to a 0-1 scale
ess_data['education_norm'] = (ess_data['edulvlb'] - ess_data['edulvlb'].min()) / (ess_data['edulvlb'].max() - ess_data['edulvlb'].min())
ess_data['income_norm'] = (ess_data['hinctnta'] - ess_data['hinctnta'].min()) / (ess_data['hinctnta'].max() - ess_data['hinctnta'].min())

# Create Socio-economic index (with mean)
ess_data['ses_index'] = ess_data[['education_norm', 'income_norm']].mean(axis=1)


## Talent Index
atncrse: "During the last twelve months, have you taken any course or attended any lecture or conference to improve your knowledge or skills for work?"

ipcrtiva: "Important to think new ideas and being creative"

ipshabta: "Important to show abilities and be admired"

In [31]:
# Filter knowledge variable
ess_data['knowledge'] = ess_data['atncrse'].apply(lambda x: x if 1 <= x <= 2 else pd.NA)
ess_data = ess_data.dropna(subset=['knowledge'])

# recode 
ess_data['knowledge'] = ess_data['knowledge'].replace({1: 1, 2: 0})
print(ess_data['knowledge'])

# Filter creative variable
ess_data['creative'] = ess_data['ipcrtiva'].apply(lambda x: x if 1 <= x <= 6 else pd.NA)
ess_data = ess_data.dropna(subset=['creative'])

# recode 
ess_data['creative'] = ess_data['creative'].replace({1: 1, 2: 1, 3:0, 4:0, 5:0, 6:0})
print(ess_data['creative'])

# Filter creative variable
ess_data['abilities'] = ess_data['ipcrtiva'].apply(lambda x: x if 1 <= x <= 6 else pd.NA)
ess_data = ess_data.dropna(subset=['abilities'])

# recode 
ess_data['abilities'] = ess_data['abilities'].replace({1: 1, 2: 1, 3:0, 4:0, 5:0, 6:0})
print(ess_data['abilities'])


3738    1
3739    1
3741    0
3742    1
3743    0
       ..
6152    0
6153    1
6154    1
6155    0
6156    1
Name: knowledge, Length: 2161, dtype: int64
3738    0
3739    1
3741    1
3742    0
3743    0
       ..
6152    1
6153    1
6154    0
6155    1
6156    0
Name: creative, Length: 2131, dtype: int64
3738    0
3739    1
3741    1
3742    0
3743    0
       ..
6152    1
6153    1
6154    0
6155    1
6156    0
Name: abilities, Length: 2131, dtype: int64


In [32]:
# Create Socio-economic index (with mean)
ess_data['talent_index'] = ess_data[['knowledge', 'creative', 'abilities']].mean(axis=1)

## Capital
capital gets proxied with education of parents >>higher education of previous generation 
->higher assets 
->better starting conditions for making use of lucky events

In [33]:
# Fathers highest level of education
ess_data['education_father'] = ess_data['eiscedf'].apply(lambda x: x if 0 <= x <= 7 else pd.NA)
ess_data = ess_data.dropna(subset=['education_father'])

# Mothers highest level of education
ess_data['education_mother'] = ess_data['eiscedm'].apply(lambda x: x if 0 <= x <= 7 else pd.NA)
ess_data = ess_data.dropna(subset=['education_mother'])

In [34]:
# Normalize the variables to a 0-1 scale
ess_data['education_father_norm'] = (ess_data['education_father'] - ess_data['education_father'].min()) / (ess_data['education_father'].max() - ess_data['education_father'].min())
ess_data['education_mother_norm'] = (ess_data['education_mother'] - ess_data['education_mother'].min()) / (ess_data['education_mother'].max() - ess_data['education_mother'].min())

# Create Socio-economic index (with mean) - multiplied by 10, because original starting capital was at 10
ess_data['capital'] = ess_data[['education_father_norm', 'education_mother_norm']].mean(axis=1) *10


In [35]:
# Save the processed talent data
ess_data_subset = ess_data[['idno', 'ses_index', 'talent_index', 'capital']]
ess_data_subset.to_csv('processed_ess_data.csv', index=False)