In [1]:
##Part 1 Install libararies 
#Run the following line if any of the following libraries are not installed
!pip install pandas numpy



In [2]:
##Part 2: loading necessary packages
import pandas as pd
import numpy as np
from IPython.display import FileLink

In [None]:
##Part 3: Loading the data from EVS and performing adjustments
# Location of the raw EVS Stata file (absolute path on the author's machine)
EVS_DATA_PATH = r"C:\Users\Lenovo\Downloads\ZA7503_v3-0-0.dta\ZA7503_v3-0-0.dta"

# convert_categoricals=False leaves Stata value labels unapplied, so columns keep their raw codes
EVS = pd.read_stata(EVS_DATA_PATH, convert_categoricals=False)

#Define variables to keep (this order is also the column order of EVS_reduced)
variables_to_keep_EVS = [
    'S020', 'A008', 'A165', 'G006', 'X001', 'X003', 'S009',
    'S007', 'A170', 'X007', 'X028', 'A009', 'F063', 'X047_EVS',
]

#Readable column names for the retained EVS variable codes
readable_names_EVS = {
    'S020': 'year',
    'S009': 'country',
    'A008': 'happiness',
    'A165': 'people_trustful',
    'X001': 'sex',
    'X003': 'age',
    'G006': 'proud_nationality',
    'S007': 'respondent',
    'A170': 'life_satisfaction',
    'X007': 'marital_status',
    'X028': 'employment_status',
    'A009': 'health',
    'F063': 'god_important',
    'X047_EVS': 'income_class',
}

#Select the retained columns and rename them for interpretability in one chained step
EVS_reduced = EVS.loc[:, variables_to_keep_EVS].rename(columns=readable_names_EVS)

#Country code -> full country name (both GB-GBN and GB-NIR map to 'United Kingdom')
name_change_EVS = {
    'AL': 'Albania',
    'AZ': 'Azerbaijan',
    'AT': 'Austria',
    'AM': 'Armenia',
    'BE': 'Belgium',
    'BA': 'Bosnia and Herzegovina',
    'BG': 'Bulgaria',
    'BY': 'Belarus',
    'CA': 'Canada',
    'HR': 'Croatia',
    'CY': 'Cyprus',
    'CY-TCC': 'Cyprus (Turkish Cypriot Community)',
    'CZ': 'Czechia',
    'DK': 'Denmark',
    'EE': 'Estonia',
    'FI': 'Finland',
    'FR': 'France',
    'GE': 'Georgia',
    'DE': 'Germany',
    'GR': 'Greece',
    'HU': 'Hungary',
    'IS': 'Iceland',
    'IE': 'Ireland',
    'IT': 'Italy',
    'LV': 'Latvia',
    'LT': 'Lithuania',
    'LU': 'Luxembourg',
    'MT': 'Malta',
    'MD': 'Moldova',
    'ME': 'Montenegro',
    'NL': 'Netherlands',
    'NO': 'Norway',
    'PL': 'Poland',
    'PT': 'Portugal',
    'RO': 'Romania',
    'RU': 'Russia',
    'RS': 'Serbia',
    'SK': 'Slovakia',
    'SI': 'Slovenia',
    'ES': 'Spain',
    'SE': 'Sweden',
    'CH': 'Switzerland',
    'TR': 'Turkey',
    'UA': 'Ukraine',
    'MK': 'North Macedonia',
    'GB-GBN': 'United Kingdom',
    'US': 'United States',
    'GB-NIR': 'United Kingdom',
    'RS-KM': 'Kosovo',
}

#Change country abbreviations to full country names; codes absent from the mapping are left unchanged
full_country_names = EVS_reduced['country'].replace(name_change_EVS)
EVS_reduced['country'] = full_country_names

# Replace values lower than 0 with NaN in every numeric column.
# One vectorized mask over the numeric sub-frame replaces the former
# per-column apply(lambda ...), which made a Python call for every cell.
EVS_numeric = EVS_reduced.select_dtypes(include=['number'])
numeric_columns = list(EVS_numeric.columns)
# mask() returns a new frame with NaN wherever the condition holds; EVS_numeric itself is not modified
EVS_reduced[numeric_columns] = EVS_numeric.mask(EVS_numeric < 0)

#Create dummy variables for full employed, unemployed and married or as if married
#(employment_status 1 -> full_employed, 7 -> unemployed; marital_status 1 or 2 -> marital_relationship)
# NOTE(review): rows whose status is missing (NaN) compare False and are therefore coded 0 - confirm this is intended
EVS_reduced = EVS_reduced.assign(
    full_employed=EVS_reduced['employment_status'].eq(1).astype(int),
    unemployed=EVS_reduced['employment_status'].eq(7).astype(int),
    marital_relationship=EVS_reduced['marital_status'].isin([1, 2]).astype(int),
)

#Drop the marital_status and employment_status source variables now that the dummies exist
EVS_reduced = EVS_reduced.drop(columns=['employment_status', 'marital_status'])

   studyno             version                  doi  stdyno_w  \
0     7503  3.0.0 (2022-12-14)  doi:10.4232/1.14021      4800   
1     7503  3.0.0 (2022-12-14)  doi:10.4232/1.14021      4800   
2     7503  3.0.0 (2022-12-14)  doi:10.4232/1.14021      4800   
3     7503  3.0.0 (2022-12-14)  doi:10.4232/1.14021      4800   
4     7503  3.0.0 (2022-12-14)  doi:10.4232/1.14021      4800   

              versn_w  S001  S002EVS  s002vs  S003  COW_NUM  ...  X048H_N1  \
0  5.0.0 (2022-06-08)     1        4       5     8      339  ...        -4   
1  5.0.0 (2022-06-08)     1        4       5     8      339  ...        -4   
2  5.0.0 (2022-06-08)     1        4       5     8      339  ...        -4   
3  5.0.0 (2022-06-08)     1        4       5     8      339  ...        -4   
4  5.0.0 (2022-06-08)     1        4       5     8      339  ...        -4   

   X048I_N2  X049  x049a X049CS  X050  X051  X052  Y001  Y002  
0        -4    -5      5     -4    -4    -4    -4    -4     2  
1        -4 

In [20]:
##Part 4: Download the adjusted EVS data
# Output file name, written relative to the notebook's working directory
adjusted_csv_name = 'EVS_adjusted.csv'

# Save the DataFrame as a CSV without the index column
EVS_reduced.to_csv(adjusted_csv_name, index=False)

# Create download link (kept as the last expression so the cell displays it)
FileLink(adjusted_csv_name)