The NumPy and Pandas packages are required for operating with data frames.

In [1]:
import numpy as np
import pandas as pd

# Data management

## Loading the datasets

The data for measuring social and political attitudes is provided by global survey organizations including the World Values Survey Association (WVSA) and Pew Research Center (PWC). The data for country-level variables like economic development is from international institutions like the World Bank and the UN Human Development Programme.

### World Values Survey (WVS)

In [3]:
wvs7 = pd.read_csv('WVS_Cross-National_Wave_7_csv_v5_0.csv', dtype={519: 'str', 520: 'str', 521: 'str', 522: 'str'})

  wvs7 = pd.read_csv('WVS_Cross-National_Wave_7_csv_v5_0.csv', dtype={519: 'str', 520: 'str', 521: 'str', 522: 'str'})


In [4]:
print(wvs7.iloc[:,519])
print(wvs7.iloc[:,520])
print(wvs7.iloc[:,521])
print(wvs7.iloc[:,522])

0        0.857
1        0.857
2        0.857
3        0.857
4        0.857
         ...  
94273      NaN
94274      NaN
94275      NaN
94276      NaN
94277      NaN
Name: hdi, Length: 94278, dtype: object
0        0.935
1        0.935
2        0.935
3        0.935
4        0.935
         ...  
94273      NaN
94274      NaN
94275      NaN
94276      NaN
94277      NaN
Name: incomeindexHDI, Length: 94278, dtype: object
0        -9999
1        -9999
2        -9999
3        -9999
4        -9999
         ...  
94273      NaN
94274      NaN
94275      NaN
94276      NaN
94277      NaN
Name: humanineqiality, Length: 94278, dtype: object
0        0.951
1        0.951
2        0.951
3        0.951
4        0.951
         ...  
94273      NaN
94274      NaN
94275      NaN
94276      NaN
94277      NaN
Name: lifeexpectHDI, Length: 94278, dtype: object


## Inspecting the datasets

In [5]:
type(wvs7)

pandas.core.frame.DataFrame

In [7]:
wvs7.head(10)

Unnamed: 0,version,doi,A_WAVE,A_YEAR,A_STUDY,B_COUNTRY,B_COUNTRY_ALPHA,C_COW_NUM,C_COW_ALPHA,D_INTERVIEW,...,WVS_Polmistrust_PartyVoter,WVS_LR_MedianVoter,WVS_LibCon_MedianVoter,v2psbars,v2psorgs,v2psprbrch,v2psprlnks,v2psplats,v2xnp_client,v2xps_party
0,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070001,...,62.434211,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
1,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070002,...,62.434211,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
2,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070003,...,62.434211,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
3,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070004,...,,,,,,,,,,
4,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070005,...,66.964286,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
5,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070006,...,,,,,,,,,,
6,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070007,...,,,,,,,,,,
7,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070008,...,,,,,,,,,,
8,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070009,...,66.964286,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
9,5-0-0 (2022-12-23),doi.org/10.14281/18241.20,7,2018,2,20,AND,232,AND,20070010,...,,,,,,,,,,


### Pew Research Center (PWC)'s Global Attitudes Survey (GAS)

#### Loading each wave with at least 33 included countries

In [3]:
summer_2002 = pd.read_spss('./pew_research_center/gap_summer_2002_44_countries/Pew GAP final 44 country dataset 1.1sav.sav') # summer_2002 #44 countries

In [6]:
spring_2007 = pd.read_spss('./pew_research_center/gap_spring_2007_47_countries/GAP_2007_Data.sav') # spring_2007 #48 countries

ReadstatError: Unable to convert string to the requested encoding (invalid byte sequence)

In [8]:
spring_2014 = pd.read_spss('./pew_research_center/gap_spring_2014_44_countries/Pew Research Global Attitudes Spring 2014 Dataset for Web.sav') # spring_2014 #44 countries

In [9]:
spring_2015 = pd.read_spss('./pew_research_center/gap_spring_2015_40_countries/Pew Research Global Attitudes Spring 2015 Dataset for Web FINAL.sav') #40 countries

In [10]:
spring_2017 = pd.read_spss('./pew_research_center/gap_spring_2017_38_countries/Pew Research Global Attitudes Spring 2017 Dataset WEB FINAL.sav') # spring_2017 #38 countries

In [11]:
spring_2019 = pd.read_spss('./pew_research_center/gap_spring_2019_34_countries/Pew Research Center Global Attitudes Spring 2019 Dataset WEB.sav') #34 countries

#### Checking each wave

In [13]:
print(type(summer_2002))
print(type(spring_2014))
print(type(spring_2015))
print(type(spring_2017))
print(type(spring_2019))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [14]:
print(summer_2002.shape)
print(spring_2014.shape)
print(spring_2015.shape)
print(spring_2017.shape)
print(spring_2019.shape)

(38263, 399)
(48643, 1112)
(45435, 896)
(41953, 875)
(38426, 607)


In [15]:
print(summer_2002.head(5))
print(spring_2014.head(5))
print(spring_2015.head(5))
print(spring_2017.head(5))
print(spring_2019.head(5))

     country  psraid  quest_id                       q1     q2     q3  \
0  Argentina     1.0       1.0   A particularly bad day   Four  Eight   
1  Argentina     2.0       2.0  A particularly good day  Three    Six   
2  Argentina     3.0       3.0  A particularly good day   Nine  Seven   
3  Argentina     4.0       4.0  A particularly good day  Eight  Eight   
4  Argentina     5.0       5.0            A typical day   Four  Seven   

                         q4                                q5.1rec  \
0                     Eight                       Social relations   
1                     Three                                  Crime   
2  Don't know (DO NOT READ)  Economic problems, financial problems   
3  Don't know (DO NOT READ)  Economic problems, financial problems   
4  Don't know (DO NOT READ)                                 Health   

                                 q5.2rec                   q5.3rec  ...  \
0                                 Health                       Na

## Extracting the variables

### World Values Survey (WVS)

In [9]:
wvs7_selected = wvs7.loc[:, ['B_COUNTRY_ALPHA', 'Q262', #demographics
#liberalism vs. authoritarianism
'Q106', 'Q107', 'Q108', 'Q109', 'Q110', 'Q241', 'Q244', 'Q247']]  #socialism vs. free market
#progressivism vs. conservatism
#science
#religion
#environmental protection

## Inspecting the datasets

### World Values Survey (WVS)

In [10]:
wvs7_selected

Unnamed: 0,B_COUNTRY_ALPHA,Q262,Q106,Q107,Q108,Q109,Q110,Q241,Q244,Q247
0,AND,60,5,8,2,10,2,7,8,7
1,AND,47,5,2,2,2,2,9,6,4
2,AND,48,7,5,5,2,4,7,7,4
3,AND,62,5,8,5,4,8,9,10,7
4,AND,49,4,7,7,2,6,8,7,6
...,...,...,...,...,...,...,...,...,...,...
94273,NIR,83,8,4,6,4,3,1,10,5
94274,NIR,34,10,5,1,4,10,10,9,1
94275,NIR,19,10,5,5,8,9,10,5,5
94276,NIR,19,2,1,1,9,9,-1,-1,-1


# Data transformation

## Cleaning the data

In [20]:
wvs7_clean = wvs7_selected.loc[(wvs7_selected['Q262'] > 0) & 
                  (wvs7_selected['Q106'] > 0) & 
                  (wvs7_selected['Q107'] > 0) & 
                  (wvs7_selected['Q108'] > 0) & 
                  (wvs7_selected['Q109'] > 0) & 
                  (wvs7_selected['Q110'] > 0) & 
                  (wvs7_selected['Q241'] > 0) & 
                  (wvs7_selected['Q244'] > 0) & 
                  (wvs7_selected['Q247'] > 0)]

## Building indices

In [24]:
wvs7_clean['free_market'] = (wvs7_clean['Q106'] + (wvs7_clean['Q107']*-1+10) + wvs7_clean['Q108'] + wvs7_clean['Q109'] + (wvs7_clean['Q110']*-1+10) + (wvs7_clean['Q241']*-1+10) + (wvs7_clean['Q244']*-1+10) + (wvs7_clean['Q247']*-1+10))/8

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wvs7_clean['free_market'] = (wvs7_clean['Q106'] + (wvs7_clean['Q107']*-1+10) + wvs7_clean['Q108'] + wvs7_clean['Q109'] + (wvs7_clean['Q110']*-1+10) + (wvs7_clean['Q241']*-1+10) + (wvs7_clean['Q244']*-1+10) + (wvs7_clean['Q247']*-1+10))/8


## Inspecting the data

In [25]:
wvs7_clean

Unnamed: 0,B_COUNTRY_ALPHA,Q262,Q106,Q107,Q108,Q109,Q110,Q241,Q244,Q247,free_market
0,AND,60,5,8,2,10,2,7,8,7,4.375
1,AND,47,5,2,2,2,2,9,6,4,4.500
2,AND,48,7,5,5,2,4,7,7,4,4.625
3,AND,62,5,8,5,4,8,9,10,7,2.750
4,AND,49,4,7,7,2,6,8,7,6,3.625
...,...,...,...,...,...,...,...,...,...,...,...
94272,NIR,55,1,10,10,1,1,10,10,5,3.250
94273,NIR,83,8,4,6,4,3,1,10,5,5.625
94274,NIR,34,10,5,1,4,10,10,9,1,3.750
94275,NIR,19,10,5,5,8,9,10,5,5,4.875


# Data analysis

In [29]:
age_market = wvs7_clean.groupby('B_COUNTRY_ALPHA')[['Q262','free_market']].corr().unstack().iloc[:,1]

In [37]:
pd.set_option('display.max_rows', None)

In [40]:
age_market.sort_values()

B_COUNTRY_ALPHA
CHN   -0.249683
ARM   -0.175550
CZE   -0.166140
RUS   -0.136199
UKR   -0.135939
SVK   -0.133365
MMR   -0.128758
KGZ   -0.113294
SRB   -0.110674
IDN   -0.107276
MDV   -0.104537
VEN   -0.101217
COL   -0.097188
NLD   -0.096912
VNM   -0.077745
PRI   -0.077697
GRC   -0.074012
CYP   -0.066171
MAC   -0.059481
SGP   -0.059382
JOR   -0.057696
TUN   -0.050629
ROU   -0.041993
URY   -0.041054
TWN   -0.039991
PHL   -0.039653
TJK   -0.039251
KAZ   -0.038870
KEN   -0.038803
IRN   -0.032089
ZWE   -0.028112
THA   -0.016420
NIC   -0.015956
TUR   -0.007781
BOL   -0.004609
PER   -0.002033
IRQ    0.000419
PAK    0.003278
NGA    0.003320
EGY    0.012524
MNG    0.015764
BRA    0.016496
MYS    0.018576
DEU    0.020397
HKG    0.022292
LBY    0.029690
AUS    0.030198
AND    0.030311
MEX    0.031593
JPN    0.031702
CHL    0.035710
ARG    0.036131
GTM    0.043574
ETH    0.044387
BGD    0.049697
NIR    0.052648
NZL    0.054676
ECU    0.055421
KOR    0.070066
MAR    0.080515
GBR    0.085176
CAN    0

# Data representation