In [340]:
# Recode YouGov File to match ANES
#including changing ANES predictor column names
#and data from both files

In [None]:
import pandas as pd
import numpy as np


df_yougov_filepath = r"C:\Users\kirin\Downloads\W1_W2_W3_Merged_saved (1).dta" # raw .dta yougov file
df_anes_filepath = r"C:\Users\kirin\Downloads\anes2020\anes_timeseries_2020_stata_20220210.dta" # raw ANES 2020 timeseries

df_yougov = pd.read_stata(df_yougov_filepath, convert_categoricals=True) # categorical data for easy recoding
df_yougov_nums = pd.read_stata(df_yougov_filepath, convert_categoricals=False) # we set a few columns to numeric to allow for easier code matching. 

df_yougov['abortion'] = df_yougov_nums['abortion'] ### adding abortion


df_anes = pd.read_stata(df_anes_filepath, convert_categoricals=False)
### pre election variables: start with V201
post_col_names = {'V202242x', 'V202245x', 'V202266', 'V202267', 'V202268', 'V202468x'} ## add predictors from post election . Remove these if not enough data
mask = df_anes.columns.str.startswith("V201") | df_anes.columns.isin(post_col_names) # keep pre election plus extra vars
df_anes = df_anes.loc[:, mask]

df_anes[df_anes < 0] = np.nan #negative numbers are used to signify categories like "refused" or "inapplicable"
df_anes[(df_anes == 998) | (df_anes == 999) | (df_anes == 99)] = np.nan # 998/999 are don't know how to respond and 99 is no response

In [None]:
### todo: Racial Resentment, Moral Traditionalism, Republican Party / Democratic Party
df_anes = df_anes.rename(columns={'V201028': 'turnout20post', 
                                  'V201029': 'presvote20post',
                                  'V201039': 'housevote20post',
                                  'V201049': 'senvote20post',
                                  #'V201128': 'trump_presidential_approval',
                                  'V201151': 'pff_jb',
                                  'V201152': 'pff_dt',
                                  'V201200': 'ideo7',
                                  'V201231x': 'pid7',
                                  #'V201234': 'para_social_grid_2',
                                  'V201351': 'election_fairnness',
                                 # 'V201370': 'cab_b', #w123
                                  'V201510': 'educ',
                                  'V201549x': 'race', 
                                  'V201558x': 'hispanic',
                                  'V201602': 'partisan_violence',
                                  'V201336': 'abortion',
                                  'V202242x': 'immigrant_citizenship',
                                  'V202245x': 'immigrant_deport',
                                  'V202266': 'auth_grid_1',
                                  'V202267': 'auth_grid_3',
                                  'V202268': 'auth_grid_2', # potentially incorrect numbering
                                  'V202468x': 'faminc_new' #reordered
                                  #'V202483': 'wc_together', # all of these are separated by wave
                                 # 'V202487': 'wc_jobs',
                                 # 'V202516': 'hardworkingvlazy',
                                  #'V202522': 'pronenot_violence',
                                  #'V202527': 'group_disc_black',
                                  #'V202528': 'group_disc_hispanic',
                                  #'V202530': 'group_disc_white'
                                  })
combined = [
                   'christian_nationalism',
                   'turnout20post', 
                   'presvote20post',
                   'housevote20post',
                   'senvote20post',
                   #'trump_presidential_approval',
                   'pff_jb',
                   'pff_dt',
                   'ideo7',
                   'pid7',
                  # 'para_social_grid_2',
                   'election_fairnness',
                   #'cab_b', w1, w2, w3
                   'educ',
                   'race',
                   'hispanic',
                   'partisan_violence',
                   'immigrant_citizenship',
                   'immigrant_deport',
                   'auth_grid_1',
                   'auth_grid_3',
                   'auth_grid_2',
                   'faminc_new'
                  # 'wc_together', w1, w2, w3
                   #'wc_jobs',
                   #'racial_id',
                   #'hardworkingvlazy',
                   #'pronenot_violence',
                   #'group_disc_black',
                   #'group_disc_hispanic',
                   #'group_disc_white'
                   ] 
df_yougov = df_yougov[combined] # disable this to generate test set

In [343]:
#import re 
# function to extract the number from the string
#def extract_number(entry):
#    if isinstance(entry, str):
#        match = re.search(r'-?\d+(\.\d+)?', entry)
#        if match:
#            return float(match.group())  # Convert matched number to float
#    elif isinstance(entry, (int, float)):
#        return entry  # In case the entry already a float or int, return it as is
#    return np.nan # if not a number , nan
#
# Apply function to all relevant columns
#for column in df_anes.columns:
#    df_anes[column] = df_anes[column].apply(extract_number).astype(float, errors='ignore')

In [344]:
print(df_anes['turnout20post'].unique())

[-1.  1.  2. -9.]


In [345]:
#RACE
#1. White, non-Hispanic
#2. Black, non-Hispanic
#3. Hispanic
#4. Asian or Native Hawaiian/other Pacific Islander, non-Hispanic
#alone


# Create the 'white' column based on the 'race' column
df_yougov['white'] = df_yougov['race'].apply(lambda x: 1 if x == 'White' else 0)

In [346]:
print(df_anes['race'])

0       3.0
1       4.0
2       1.0
3       4.0
4       5.0
       ... 
8275    1.0
8276    1.0
8277    3.0
8278    1.0
8279    4.0
Name: race, Length: 8280, dtype: float64


In [347]:
df_anes['white'] = df_anes['race'].apply(lambda x: 1 if x == 1.0 else 0)

In [348]:
#turnout20post = V201028
# ANES: 
#-9 refused
# -1 inapplicable
# 1 Yes
# 2 No

mapping = {
    'Yes': 1.0,
    'No': 2.0
}

# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['turnout20post'] = df_yougov['turnout20post'].map(mapping).fillna(np.nan).astype('float')

In [349]:
# Count the number of 1.0s and 2.0s
counts = df_yougov['turnout20post'].value_counts()

# Print the counts for 1.0 and 2.0
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print(df_yougov['turnout20post'].unique())

1:  1767
2:  645
[nan  1.  2.]


In [350]:
# now recode ANES
print(df_anes['turnout20post'].unique())
#already done; nan, 1, 2

[-1.  1.  2. -9.]


In [351]:
#presvote20post = V201029
#-9. Refused
#-1. Inapplicable
#1. Joe Biden
#2. Donald Trump
#3. Jo Jorgensen
#4. Howie Hawkins
#5. Other candidate {SPECIFY}
#12. Specified as refused

#remapping biden = 1 trump = 2

mapping = {
    'Joe Biden': 1.0,
    'Donald Trump': 2.0,
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['presvote20post'] = df_yougov['presvote20post'].map(mapping).fillna(np.nan).astype('float')

In [352]:
print(df_yougov['presvote20post'].unique())

[nan  1.  2.]


In [353]:
df_anes.loc[~df_anes['presvote20post'].isin([1.0, 2.0]), 'presvote20post'] = np.nan

In [354]:
print(df_anes['presvote20post'].unique())

[nan  1.  2.]


In [355]:
### housevote20post = V201039
# map 1 to democrat 2 to republican 
mapping = {
    'Yes, I voted for the Republican candidate': 2.0,
    'Yes, I voted for the Democratic candidate': 1.0,
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['housevote20post'] = df_yougov['housevote20post'].map(mapping).fillna(np.nan).astype('float')

In [356]:
print(df_yougov['housevote20post'].unique())

[nan  1.  2.]


In [357]:
df_anes.loc[~df_anes['housevote20post'].isin([1.0, 2.0]), 'housevote20post'] = np.nan

In [358]:
print(df_anes['housevote20post'].unique())

[nan  1.  2.]


In [359]:
###senvote20post = V201049
# 1 dem 2 rep

mapping = {
    'Yes, I voted for the Republican candidate': 2.0,
    'Yes, I voted for the Democratic candidate': 1.0,
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['senvote20post'] = df_yougov['senvote20post'].map(mapping).fillna(np.nan).astype('float')

In [360]:
print(df_yougov['senvote20post'].unique())

[nan  1.  2.]


In [361]:
df_anes.loc[~df_anes['senvote20post'].isin([1.0, 2.0]), 'senvote20post'] = np.nan

In [362]:
print(df_anes['senvote20post'].unique())

[nan  1.  2.]


In [363]:
### pff_jb = V201151

mapping = {
    'Very unfavorable': 1.0,
    'Somewhat unfavorable': 2.0,
    'Neutral': 3.0,
    'Somewhat favorable': 4.0,
    'Very favorable': 5.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['pff_jb'] = df_yougov['pff_jb'].map(mapping).fillna(np.nan).astype('float')


In [364]:
# Count the number of 1.0s and 2.0s
counts = df_yougov['pff_jb'].value_counts()

# Print the counts for 1.0 and 2.0
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))


1:  902
2:  244
3:  362
4:  430
5:  479


In [365]:
#anes feeling thermom

# Convert all entries to numeric, setting errors='coerce' will replace non-numeric values with NaN
df_anes['pff_jb'] = pd.to_numeric(df_anes['pff_jb'], errors='coerce')

# Replace entries that are not between 0 and 100 (inclusive) with NaN
df_anes.loc[~((df_anes['pff_jb'] >= 0) & (df_anes['pff_jb'] <= 100)), 'pff_jb'] = np.nan

In [366]:
print(df_anes['pff_jb'].unique())

[  0.  65.  70.  15.  85.  50. 100.  60.  75.  40.  80.  30.  72.  nan
  20.  90.  95.  55.  84.  35.  97.  10.  58.  96.  86.  62.   5.   1.
  25.  51.   9.  61.  87.  49.   2.  48.  77.  66.  13.   3.  89.  45.
   4.  88.  99.  98.  67.  11.   6.  93.  33.  31.  83.  52.  26.  12.
  63.  73.   7.  36.  69.  29.]


In [367]:
### V201152 = pff_dt
mapping = {
    'Very unfavorable': 1.0,
    'Somewhat unfavorable': 2.0,
    'Neutral': 3.0,
    'Somewhat favorable': 4.0,
    'Very favorable': 5.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['pff_dt'] = df_yougov['pff_dt'].map(mapping).fillna(np.nan).astype('float')

In [368]:
# Count the number of 1.0s and 2.0s
counts = df_yougov['pff_dt'].value_counts()

# Print the counts for 1.0 and 2.0
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))


1:  1111
2:  176
3:  283
4:  310
5:  535


In [369]:
#anes feeling thermom

# Convert all entries to numeric, setting errors='coerce' will replace non-numeric values with NaN
df_anes['pff_dt'] = pd.to_numeric(df_anes['pff_dt'], errors='coerce')

# Replace entries that are not between 0 and 100 (inclusive) with NaN
df_anes.loc[~((df_anes['pff_dt'] >= 0) & (df_anes['pff_dt'] <= 100)), 'pff_dt'] = np.nan

In [370]:
print(df_anes['pff_dt'].unique())

[100.   0.  15.  85.  75.  40.  95.   5.  60.  nan  70.  98.  80.  30.
  65.  50.  90.  10.  20.  18.  68.   3.  66.  52.  25.  55.  86.  45.
   9.   1.  89.  99.  87.   8.   7.  88.  35.   6.  24.  69.  74.  92.
  49.   2.  72.  77.  34.  12.  94.  91.  82.  29.  56.  51.  64.]


In [371]:
### V201200 = ideo7
### 1 liberal to 7 conservative 
### V201152 = pff_dt
mapping = {
    'Extremely liberal': 1.0,
    'Liberal': 2.0,
    'Slightly liberal': 3.0,
    'Moderate; middle of the road': 4.0,
    'Slightly conservative': 5.0,
    'Conservative': 6.0,
    'Extremely conservative': 7.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['ideo7'] = df_yougov['ideo7'].map(mapping).fillna(np.nan).astype('float')

In [372]:
# Count the number of 1.0s and 2.0s
counts = df_yougov['ideo7'].value_counts()

# Print the counts for 1.0 and 2.0
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))
print('6: ', counts.get(6.0, 0))
print('7: ', counts.get(7.0, 0))


1:  202
2:  404
3:  159
4:  811
5:  190
6:  443
7:  233


In [373]:
df_anes.loc[~df_anes['ideo7'].isin([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]), 'ideo7'] = np.nan

In [374]:
print(df_anes['ideo7'].unique())

[ 6.  4.  2.  3.  5. nan  1.  7.]


In [375]:
### pid7 = V201231x
#1 dem to 7 republican
df_anes.loc[~df_anes['pid7'].isin([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]), 'pid7'] = np.nan
print(df_anes['pid7'].unique())

[ 7.  4.  3.  6.  2.  1.  5. nan]


In [376]:

mapping = {
    'Strong Democrat': 1.0,
    'Not very strong Democrat': 2.0,
    'Lean Democrat': 3.0,
    'Independent': 4.0,
    'Lean Republican': 5.0,
    'Not very strong Republican': 6.0,
    'Strong Republican': 7.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['pid7'] = df_yougov['pid7'].map(mapping).fillna(np.nan).astype('float')

In [377]:
counts = df_yougov['pid7'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))
print('6: ', counts.get(6.0, 0))
print('7: ', counts.get(7.0, 0))

1:  567
2:  286
3:  203
4:  456
5:  219
6:  203
7:  418


In [378]:
# V201351 = election_fairnness
#anes 1 not confident to 5 confident

df_anes.loc[~df_anes['election_fairnness'].isin([1.0, 2.0, 3.0, 4.0, 5.0]), 'election_fairnness'] = np.nan
print(df_anes['election_fairnness'].unique())


[ 3.  2.  4.  5.  1. nan]


In [379]:

mapping = {
    'Extremely confident': 5.0,
    'Very confident': 4.0,
    'Moderately confident': 3.0,
    'A little confident': 2.0,
    'Not at all confident': 1.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['election_fairnness'] = df_yougov['election_fairnness'].map(mapping).fillna(np.nan).astype('float')

In [380]:
counts = df_yougov['election_fairnness'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))

1:  751
2:  205
3:  359
4:  300
5:  827


In [381]:
### V201510 = educ
# ANES : 
# 1 less than hs
# 2. high school
#3. some college no degree
#4. associate degree in college - occupational/vocational
#5. Associate degree in college - academic
# 6. bachelor's degree 
#7. Master's degree
# 8. Professional/doctorate degree

df_anes.loc[~df_anes['educ'].isin([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]), 'educ'] = np.nan
print(df_anes['educ'].unique())

[ 6.  3.  2.  4.  8.  7.  1.  5. nan]


In [382]:
#YG: indirect mapping where 1 is no educ and 6 is high

mapping = {
    'No HS': 1.0,
    'High school graduate': 2.0,
    'Some college': 3.0,
    '2-year': 4.0,
    '4-year': 5.0,
    'Post-grad': 6.0
}
# Apply the mapping to the yougov column, using NaN for other entries
df_yougov['educ'] = df_yougov['educ'].map(mapping).fillna(np.nan).astype('float')

In [383]:
counts = df_yougov['educ'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))

1:  111
2:  660
3:  511
4:  240
5:  582


In [384]:
### V201558x = hispanic
# recoding all to 1. yes 2. no

mapping = {
    1.0: 1.0,
    2.0: 1.0,
    3.0: 1.0,
    4.0: 1.0,
    7.0: 2.0
}
df_anes['hispanic'] = df_anes['hispanic'].map(mapping).fillna(np.nan)
df_anes.loc[~df_anes['hispanic'].isin([1.0, 2.0]), 'hispanic'] = np.nan
print(df_anes['hispanic'].unique())


[ 1.  2. nan]


In [1]:
mapping = {
    'Yes': 1.0,
    'No': 2.0
}
#yougov column
df_yougov['hispanic'] = df_yougov['hispanic'].map(mapping).fillna(np.nan).astype(float)

NameError: name 'df_yougov' is not defined

In [386]:
counts = df_yougov['hispanic'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))

1:  320
2:  2122
3:  0


In [387]:
# partisan_violence = V201602
# anes 1 not at all to 5 a great deal
df_anes.loc[~df_anes['partisan_violence'].isin([1.0, 2.0, 3.0, 4.0, 5.0]), 'partisan_violence'] = np.nan
print(df_anes['partisan_violence'].unique())


[ 1.  3.  2.  5. nan  4.]


In [388]:
mapping = {
    'Always': 1.0,
    'Frequently': 2.0,
    'Occasionally': 3.0,
    'Never': 4.0
}
# yougov column
df_yougov['partisan_violence'] = df_yougov['partisan_violence'].map(mapping).fillna(np.nan).astype(float)

In [389]:
counts = df_yougov['partisan_violence'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))

1:  55
2:  70
3:  201
4:  2071


In [390]:
### V202242x = immigrant_citizenship
# 1 favor to 7 oppose

df_anes.loc[~df_anes['immigrant_citizenship'].isin([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]), 'immigrant_citizenship'] = np.nan
print(df_anes['immigrant_citizenship'].unique())

[ 1.  3.  2.  7.  4.  6.  5. nan]


In [391]:
mapping = {
    'Favor a great deal': 1.0,
    'Favor a moderate amount': 2.0,
    'Favor a little': 3.0,
    'Neither favor nor oppose': 4.0,
    'Oppose a little': 5.0,
    'Oppose a moderate amount': 6.0,
    'Oppose a great deal': 7.0
}
#yougov column
df_yougov['immigrant_citizenship'] = df_yougov['immigrant_citizenship'].map(mapping).fillna(np.nan).astype(float)

In [392]:
counts = df_yougov['immigrant_citizenship'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))
print('6: ', counts.get(6.0, 0))
print('7: ', counts.get(7.0, 0))

1:  657
2:  337
3:  295
4:  446
5:  131
6:  159
7:  417


In [393]:
### immigrant_deport = V202245x
# 1 favor to 7 oppose

df_anes.loc[~df_anes['immigrant_deport'].isin([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]), 'immigrant_deport'] = np.nan
print(df_anes['immigrant_deport'].unique())

[ 1.  4.  7.  2.  6.  5. nan  3.]


In [394]:
mapping = {
    'Favor a great deal': 1.0,
    'Favor a moderate amount': 2.0,
    'Favor a little': 3.0,
    'Neither favor nor oppose': 4.0,
    'Oppose a little': 5.0,
    'Oppose a moderate amount': 6.0,
    'Oppose a great deal': 7.0
}
#yougov column
df_yougov['immigrant_deport'] = df_yougov['immigrant_deport'].map(mapping).fillna(np.nan).astype(float)

In [395]:
# NOTE: suspicious numbers differ from logged yougov data
counts = df_yougov['immigrant_deport'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))
print('6: ', counts.get(6.0, 0))
print('7: ', counts.get(7.0, 0))

1:  408
2:  207
3:  199
4:  536
5:  215
6:  233
7:  644


In [396]:
# V202266 =auth_grid_1
# independence 1 respect for elders 2

df_anes.loc[~df_anes['auth_grid_1'].isin([1.0, 2.0]), 'auth_grid_1'] = np.nan
print(df_anes['auth_grid_1'].unique())

[ 2.  1. nan]


In [397]:
mapping = {
    'Independence': 1.0,
    'Respect for Elders': 2.0
}
#yougov column
df_yougov['auth_grid_1'] = df_yougov['auth_grid_1'].map(mapping).fillna(np.nan).astype(float)

In [398]:
counts = df_yougov['auth_grid_1'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))

1:  883
2:  1559


In [399]:
### V202267= auth_grid_3
#1. curiosity 2. good manners

df_anes.loc[~df_anes['auth_grid_3'].isin([1.0, 2.0]), 'auth_grid_3'] = np.nan
print(df_anes['auth_grid_3'].unique())


[ 2.  1. nan]


In [400]:
mapping = {
    'Curiosity': 1.0,
    'Good Manners': 2.0
}
#yougov column
df_yougov['auth_grid_3'] = df_yougov['auth_grid_3'].map(mapping).fillna(np.nan).astype(float)

In [401]:
counts = df_yougov['auth_grid_3'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))

1:  998
2:  1444


In [402]:
### V202268 = auth_grid_2
#1. obedience 2. self-reliance

df_anes.loc[~df_anes['auth_grid_2'].isin([1.0, 2.0]), 'auth_grid_2'] = np.nan
print(df_anes['auth_grid_2'].unique())

[ 2.  1. nan]


In [403]:
mapping = {
    'Obedience': 1.0,
    'Self-reliance': 2.0
}
#yougov column
df_yougov['auth_grid_2'] = df_yougov['auth_grid_2'].map(mapping).fillna(np.nan).astype(float)

In [404]:
#DEBUG: answers appear to be reversed 
counts = df_yougov['auth_grid_2'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))

1:  1575
2:  867


In [405]:
### faminc_new = V202468x
# ANES: 1 low through 22 high (250000)

# Convert all entries to numeric, setting errors='coerce' will replace non-numeric values with NaN
df_anes['faminc_new'] = pd.to_numeric(df_anes['faminc_new'], errors='coerce')

# Replace entries that are not between 0 and 100 (inclusive) with NaN
df_anes.loc[~((df_anes['faminc_new'] >= 0) & (df_anes['faminc_new'] <= 22)), 'faminc_new'] = np.nan

print(df_anes['faminc_new'].unique())


[21. 13. 17.  7. 22.  3.  4. 10. 11.  9. 18.  1. 20.  5. 12.  6. 15.  8.
 nan 19. 14. 16.  2.]


In [406]:
mapping = {
    'Less than $10,000': 1.0,
    '$10,000 - $19,999': 2.0,
    '$20,000 - $29,999': 3.0,
    '$30,000 - $39,999': 4.0,
    '$40,000 - $49,999': 5.0,
    '$50,000 - $59,999': 6.0,
    '$60,000 - $69,999': 7.0,
    '$70,000 - $79,999': 8.0,
    '$80,000 - $99,999': 9.0,
    '$100,000 - $119,999': 10.0,
    '$120,000 - $149,999': 11.0,
    '$150,000 - $199,999': 12.0,
    '$200,000 - $249,999': 13.0,
    '$250,000 - $349,999': 14.0,
    '$350,000 - $499,999': 15.0,
    '$500,000 or more': 16.0
}
# yougov column
df_yougov['faminc_new'] = df_yougov['faminc_new'].map(mapping).fillna(np.nan).astype(float)

In [407]:
# DEBUG: this doesn't look right either
counts = df_yougov['faminc_new'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('12: ', counts.get(12.0, 0))

1:  181
2:  118
12:  203


In [None]:
### RACE RESENT

In [408]:
#DEBUG: w1, w2, w3 specifications
### wc_together = V202483
# 1 important to 5 not
#df_anes.loc[~df_anes['wc_together'].isin([1.0, 2.0, 3.0, 4.0, 5.0]), 'wc_together'] = np.nan
#print(df_anes['wc_together'].unique())

In [409]:
'''mapping = {
    'Extremely important': 1.0,
    'Very important': 2.0,
    'Moderately important': 3.0,
    'A little important': 4.0,
    'Not at all important': 5.0
}
df_yougov['wc_together'] = df_yougov['wc_together'].map(mapping).fillna(np.nan)
'''

"mapping = {\n    'Extremely important': 1.0,\n    'Very important': 2.0,\n    'Moderately important': 3.0,\n    'A little important': 4.0,\n    'Not at all important': 5.0\n}\ndf_yougov['wc_together'] = df_yougov['wc_together'].map(mapping).fillna(np.nan)\n"

In [410]:

'''counts = df_yougov['wc_together'].value_counts()
# Print the counts 
print('1: ', counts.get(1.0, 0))
print('2: ', counts.get(2.0, 0))
print('3: ', counts.get(3.0, 0))
print('4: ', counts.get(4.0, 0))
print('5: ', counts.get(5.0, 0))'''


"counts = df_yougov['wc_together'].value_counts()\n# Print the counts \nprint('1: ', counts.get(1.0, 0))\nprint('2: ', counts.get(2.0, 0))\nprint('3: ', counts.get(3.0, 0))\nprint('4: ', counts.get(4.0, 0))\nprint('5: ', counts.get(5.0, 0))"

In [411]:
### same with all vars past this point

In [412]:
# Check the data type of the column 
print(df_anes['turnout20post'])

0      -1.0
1      -1.0
2      -1.0
3      -1.0
4      -1.0
       ... 
8275   -1.0
8276   -1.0
8277   -1.0
8278    1.0
8279   -1.0
Name: turnout20post, Length: 8280, dtype: float64


In [None]:
# Save the DataFrames as Stata .dta files
df_anes.to_stata('temp_anes.dta')

In [414]:
df_yougov = df_yougov.drop(columns=['race'])
print(df_yougov.select_dtypes(include=['category']).columns)

Index([], dtype='object')


In [416]:
df_yougov.to_stata('temp_yougov.dta')