#### Instalando as bibliotecas necessárias

In [78]:
%pip install -q -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


#### Importando as bibliotecas

In [79]:
# Importando tudo que a análise econometrica precisa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pyreadstat
import seaborn as sns
import pandasql as ps

#### Importando os dados em formato sav

In [80]:
# Lê os arquivos .sav
df_general, meta1 = pyreadstat.read_sav('data/Deidentified_WPH003a Economy (General Population) (SPSS Version).sav')
df_economists, meta2 = pyreadstat.read_sav('data/Deidentified_WPH003b Economy (Economists) (SPSS Version).sav')

# Função para criar um DataFrame com labels
def create_label_df(df, meta):
    df_labels = df.copy()  # Copia o DataFrame original
    for column in df_labels.columns:
        if column in meta.variable_value_labels:  # Verifica se há rótulos para a coluna
            labels_dict = meta.variable_value_labels[column]  # Dicionário de rótulos {valor_numérico: label}
            df_labels[column] = df_labels[column].map(labels_dict)  # Mapeia os códigos para rótulos
    return df_labels

# Cria DataFrames com labels para ambos os arquivos
df_general_labels = create_label_df(df_general, meta1)
df_economists_labels = create_label_df(df_economists, meta2)

In [81]:
# Criando uma cópia de df_general com o prefixo "cod_"
df_general_renamed = df_general.copy()
df_general_renamed.columns = ['cod_' + col for col in df_general_renamed.columns]

# Unindo a descrição de resposta (df_general_labels) com a resposta codificada (df_general_renamed) usando pandasql
query = '''
SELECT 
    df_general_labels.*, df_general_renamed.*
FROM
    df_general_labels
JOIN
    df_general_renamed
ON
    df_general_labels.intv = df_general_renamed.cod_intv
'''

df_general_union = ps.sqldf(query, locals())

df_general_union.head()


Unnamed: 0,intv,q901,q905,q908,q910,q918,race,sex,state,region,...,cod_q46,cod_q47,cod_q48,cod_q49,cod_q50,cod_q51,cod_q55,cod_q56,cod_q919,cod_wt1
0,1.0,DEMOCRAT,YES,LIBERAL,,$25000-29999,WHITE,FEMALE,37.0,SOUTH,...,6.0,,2.0,,,2.0,5.0,6.0,2.0,0.89
1,2.0,REPUBLICAN,NO,VERY CONSERVATIVE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,1.0,1.0,1.0,1.0,1.0,1.0,6.0,7.0,2.0,1.0
2,3.0,DEMOCRAT,NO,MODERATE,,$10000-19999,WHITE,FEMALE,27.0,SOUTH,...,,,2.0,,,2.0,3.0,,2.0,1.23
3,4.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,1.0,1.0,2.0,,,2.0,5.0,5.0,2.0,0.88
4,5.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,MALE,35.0,SOUTH,...,1.0,1.0,2.0,,,1.0,5.0,5.0,2.0,0.94


In [82]:
# Criando uma cópia de df_economists com o prefixo "cod_"
df_economists_renamed = df_economists.copy()
df_economists_renamed.columns = ['cod_' + col for col in df_economists_renamed.columns]

# Unindo a descrição de resposta (df_economists_labels) com a resposta codificada (df_economists_renamed) usando pandasql
query = '''
SELECT 
    df_economists_labels.*, df_economists_renamed.*
FROM
    df_economists_labels
JOIN
    df_economists_renamed
ON
    df_economists_labels.intv = df_economists_renamed.cod_intv
'''

df_economists_union = ps.sqldf(query, locals())

df_economists_union


Unnamed: 0,intv,q901,q905,q908,q910,q918,race,sex,state,region,...,cod_q44,cod_q45,cod_q919,cod_q115,cod_seq,cod_pri,cod_sic,cod_title,cod_effort,cod_expdate
0,1.0,,,,,,,,24.0,SOUTH,...,,,,,1197.0,3.0,0.0,99.0,,9706.0
1,2.0,INDEPENDENT,YES,MODERATE,,$100000 OR MORE,WHITE,MALE,24.0,SOUTH,...,1.0,3.0,2.0,2.0,1028.0,0.0,0.0,0.0,N50301,9704.0
2,3.0,INDEPENDENT,YES,MODERATE,,$75000-99999,WHITE,MALE,24.0,SOUTH,...,1.0,3.0,2.0,2.0,845.0,0.0,0.0,99.0,,9706.0
3,4.0,DEMOCRAT,YES,MODERATE,,$50000-74999,WHITE,MALE,24.0,SOUTH,...,2.0,,2.0,2.0,836.0,0.0,0.0,99.0,,9612.0
4,5.0,,,,,,,,24.0,SOUTH,...,,,,,933.0,0.0,0.0,99.0,,9703.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,595.0,REPUBLICAN,YES,MODERATE,,$50000-74999,WHITE,MALE,10.0,MIDWEST,...,1.0,1.0,2.0,2.0,1966.0,0.0,5.0,99.0,,9709.0
595,596.0,,,,,,,,8.0,EAST,...,,,,,332.0,0.0,0.0,99.0,,9612.0
596,597.0,,,,,,,,23.0,SOUTH,...,,,,,1367.0,0.0,0.0,99.0,,9706.0
597,598.0,REPUBLICAN,YES,CONSERVATIVE,,$100000 OR MORE,WHITE,MALE,49.0,WEST,...,2.0,,2.0,2.0,2830.0,0.0,0.0,99.0,,9706.0


In [83]:

# criando as variáveis de controle

query = '''
SELECT 
    df_general_union.*,
    0 AS econ,
    
    CASE 
        WHEN cod_sex = 1 THEN 1 
        ELSE 0 
    END AS male,
    
    1996 - q910 AS age_in_1996,
    
    CASE 
        WHEN cod_race NOT IN (1, 2, 3) THEN 1 
        ELSE 0 
    END AS othrace,
    
    CASE 
        WHEN cod_race = 1 THEN 1 
        ELSE 0 
    END AS white,
    
    CASE 
        WHEN cod_race = 3 THEN 1 
        ELSE 0 
    END AS asian,
    
    CASE 
        WHEN cod_race = 2 THEN 1 
        ELSE 0 
    END AS black,
    
    CASE 
        WHEN cod_q36 = 4 THEN 3
        WHEN cod_q36 = 3 THEN 2
        WHEN cod_q36 = 2 THEN 1
        WHEN cod_q36 = 1 THEN 0
        ELSE NULL 
    END AS jobsecurity,
    
    CASE 
        WHEN cod_q15 = 3 THEN 0
        WHEN cod_q15 = 2 THEN 1
        WHEN cod_q15 = 1 THEN 2
        ELSE NULL 
    END AS yourlast5,
    
    CASE 
        WHEN cod_q17 = 2 THEN 0
        WHEN cod_q17 = 3 THEN 1
        WHEN cod_q17 = 1 THEN 2
        ELSE NULL 
    END AS yournext5,
    
    CASE 
        WHEN cod_q918 BETWEEN 1 AND 9 THEN cod_q918 
        ELSE NULL 
    END AS income,
    
    CASE 
        WHEN cod_q901 = 1 THEN 1 
        ELSE 0 
    END AS dem,
    
    CASE 
        WHEN cod_q901 = 2 THEN 1 
        ELSE 0 
    END AS rep,
    
    CASE 
        WHEN cod_q901 = 3 THEN 1 
        ELSE 0 
    END AS indep,
    
    CASE 
        WHEN cod_q901 = 4 THEN 1 
        ELSE 0 
    END AS othparty,
    
    CASE 
        WHEN cod_q908 = 1 THEN -2
        WHEN cod_q908 = 2 THEN -1
        WHEN cod_q908 = 3 THEN 0
        WHEN cod_q908 = 4 THEN 1
        WHEN cod_q908 = 5 THEN 2
        ELSE NULL 
    END AS ideology,
    
    CASE 
        WHEN cod_q908 = 6 THEN 1 
        ELSE 0 
    END AS othideol,
    
    CASE 
        WHEN q55 = 'None' THEN 1
        WHEN cod_q55 BETWEEN 1 AND 7 THEN cod_q55
        ELSE NULL 
    END AS education,
    
    df_general_union.q27a1 AS TAXESHIGH,
    df_general_union.q27b1 AS DEFICIT,
    df_general_union.q27c1 AS FORAID,
    df_general_union.q27d1 AS IMMIG,
    df_general_union.q27e1 AS TAXBREAK,
    df_general_union.q27f1 AS INADEDUC,
    df_general_union.q27g1 AS WELFARE,
    df_general_union.q27h1 AS AA,
    df_general_union.q27i1 AS HARDWORK,
    df_general_union.q27j1 AS REG,
    df_general_union.q27k1 AS SAVINGS,
    df_general_union.q29l1 AS PROFHIGH,
    df_general_union.q29m1 AS EXECPAY,
    df_general_union.q29n1 AS BUSPROD,
    df_general_union.q29o1 AS TECH,
    df_general_union.q29p1 AS OVERSEAS,
    df_general_union.q29q1 AS DOWNSIZE,
    df_general_union.q29r1 AS COMPEDUC,
    df_general_union.q22a1 AS TAXCUT,
    df_general_union.q22b1 AS WOMENWORK,
    df_general_union.q22c1 AS TECHGOOD,
    df_general_union.q22d1 AS TRADEAG,
    df_general_union.q22e1 AS DOWNGOOD,
    df_general_union.q13 AS CHANGE20,
    df_general_union.q24 AS TRADEJOB,
    df_general_union.q26 AS WHYGASSD,
    df_general_union.q25 AS GASPRICE,
    df_general_union.q21 AS PRES,
    df_general_union.q9 AS NEWJOB,
    df_general_union.q10 AS GAP20,
    df_general_union.q11 AS INCOME20,
    df_general_union.q12 AS WAGE20,
    df_general_union.q13 AS NEED2EARN,
    df_general_union.q14 AS STAN5,
    df_general_union.q18 AS CHILDGEN,
    df_general_union.q49 AS CHILDSTAN,
    df_general_union.q1 AS CURECON

FROM 
    df_general_union
'''

df_general_union = ps.sqldf(query, locals())

df_general_union

Unnamed: 0,intv,q901,q905,q908,q910,q918,race,sex,state,region,...,PRES,NEWJOB,GAP20,INCOME20,WAGE20,NEED2EARN,STAN5,CHILDGEN,CHILDSTAN,CURECON
0,1.0,DEMOCRAT,YES,LIBERAL,,$25000-29999,WHITE,FEMALE,37.0,SOUTH,...,DELINE IN MORAL VALUES,LOW-PAYING,LARGER,FALLING BEHIND,FALLING BEHIND,TWO WAGE EARNERS,FALL,LOWER,,GROWING SLOWLY
1,2.0,REPUBLICAN,NO,VERY CONSERVATIVE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,NOT ENOUGH MONEY,PAY WELL,ABOUT THE SAME,STAYING EVEN,STAYING EVEN,ONE WAGE EARNER,STAYED SAME,HIGHER,HIGHER,GROWING SLOWLY
2,3.0,DEMOCRAT,NO,MODERATE,,$10000-19999,WHITE,FEMALE,27.0,SOUTH,...,NOT ENOUGH MONEY,LOW-PAYING,ABOUT THE SAME,STAYING EVEN,STAYING EVEN,TWO WAGE EARNERS,RISE,HIGHER,,GROWING SLOWLY
3,4.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,HEALTH,LOW-PAYING,LARGER,STAYING EVEN,STAYING EVEN,ONE WAGE EARNER,STAYED SAME,HIGHER,,IN DEPRESSION
4,5.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,MALE,35.0,SOUTH,...,HEALTH,LOW-PAYING,LARGER,FALLING BEHIND,FALLING BEHIND,TWO WAGE EARNERS,STAYED SAME,HIGHER,,STAGNATING
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1505,1506.0,INDEPENDENT,NO,LIBERAL,,$30000-39999,WHITE,MALE,30.0,SOUTH,...,HEALTH,LOW-PAYING,LARGER,FALLING BEHIND,FALLING BEHIND,TWO WAGE EARNERS,RISE,LOWER,,IN RECESSION
1506,1507.0,DEMOCRAT,YES,MODERATE,,$50000-74999,WHITE,FEMALE,25.0,SOUTH,...,HEALTH,LOW-PAYING,LARGER,FALLING BEHIND,FALLING BEHIND,TWO WAGE EARNERS,FALL,LOWER,STAY SAME,IN RECESSION
1507,1508.0,INDEPENDENT,NO,MODERATE,,$10000-19999,,MALE,33.0,SOUTH,...,HEALTH,LOW-PAYING,LARGER,FALLING BEHIND,FALLING BEHIND,TWO WAGE EARNERS,FALL,DK/NO OPINION,,STAGNATING
1508,1509.0,DEMOCRAT,YES,LIBERAL,,$40000-49999,BLACK,FEMALE,26.0,SOUTH,...,HEALTH,LOW-PAYING,ABOUT THE SAME,GOING UP,FALLING BEHIND,TWO WAGE EARNERS,STAYED SAME,HIGHER,HIGHER,GROWING RAPIDLY


# a partir daqui não está funcionando


In [84]:
# criando as variáveis de controle

query = '''
SELECT
      df_economists_union.*
    , 1 as econ
    , CASE
          WHEN cod_sex = 1
          THEN 1
          ELSE 0
      END AS male
    , 1996 - q910 AS age_in_1996
    , CASE
          WHEN cod_race NOT IN (1, 2, 3)
          THEN 1
          ELSE 0
      END AS othrace
    , CASE
          WHEN cod_race = 1
          THEN 1
          ELSE 0
      END AS white
    , CASE
          WHEN cod_race = 3
          THEN 1
          ELSE 0
      END AS asian
    , CASE
          WHEN cod_race = 2
          THEN 1
          ELSE 0
      END AS black
    , CASE
          WHEN cod_q39 = 4 THEN 3
          WHEN cod_q39 = 3 THEN 2
          WHEN cod_q39 = 2 THEN 1
          WHEN cod_q39 = 1 THEN 0
          ELSE NULL
      END AS jobsecurity
    , CASE
          WHEN cod_q17 = 3 THEN 0
          WHEN cod_q17 = 2 THEN 1
          WHEN cod_q17 = 1 THEN 2
          ELSE NULL
      END AS yourlast5
    , CASE
          WHEN cod_q19 = 2 THEN 0
          WHEN cod_q19 = 3 THEN 1
          WHEN cod_q19 = 1 THEN 2
          ELSE NULL
      END AS yournext5
    , CASE
          WHEN cod_q918 = 1 THEN 1
          WHEN cod_q918 = 2 THEN 2
          WHEN cod_q918 = 3 THEN 3
          WHEN cod_q918 = 4 THEN 4
          WHEN cod_q918 = 5 THEN 5
          WHEN cod_q918 = 6 THEN 6
          WHEN cod_q918 = 7 THEN 7
          WHEN cod_q918 = 8 THEN 8
          WHEN cod_q918 = 9 THEN 9
          ELSE NULL
      END AS income
    , CASE
          WHEN cod_q901 = 1 THEN 1
          ELSE 0
      END AS dem
    , CASE
          WHEN cod_q901 = 2 THEN 1
          ELSE 0
      END AS rep
    , CASE
          WHEN cod_q901 = 3 THEN 1
          ELSE 0
      END AS indep
    , CASE
          WHEN cod_q901 = 4 THEN 1
          ELSE 0
      END AS othparty
    , CASE
          WHEN cod_q908 = 1 THEN -2
          WHEN cod_q908 = 2 THEN -1
          WHEN cod_q908 = 3 THEN 0
          WHEN cod_q908 = 4 THEN 1
          WHEN cod_q908 = 5 THEN 2
          ELSE NULL
      END AS ideology
    , CASE
          WHEN cod_q908 = 6 THEN 1
          ELSE 0
      END AS othideol
    , 7 as education
    , df_economists_union.q27a1  AS TAXESHIGH
    , df_economists_union.q27b1  AS DEFICIT
    , df_economists_union.q27c1  AS FORAID
    , df_economists_union.q27d1  AS IMMIG
    , df_economists_union.q27e1  AS TAXBREAK
    , df_economists_union.q27f1  AS INADEDUC
    , df_economists_union.q27g1  AS WELFARE
    , df_economists_union.q27h1  AS AA
    , df_economists_union.q27i1  AS HARDWORK
    , df_economists_union.q27j1  AS REG
    , df_economists_union.q27k1  AS SAVINGS
    , df_economists_union.q29l1  AS PROFHIGH
    , df_economists_union.q29m1  AS EXECPAY
    , df_economists_union.q29n1  AS BUSPROD
    , df_economists_union.q29o1  AS TECH
    , df_economists_union.q29p1  AS OVERSEAS
    , df_economists_union.q29q1  AS DOWNSIZE
    , df_economists_union.q29r1  AS COMPEDUC
    , df_economists_union.q22a1  AS TAXCUT
    , df_economists_union.q22b1  AS WOMENWORK
    , df_economists_union.q22c1  AS TECHGOOD
    , df_economists_union.q22d1  AS TRADEAG
    , df_economists_union.q22e1  AS DOWNGOOD
    , df_economists_union.q13    AS CHANGE20
    , df_economists_union.q24    AS TRADEJOB
    , df_economists_union.q26    AS WHYGASSD
    , df_economists_union.q25    AS GASPRICE
    , df_economists_union.q21    AS PRES
    , df_economists_union.q9     AS NEWJOB
    , df_economists_union.q10    AS GAP20
    , df_economists_union.q11    AS INCOME20
    , df_economists_union.q12    AS WAGE20
    , df_economists_union.q13    AS NEED2EARN
    , df_economists_union.q14    AS STAN5
    , df_economists_union.q18    AS CHILDGEN
    , df_economists_union.q49    AS CHILDSTAN
    , df_economists_union.q1     AS CURECON
FROM
    df_economists_union
'''

df_economists_union = ps.sqldf(query, locals())

df_economists_union

PandaSQLException: (sqlite3.OperationalError) no such column: df_economists_union.q27a1
[SQL: 
SELECT
      df_economists_union.*
    , 1 as econ
    , CASE
          WHEN cod_sex = 1
          THEN 1
          ELSE 0
      END AS male
    , 1996 - q910 AS age_in_1996
    , CASE
          WHEN cod_race NOT IN (1, 2, 3)
          THEN 1
          ELSE 0
      END AS othrace
    , CASE
          WHEN cod_race = 1
          THEN 1
          ELSE 0
      END AS white
    , CASE
          WHEN cod_race = 3
          THEN 1
          ELSE 0
      END AS asian
    , CASE
          WHEN cod_race = 2
          THEN 1
          ELSE 0
      END AS black
    , CASE
          WHEN cod_q39 = 4 THEN 3
          WHEN cod_q39 = 3 THEN 2
          WHEN cod_q39 = 2 THEN 1
          WHEN cod_q39 = 1 THEN 0
          ELSE NULL
      END AS jobsecurity
    , CASE
          WHEN cod_q17 = 3 THEN 0
          WHEN cod_q17 = 2 THEN 1
          WHEN cod_q17 = 1 THEN 2
          ELSE NULL
      END AS yourlast5
    , CASE
          WHEN cod_q19 = 2 THEN 0
          WHEN cod_q19 = 3 THEN 1
          WHEN cod_q19 = 1 THEN 2
          ELSE NULL
      END AS yournext5
    , CASE
          WHEN cod_q918 = 1 THEN 1
          WHEN cod_q918 = 2 THEN 2
          WHEN cod_q918 = 3 THEN 3
          WHEN cod_q918 = 4 THEN 4
          WHEN cod_q918 = 5 THEN 5
          WHEN cod_q918 = 6 THEN 6
          WHEN cod_q918 = 7 THEN 7
          WHEN cod_q918 = 8 THEN 8
          WHEN cod_q918 = 9 THEN 9
          ELSE NULL
      END AS income
    , CASE
          WHEN cod_q901 = 1 THEN 1
          ELSE 0
      END AS dem
    , CASE
          WHEN cod_q901 = 2 THEN 1
          ELSE 0
      END AS rep
    , CASE
          WHEN cod_q901 = 3 THEN 1
          ELSE 0
      END AS indep
    , CASE
          WHEN cod_q901 = 4 THEN 1
          ELSE 0
      END AS othparty
    , CASE
          WHEN cod_q908 = 1 THEN -2
          WHEN cod_q908 = 2 THEN -1
          WHEN cod_q908 = 3 THEN 0
          WHEN cod_q908 = 4 THEN 1
          WHEN cod_q908 = 5 THEN 2
          ELSE NULL
      END AS ideology
    , CASE
          WHEN cod_q908 = 6 THEN 1
          ELSE 0
      END AS othideol
    , 7 as education
    , df_economists_union.q27a1  AS TAXESHIGH
    , df_economists_union.q27b1  AS DEFICIT
    , df_economists_union.q27c1  AS FORAID
    , df_economists_union.q27d1  AS IMMIG
    , df_economists_union.q27e1  AS TAXBREAK
    , df_economists_union.q27f1  AS INADEDUC
    , df_economists_union.q27g1  AS WELFARE
    , df_economists_union.q27h1  AS AA
    , df_economists_union.q27i1  AS HARDWORK
    , df_economists_union.q27j1  AS REG
    , df_economists_union.q27k1  AS SAVINGS
    , df_economists_union.q29l1  AS PROFHIGH
    , df_economists_union.q29m1  AS EXECPAY
    , df_economists_union.q29n1  AS BUSPROD
    , df_economists_union.q29o1  AS TECH
    , df_economists_union.q29p1  AS OVERSEAS
    , df_economists_union.q29q1  AS DOWNSIZE
    , df_economists_union.q29r1  AS COMPEDUC
    , df_economists_union.q22a1  AS TAXCUT
    , df_economists_union.q22b1  AS WOMENWORK
    , df_economists_union.q22c1  AS TECHGOOD
    , df_economists_union.q22d1  AS TRADEAG
    , df_economists_union.q22e1  AS DOWNGOOD
    , df_economists_union.q13    AS CHANGE20
    , df_economists_union.q24    AS TRADEJOB
    , df_economists_union.q26    AS WHYGASSD
    , df_economists_union.q25    AS GASPRICE
    , df_economists_union.q21    AS PRES
    , df_economists_union.q9     AS NEWJOB
    , df_economists_union.q10    AS GAP20
    , df_economists_union.q11    AS INCOME20
    , df_economists_union.q12    AS WAGE20
    , df_economists_union.q13    AS NEED2EARN
    , df_economists_union.q14    AS STAN5
    , df_economists_union.q18    AS CHILDGEN
    , df_economists_union.q49    AS CHILDSTAN
    , df_economists_union.q1     AS CURECON
FROM
    df_economists_union
]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [34]:
common_columns = df_general_union.columns.intersection(df_economists_union.columns)

common_columns

Index(['intv', 'q901', 'q905', 'q908', 'q910', 'q918', 'race', 'sex', 'state',
       'region',
       ...
       'yourlast5', 'yournext5', 'income', 'dem', 'rep', 'indep', 'othparty',
       'ideology', 'othideol', 'education'],
      dtype='object', length=110)

In [35]:
common_columns = df_general_union.columns.intersection(df_economists_union.columns)

df_general_common = df_general_union[common_columns]
df_economists_common = df_economists_union[common_columns]

# Concatenar os DataFrames
df_union = pd.concat([df_general_common, df_economists_common], ignore_index=True)

df_union

  df_union = pd.concat([df_general_common, df_economists_common], ignore_index=True)


Unnamed: 0,intv,q901,q905,q908,q910,q918,race,sex,state,region,...,yourlast5,yournext5,income,dem,rep,indep,othparty,ideology,othideol,education
0,1.0,DEMOCRAT,YES,LIBERAL,,$25000-29999,WHITE,FEMALE,37.0,SOUTH,...,0.0,0.0,4.0,1,0,0,0,-1.0,0,5.0
1,2.0,REPUBLICAN,NO,VERY CONSERVATIVE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,2.0,2.0,7.0,0,1,0,0,2.0,0,6.0
2,3.0,DEMOCRAT,NO,MODERATE,,$10000-19999,WHITE,FEMALE,27.0,SOUTH,...,2.0,1.0,2.0,1,0,0,0,0.0,0,3.0
3,4.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,FEMALE,37.0,SOUTH,...,1.0,1.0,7.0,0,0,1,0,0.0,0,5.0
4,5.0,INDEPENDENT,YES,MODERATE,,$50000-74999,WHITE,MALE,35.0,SOUTH,...,2.0,1.0,7.0,0,0,1,0,0.0,0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2104,595.0,REPUBLICAN,YES,MODERATE,,$50000-74999,WHITE,MALE,10.0,MIDWEST,...,2.0,2.0,7.0,0,1,0,0,0.0,0,7.0
2105,596.0,,,,,,,,8.0,EAST,...,,,,0,0,0,0,,0,7.0
2106,597.0,,,,,,,,23.0,SOUTH,...,,,,0,0,0,0,,0,7.0
2107,598.0,REPUBLICAN,YES,CONSERVATIVE,,$100000 OR MORE,WHITE,MALE,49.0,WEST,...,1.0,1.0,9.0,0,1,0,0,1.0,0,7.0


In [36]:
df_union[['econ']]

Unnamed: 0,econ
0,0
1,0
2,0
3,0
4,0
...,...
2104,1
2105,1
2106,1
2107,1


### preparando o df para as regressões

In [51]:
# Renomeando as colunas com inplace=True para forçar a modificação do DataFrame original
df_union.rename(columns={
    'q27a1'  : 'TAXESHIGH',
    'q27b1'  : 'DEFICIT',
    'q27c1'  : 'FORAID',
    'q27d1'  : 'IMMIG',
    'q27e1'  : 'TAXBREAK',
    'q27f1'  : 'INADEDUC',
    'q27g1'  : 'WELFARE',
    'q27h1'  : 'AA',
    'q27i1'  : 'HARDWORK',
    'q27j1'  : 'REG',
    'q27k1'  : 'SAVINGS',
    'q29l1'  : 'PROFHIGH',
    'q29m1'  : 'EXECPAY',
    'q29n1'  : 'BUSPROD',
    'q29o1'  : 'TECH',
    'q29p1'  : 'OVERSEAS',
    'q29q1'  : 'DOWNSIZE',
    'q29r1'  : 'COMPEDUC',
    'q22a1'  : 'TAXCUT',
    'q22b1'  : 'WOMENWORK',
    'q22c1'  : 'TECHGOOD',
    'q22d1'  : 'TRADEAG',
    'q22e1'  : 'DOWNGOOD',
    'q13'    : 'CHANGE20',
    'q24'    : 'TRADEJOB',
    'q26'    : 'WHYGASSD',
    'q25'    : 'GASPRICE',
    'q21'    : 'PRES',
    'q9'     : 'NEWJOB',
    'q10'    : 'GAP20',
    'q11'    : 'INCOME20',
    'q12'    : 'WAGE20',
    'q13'    : 'NEED2EARN',
    'q14'    : 'STAN5',
    'q18'    : 'CHILDGEN',
    'q49'    : 'CHILDSTAN',
    'q1'     : 'CURECON'
}, inplace=True)

# Verificando se as colunas foram renomeadas corretamente
df_union.columns.to_frame().to_clipboard(excel=True)


In [48]:
# Selecionando apenas as colunas que serão utilizadas na análise e renomeado

df_union.rename(columns={'q27a1'  : 'TAXESHIGH'
                        , 'q27b1' : 'DEFICIT'
                        , 'q27c1' : 'FORAID'
                        , 'q27d1' : 'IMMIG'
                        , 'q27e1' : 'TAXBREAK'
                        , 'q27f1' : 'INADEDUC'
                        , 'q27g1' : 'WELFARE'
                        , 'q27h1' : 'AA'
                        , 'q27i1' : 'HARDWORK'
                        , 'q27j1' : 'REG'
                        , 'q27k1' : 'SAVINGS'
                        , 'q29l1' : 'PROFHIGH'
                        , 'q29m1' : 'EXECPAY'
                        , 'q29n1' : 'BUSPROD'
                        , 'q29o1' : 'TECH'
                        , 'q29p1' : 'OVERSEAS'
                        , 'q29q1' : 'DOWNSIZE'
                        , 'q29r1' : 'COMPEDUC'
                        , 'q22a1' : 'TAXCUT'
                        , 'q22b1' : 'WOMENWORK'
                        , 'q22c1' : 'TECHGOOD'
                        , 'q22d1' : 'TRADEAG'
                        , 'q22e1' : 'DOWNGOOD'
                        , 'q13'   : 'CHANGE20'
                        , 'q24'   : 'TRADEJOB'
                        , 'q26'   : 'WHYGASSD'
                        , 'q25'   : 'GASPRICE'
                        , 'q21'   : 'PRES'
                        , 'q9'    : 'NEWJOB'
                        , 'q10'   : 'GAP20'
                        , 'q11'   : 'INCOME20'
                        , 'q12'   : 'WAGE20'
                        , 'q13'   : 'NEED2EARN'
                        , 'q14'   : 'STAN5'
                        , 'q18'   : 'CHILDGEN'
                        , 'q49'   : 'CHILDSTAN'
                        , 'q1'    : 'CURECON'}
                , inplace=True)
                

## Selecionando somente as renomeadas e a variável de controle

df_union = df_union[['econ', 'male', 'othrace', 'white', 'asian', 'black', 'jobsecurity', 'yourlast5', 'yournext5', 'income', 'dem', 'rep', 'indep', 'othparty', 'ideology', 'othideol', 'education', 'TAXESHIGH', 'DEFICIT', 'FORAID', 'IMMIG', 'TAXBREAK', 'INADEDUC', 'WELFARE', 'AA', 'HARDWORK', 'REG', 'SAVINGS', 'PROFHIGH', 'EXECPAY', 'BUSPROD', 'TECH', 'OVERSEAS', 'DOWNSIZE', 'COMPEDUC', 'TAXCUT', 'WOMENWORK', 'TECHGOOD', 'TRADEAG', 'DOWNGOOD', 'CHANGE20', 'TRADEJOB', 'WHYGASSD', 'GASPRICE', 'PRES', 'NEWJOB', 'GAP20', 'INCOME20', 'WAGE20', 'NEED2EARN', 'STAN5', 'CHILDGEN', 'CHILDSTAN', 'CURECON']]

KeyError: "['TAXESHIGH', 'DEFICIT', 'FORAID', 'IMMIG', 'TAXBREAK', 'INADEDUC', 'WELFARE', 'AA', 'HARDWORK', 'REG', 'SAVINGS', 'PROFHIGH', 'EXECPAY', 'BUSPROD', 'TECH', 'OVERSEAS', 'DOWNSIZE', 'COMPEDUC', 'TAXCUT', 'WOMENWORK', 'TECHGOOD', 'TRADEAG', 'DOWNGOOD', 'CHANGE20', 'WHYGASSD', 'GAP20', 'CHILDSTAN', 'CURECON'] not in index"