### Prelude:

In [11]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
import statsmodels.api as sm
set_seed = 42

In [3]:
data = pd.read_csv("./../Datasets/cleaned_data.csv")

In [4]:
#from Tiffanie's code
# distribution of subjects across treatment conditions (like Table 1 from paper)
# N = 186
treatment_freq = data[["treatment_value", "treatment_frame"]].value_counts()
treatment_rel_freq = data["treatment_frame"].value_counts(normalize=True)
treatment_freq.to_frame().sort_index().join(treatment_rel_freq)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,proportion
treatment_value,treatment_frame,Unnamed: 2_level_1,Unnamed: 3_level_1
0,No framing,22,0.11828
1,Positive science,22,0.11828
2,Negative science,18,0.096774
3,Religious,35,0.188172
4,Equity,30,0.16129
5,Efficiency,32,0.172043
6,Secular,27,0.145161


### Difference of Means Tables:

In [5]:
pd.pivot_table(data, values=['mean_climate_support_before', 'mean_climate_support_after', 'mean_climate_support_change'],
               index=['treatment_value','treatment_frame'],
               aggfunc=['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,mean_climate_support_after,mean_climate_support_before,mean_climate_support_change
treatment_value,treatment_frame,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,No framing,1.590909,1.545455,0.045455
1,Positive science,1.795455,1.670455,0.125
2,Negative science,1.972222,1.763889,0.208333
3,Religious,1.857143,1.75,0.107143
4,Equity,1.916667,1.866667,0.05
5,Efficiency,1.953125,1.914062,0.039062
6,Secular,1.990741,1.87963,0.111111


In [6]:
pd.pivot_table(data, values=["mean_climate_support_after", 'mean_climate_support_before', 'mean_climate_support_change'],
               index=["party_id", "treatment_frame"], aggfunc=['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,mean_climate_support_after,mean_climate_support_before,mean_climate_support_change
party_id,treatment_frame,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Democrat,Efficiency,2.333333,2.270833,0.0625
Democrat,Equity,1.946429,1.946429,0.0
Democrat,Negative science,2.55,2.2,0.35
Democrat,No framing,1.725,1.6,0.125
Democrat,Positive science,2.34375,2.1875,0.15625
Democrat,Religious,2.026316,1.947368,0.078947
Democrat,Secular,1.980769,1.865385,0.115385
Independent,Efficiency,1.946429,1.803571,0.142857
Independent,Equity,1.9,1.9,0.0
Independent,Negative science,2.0625,2.0,0.0625


### Linear Regression:

In [15]:
# Function to perform Lin's Estimator regression with multiple treatment indicators
def lin_estimator_mult_treat(data, y_var, treatment_vars, covariate_list):
    '''linear regression with robust standard errors'''
    # Demean the covariates
    for cov in covariate_list:
        data[cov + '_demeaned'] = data[cov] - data[cov].mean()

    # Create interaction terms for each treatment and each demeaned covariate
    for treat in treatment_vars:
        for cov in covariate_list:
            data[treat + '_X_' + cov] = data[treat] * data[cov + '_demeaned']

    # Define the regression formula
    # Include each treatment indicator
    treatments_formula = " + ".join(treatment_vars)
    
    # Include each demeaned covariate
    covariates_formula = " + ".join([cov + '_demeaned' for cov in covariate_list])
    
    # Include each interaction term
    interactions_formula = " + ".join([treat + '_X_' + cov for treat in treatment_vars for cov in covariate_list])

    # Full formula
    formula = f"{y_var} ~ {treatments_formula} + {covariates_formula} + {interactions_formula}"

    # Fit the regression model
    model = sm.OLS.from_formula(formula, data=data).fit()

    return model.get_robustcov_results(cov_type="HC3").summary()


In [16]:
#NOT SURE IF THIS IS WORKING AS INTEDED (ESPECIALLY THE TREATMENT VALUES)
result = lin_estimator_mult_treat(data, 'mean_climate_support_after', ['treatment_value'], ['Religiosity', 'Economic_Reasoning', 'ScientificConfidence'])
print(result)

                                OLS Regression Results                                
Dep. Variable:     mean_climate_support_after   R-squared:                       0.185
Model:                                    OLS   Adj. R-squared:                  0.153
Method:                         Least Squares   F-statistic:                     4.847
Date:                        Thu, 25 Apr 2024   Prob (F-statistic):           5.09e-05
Time:                                18:33:12   Log-Likelihood:                -190.51
No. Observations:                         185   AIC:                             397.0
Df Residuals:                             177   BIC:                             422.8
Df Model:                                   7                                         
Covariance Type:                          HC3                                         
                                             coef    std err          t      P>|t|      [0.025      0.975]
-----------------------

### Random Forest:

In [8]:
#Randomly split data into two folds
shuffled = data.sample(frac=1, random_state=set_seed)
split = np.array_split(shuffled, 2)
fold1 = split[0]
fold2 = split[1]

  return bound(*args, **kwds)


In [9]:
data.columns

Index(['GasTax', 'CarbTax', 'Treaty', 'RegCarb', 'political_views', 'party_id',
       'party_id.1', 'party_id.2', 'QID74', 'ScientificConfidence',
       'RewardConsequence ', 'Attention_Check_1', 'Religiosity',
       'Economic_Reasoning', 'Attention_Check_2', 'prosociality_1',
       'prosociality_2', 'prosociality_3', 'prosociality_4', 'prosociality_5',
       'prosociality_6', 'prosociality_7', 'prosociality_8', 'prosociality_9',
       'GasTax_after', 'CarbTax_after', 'Treaty_after', 'RegCarb_after',
       'treatment_value', 'party_id_merged', 'mean_climate_support_before',
       'mean_climate_support_after', 'mean_climate_support_change',
       'treatment_frame', 'Religiosity_demeaned',
       'Economic_Reasoning_demeaned', 'ScientificConfidence_demeaned'],
      dtype='object')

In [10]:
'''
# Define the treatment conditions
treatment_conditions = ['ScientificConfidence', 'Religiosity', 'Economic_Reasoning']

label_encoder = LabelEncoder()


# For each treatment condition, encode the target variable
for treatment in treatment_conditions:
    data_encoded[treatment] = label_encoder.fit_transform(data[treatment])

# Define the number of folds for cross-validation
n_folds = 2

results = []
for train_index, test_index in kf.split(data_encoded):
    train_data, test_data = data_encoded.iloc[train_index], data_encoded.iloc[test_index]
    
    for treatment in treatment_conditions:
        X_train, y_train = train_data.drop(treatment, axis=1), train_data[treatment]
        X_test, y_test = test_data.drop(treatment, axis=1), test_data[treatment]
        
        rf = RandomForestClassifier()
        rf.fit(X_train, y_train)
        
        accuracy = rf.score(X_test, y_test)
        results.append({'Treatment': treatment, 'Fold': kf.get_n_splits(), 'Accuracy': accuracy})

results_df = pd.DataFrame(results)
print(results_df)
'''

NameError: name 'LabelEncoder' is not defined

In [None]:
import sklearn
print(sklearn.__version__)


1.4.2
