In [None]:
!pip install pyreadstat
import numpy as np
import pandas as pd
import pyreadstat




def read_stata_file(file_path):
    """Load the Stata file at ``file_path`` and return its data as a DataFrame.

    ``pyreadstat.read_dta`` yields a two-item result; only the first item
    (the data) is handed back to the caller, the second is discarded.
    """
    frame, _unused = pyreadstat.read_dta(file_path)
    return frame

# Load the Stata dataset, drop rows that are missing the outcome or any of the
# listed treatment indicators, and store the outcome column as float.
your_data = (
    read_stata_file('Butler_Broockman_AJPS_2011_public_dta.dta')
    .dropna(subset=['reply_atall', 'treat_group', 'treat_demprimary', 'treat_repprimary'])
    .astype({'reply_atall': float})
)

print(your_data)




     leg_party  leg_republican  leg_black  leg_latino  reply_atall  \
0            R               1          0           0          1.0   
1            D               0          0           0          1.0   
2            R               1          0           0          0.0   
3            R               1          0           0          0.0   
4            D               0          0           0          0.0   
...        ...             ...        ...         ...          ...   
4854         D               0          0           0          1.0   
4855         R               1          0           0          1.0   
4856         D               0          0           0          1.0   
4857         R               1          0           0          1.0   
4858         R               1          0           0          0.0   

      treat_deshawn  treat_demprimary  treat_repprimary  treat_noprimary  \
0                 0                 1                 0                0   
1      

In [None]:
import pandas as pd
import numpy as np

def permutation_test(data, treatment_column, outcome_column, num_permutations=1000, seed=None):
    """Two-sided permutation test for a difference in group means.

    The statistic is computed by grouping ``data`` on ``treatment_column``,
    averaging ``outcome_column`` within each group and taking the last entry
    of the successive differences between those group means. For a 0/1
    treatment column this is ``mean(outcome | 1) - mean(outcome | 0)``.

    The null distribution is built by shuffling the outcome values across
    rows ``num_permutations`` times and recomputing the statistic each time.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns. It is not modified.
    treatment_column : str
        Name of the grouping (treatment indicator) column.
    outcome_column : str
        Name of the numeric outcome column.
    num_permutations : int, default 1000
        Number of random shuffles used to approximate the null distribution.
    seed : int or None, default None
        When not ``None`` (``0`` included), NumPy's global random state is
        seeded with this value before shuffling so the result is reproducible.

    Returns
    -------
    dict
        ``{'observed_statistic': <float>, 'p_value': <float>}`` where
        ``p_value`` is the share of permuted statistics whose absolute value
        is at least the absolute observed statistic.
    """
    # `if seed:` would treat seed=0 as "no seed"; compare against None instead.
    if seed is not None:
        np.random.seed(seed)

    def _mean_difference(frame):
        # Last successive difference between the per-group outcome means.
        return frame.groupby(treatment_column)[outcome_column].mean().diff().iloc[-1]

    observed_diff = _mean_difference(data)

    permutations = []
    for _ in range(num_permutations):
        # Convert to a bare array so `assign` places the shuffled values
        # positionally. Passing a Series would re-align it on `data`'s index,
        # which mismatches rows (or yields NaN) whenever that index is not
        # exactly 0..n-1, e.g. after rows were dropped.
        shuffled_outcome = (
            data[outcome_column]
            .sample(frac=1, replace=False)
            .to_numpy()
        )
        shuffled_data = data.assign(**{outcome_column: shuffled_outcome})
        permutations.append(_mean_difference(shuffled_data))

    p_value = (np.abs(permutations) >= np.abs(observed_diff)).mean()

    return {'observed_statistic': observed_diff, 'p_value': p_value}


# Run the permutation test once per partisanship signal, with the same seed
# for every call.
result_no_partisanship = permutation_test(
    your_data, treatment_column='treat_noprimary', outcome_column='reply_atall', seed=42
)
result_democratic = permutation_test(
    your_data, treatment_column='treat_demprimary', outcome_column='reply_atall', seed=42
)
result_republican = permutation_test(
    your_data, treatment_column='treat_repprimary', outcome_column='reply_atall', seed=42
)

# Each heading is followed by its result dict on the next line.
print("No Partisanship Signal:", result_no_partisanship, sep="\n")
print("\nDemocratic Signal:", result_democratic, sep="\n")
print("\nRepublican Signal:", result_republican, sep="\n")


No Partisanship Signal:
{'observed_statistic': 0.02064040421530533, 'p_value': 0.202}

Democratic Signal:
{'observed_statistic': -0.0034981179269369944, 'p_value': 0.845}

Republican Signal:
{'observed_statistic': -0.017086906048901174, 'p_value': 0.267}


In [None]:


def _alias_reply_gap(legislators):
    """Mean ``reply_atall`` for Jake e-mails minus mean for DeShawn e-mails.

    The previous computation subtracted the means of the two treatment
    indicator columns themselves, which measures how often each alias was
    assigned and never looks at the outcome column.
    """
    jake_reply_rate = legislators.loc[legislators['treat_jake'] == 1, 'reply_atall'].mean()
    deshawn_reply_rate = legislators.loc[legislators['treat_deshawn'] == 1, 'reply_atall'].mean()
    # Sign convention kept from the original expression: Jake minus DeShawn.
    return jake_reply_rate - deshawn_reply_rate


# Select data for Republican legislators under no partisanship signal
republican_no_partisanship = your_data[(your_data['leg_party'] == 'R') & (your_data['treat_noprimary'] == 1)]

# Difference in mean response (Jake - DeShawn) among Republicans
sate_republican = _alias_reply_gap(republican_no_partisanship)

# Select data for Democratic legislators under no partisanship signal
democrat_no_partisanship = your_data[(your_data['leg_party'] == 'D') & (your_data['treat_noprimary'] == 1)]

# Difference in mean response (Jake - DeShawn) among Democrats
sate_democrat = _alias_reply_gap(democrat_no_partisanship)

# Print the results
print("SATE for Republican Legislators:", sate_republican)
print("SATE for Democratic Legislators:", sate_democrat)


SATE for Republican Legislators: 0.00552486187845308
SATE for Democratic Legislators: 0.0022371364653244186
