# Environmental Health Project Data Analysis

In [19]:
# load python libraries
import sys
import pandas as pd
import numpy as np
import scipy
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind, t
from scipy.stats import ttest_rel, t
from scipy.stats import sem
from statsmodels.stats.contingency_tables import mcnemar
from statsmodels.stats.proportion import proportion_confint



# print library versions
print()
print('You might want to include the version of Python, as well as relevant libraries in your methods section for the publication.')
print()
print('Python version: {}'.format(sys.version))
print('Pandas version: {}'.format(pd.__version__))
print('Scipy version: {}'.format(scipy.__version__))
# print('statsmodels version: {}'.format(statsmodels.__version__))


ModuleNotFoundError: No module named 'statsmodels'

In [17]:
# clean data
# load data
pre1 = pd.read_csv('raw_data/EH Pre-Survey_April 24, 2024_14.33.csv')
pre2 = pd.read_csv('raw_data/2024 EH Pre-Survey_April 24, 2024_14.35.csv')
post1 = pd.read_csv('raw_data/EH Post-Survey_April 24, 2024_14.34.csv')
post2 = pd.read_csv('raw_data/EH 2024 Post-Survey_April 24, 2024_14.36.csv')

# Collect questions dictionary before cleaning
pre1 = pre1.filter(regex='^Q')
questions = pre1.iloc[0].to_dict()

# remove first two rows from all 4 dataframes
pre1 = pre1[2:]
pre2 = pre2[2:]
post1 = post1[2:]
post2 = post2[2:]

# concatenate pre and post dataframes
pre = pd.concat([pre1, pre2])
post = pd.concat([post1, post2])

# questions to use for wilcoxon test
columns_to_convert = ['Q2_1', 'Q3_1', 'Q3_2', 'Q3_3', 'Q3_4', 'Q3_5', 'Q3_6', 'Q3_7', 'Q8_1', 'Q8_2', 'Q8_3', 'Q8_4', 'Q8_5', 'Q8_6', 'Q9_1']

# Define a function to clean dataframes
def clean_dataframe(df):
    df = df.filter(regex='^Q')  # Keep columns starting with "Q"
    df = df.drop(columns=['Q10'])  # Drop Q10 column
    df = df.dropna(how='all')  # if a row is completely empty, drop it
    for column in columns_to_convert:
        df[column] = pd.to_numeric(df[column], errors='coerce')  # Convert columns to numeric data type
    return df

# Clean each dataframe without enumeration
pre_unmatched = clean_dataframe(pre)
post_unmatched = clean_dataframe(post)
pre1 = clean_dataframe(pre1)
pre2 = clean_dataframe(pre2)

# create pre_matched and post_matched dataframes. Start by removing when 'Q1' is missing
pre_matched = pre.dropna(subset=['Q1'])
post_matched = post.dropna(subset=['Q1'])

# only keep rows where there is a matching NetID
# make Q1 to upper case
pre_matched['Q1'] = pre_matched['Q1'].str.upper()
post_matched['Q1'] = post_matched['Q1'].str.upper()
pre_matched = pre_matched[pre_matched['Q1'].isin(post_matched['Q1'])]
post_matched = post_matched[post_matched['Q1'].isin(pre_matched['Q1'])]

# sort so that 'Q1' is the same order
pre_matched = pre_matched.sort_values(by='Q1')
post_matched = post_matched.sort_values(by='Q1')

# print which ids are duplicated and how many duplications there are
print('There are', pre_matched.duplicated(subset='Q1').sum(), 'duplicated IDs in pre_matched')
print('There are', post_matched.duplicated(subset='Q1').sum(), 'duplicated IDs in post_matched')

#remove duplicate IDs
pre_matched = pre_matched.drop_duplicates(subset='Q1')
post_matched = post_matched.drop_duplicates(subset='Q1')

# make 'Q1' the index
pre_matched = pre_matched.set_index('Q1')
post_matched = post_matched.sort_values('Q1')

#print an error if there are duplicates in index
if pre_matched.index.duplicated().any():
    print('There are duplicates in the index of pre_matched')
if post_matched.index.duplicated().any():
    print('There are duplicates in the index of post_matched')

# drop 'Q1' column in other data
pre_unmatched = pre_unmatched.drop(columns=['Q1'])
post_unmatched = post_unmatched.drop(columns=['Q1'])
pre1 = pre1.drop(columns=['Q1'])
pre2 = pre2.drop(columns=['Q1'])


dataframes = [pre_matched, post_matched, pre1, pre2, pre_unmatched, post_unmatched]

# Create separate data frames for categorical and numerical data
# define categorical columns as columns not within the columns to convert
categorical_columns = [column for column in pre1.columns if column not in columns_to_convert]
pre_categorical_unmatched = pre[categorical_columns]
post_categorical_unmatched = post[categorical_columns]
pre1_categorical = pre1[categorical_columns]
pre2_categorical = pre2[categorical_columns]
pre_matched_categorical = pre_matched[categorical_columns]
post_matched_categorical = post_matched[categorical_columns]

# remove the columns that are not in columns to convert from the dataframes
for df in dataframes:
    for column in df.columns:
        if column not in columns_to_convert:
            df.drop(column, axis=1, inplace=True)
        #convert to numeric
        else:
            df[column] = pd.to_numeric(df[column], errors='coerce')

#print shape of data frames
print('The shape of the dataframes are as follows:')
print()
print('pre_matched:', pre_matched.shape)
print('post_matched:', post_matched.shape)
print('pre1:', pre1.shape)
print('pre2:', pre2.shape)
print('pre_unmatched:', pre_unmatched.shape)
print('post_unmatched:', post_unmatched.shape)
print('pre_matched_categorical:', pre_matched_categorical.shape)
print('post_matched_categorical:', post_matched_categorical.shape)
print('pre1_categorical:', pre1_categorical.shape)
print('pre2_categorical:', pre2_categorical.shape)
print('pre_categorical_unmatched:', pre_categorical_unmatched.shape)
print('post_categorical_unmatched:', post_categorical_unmatched.shape)

# save cleaned data
pre_matched.to_csv('cleaned_data/pre_matched.csv')
post_matched.to_csv('cleaned_data/post_matched.csv')
pre1.to_csv('cleaned_data/pre1.csv')
pre2.to_csv('cleaned_data/pre2.csv')
pre_unmatched.to_csv('cleaned_data/pre_unmatched.csv')
post_unmatched.to_csv('cleaned_data/post_unmatched.csv')
pre_matched_categorical.to_csv('cleaned_data/pre_matched_categorical.csv')
post_matched_categorical.to_csv('cleaned_data/post_matched_categorical.csv')
pre1_categorical.to_csv('cleaned_data/pre1_categorical.csv')
pre2_categorical.to_csv('cleaned_data/pre2_categorical.csv')
pre_categorical_unmatched.to_csv('cleaned_data/pre_categorical_unmatched.csv')
post_categorical_unmatched.to_csv('cleaned_data/post_categorical_unmatched.csv')

There are 0 duplicated IDs in pre_matched
There are 1 duplicated IDs in post_matched
There are duplicates in the index of post_matched
The shape of the dataframes are as follows:

pre_matched: (87, 15)
post_matched: (87, 15)
pre1: (103, 15)
pre2: (101, 15)
pre_unmatched: (204, 15)
post_unmatched: (143, 15)
pre_matched_categorical: (87, 4)
post_matched_categorical: (87, 4)
pre1_categorical: (103, 4)
pre2_categorical: (101, 4)
pre_categorical_unmatched: (237, 4)
post_categorical_unmatched: (160, 4)


In [18]:
#print questions dictionary
questions

{'Q1': 'NetID',
 'Q2_1': 'How much of an impact do you believe that climate change will have on human health in your lifetime? - Impact',
 'Q3_1': 'For each of the following specialties,  impacted by climate change, do you believe each of the following specialties will be? (Please rank on a scale of 0 - 5) - Cardiology',
 'Q3_2': 'For each of the following specialties,  impacted by climate change, do you believe each of the following specialties will be? (Please rank on a scale of 0 - 5) - Dermatology',
 'Q3_3': 'For each of the following specialties,  impacted by climate change, do you believe each of the following specialties will be? (Please rank on a scale of 0 - 5) - Infectious Disease',
 'Q3_4': 'For each of the following specialties,  impacted by climate change, do you believe each of the following specialties will be? (Please rank on a scale of 0 - 5) - Neurology',
 'Q3_5': 'For each of the following specialties,  impacted by climate change, do you believe each of the following

In [9]:
# Create functions for calculating statistics

def calculate_statistics(df1, df2, statistical_test, tail='two-sided'):
    correct_answers = {
        'Q4_1': "3",
        'Q5': 'PM < 2.5 μm',
        'Q6': "Particles of this size are generally absorbed in the respiratory tract and safely excreted in mucus.",
        'Q7': "Cars,Fireplaces,Trucks"
    }

    if statistical_test == 'unpaired_t_test':
        unpaired_t_test_stats = pd.DataFrame(columns=['Question', 'Statistical_Test', 'stat', 'p-value', 'Effect Size', 'average_1', 'average_2', 'sd1', 'sd2', 'se1', 'se2', 'n1', 'n2', 'ci1', 'ci2'])

        for column in df1.columns:
            x = df1[column].dropna()
            y = df2[column].dropna()
            stat, p_value = ttest_ind(x, y, equal_var=False)  # Use equal_var=False for Welch's t-test, which does not assume equal population variance
            if tail == 'one-sided':
                p_value /= 2
            average_1 = x.mean()
            average_2 = y.mean()
            sd1 = x.std()
            sd2 = y.std()
            se1 = sem(x)
            se2 = sem(y)
            n1 = len(x)
            n2 = len(y)
            ci1 = t.interval(0.95, n1-1, loc=average_1, scale=se1)
            ci2 = t.interval(0.95, n2-1, loc=average_2, scale=se2)
            ci1 = (round(ci1[0], 3), round(ci1[1], 3))
            ci2 = (round(ci2[0], 3), round(ci2[1], 3))

            # Calculate effect size as Cohen's d
            pooled_sd = np.sqrt(((n1 - 1) * sd1 ** 2 + (n2 - 1) * sd2 ** 2) / (n1 + n2 - 2))
            effect_size = np.abs((average_1 - average_2) / pooled_sd)

            unpaired_t_test_stats.loc[len(unpaired_t_test_stats)] = [column, statistical_test, stat, p_value, effect_size, average_1, average_2, sd1, sd2, se1, se2, n1, n2, ci1, ci2]
        unpaired_t_test_stats = unpaired_t_test_stats.round(3)

        return unpaired_t_test_stats

    elif statistical_test == 'paired_t_test':
        paired_t_test_stats = pd.DataFrame(columns=['Question', 'Statistical_Test', 'p-value', 'Effect Size', 'average_1', 'average_2', 'sd1', 'sd2', 'se1', 'se2', 'n1', 'n2', 'ci1', 'ci2'])
        for column in df1.columns:
            x = df1[column].dropna().reset_index(drop=True)
            y = df2[column].dropna().reset_index(drop=True)
            paired_data = pd.concat([x, y], axis=1, keys=['df1', 'df2']).dropna()
            stat, p_value = ttest_rel(paired_data['df1'], paired_data['df2'])
            if tail == 'one-sided':
                p_value /= 2
            average_1 = paired_data['df1'].mean()
            average_2 = paired_data['df2'].mean()
            sd1 = paired_data['df1'].std()
            sd2 = paired_data['df2'].std()
            se1 = sem(paired_data['df1'])
            se2 = sem(paired_data['df2'])
            n1 = len(paired_data['df1'])
            n2 = len(paired_data['df2'])
            ci1 = t.interval(0.95, n1-1, loc=average_1, scale=se1)
            ci2 = t.interval(0.95, n2-1, loc=average_2, scale=se2)
            ci1 = (round(ci1[0], 3), round(ci1[1], 3))
            ci2 = (round(ci2[0], 3), round(ci2[1], 3))

            # Calculate effect size as Cohen's d for paired samples
            sd_diff = np.std(paired_data['df1'] - paired_data['df2'], ddof=1)
            effect_size = (average_1 - average_2) / sd_diff

            paired_t_test_stats.loc[len(paired_t_test_stats)] = [column, statistical_test, p_value, effect_size, average_1, average_2, sd1, sd2, se1, se2, n1, n2, ci1, ci2]
        paired_t_test_stats = paired_t_test_stats.round(3)

        return paired_t_test_stats


    elif statistical_test == 'chi_squared_test':
        chi_squared_stats = pd.DataFrame(columns=['Question', 'Statistical_Test', 'P-value', 'Chi2 Statistic', 'n1', 'n2', 'Count_1_correct', 'Count_2_correct', 'Count_1_incorrect', 'Count_2_incorrect'])
        for column in df1.columns:
            if column in correct_answers:
                correct_1 = df1[column].map(str) == correct_answers[column]
                correct_2 = df2[column].map(str) == correct_answers[column]
                incorrect_1 = ~correct_1
                incorrect_2 = ~correct_2
                count_1_correct = correct_1.sum()
                count_2_correct = correct_2.sum()
                count_1_incorrect = incorrect_1.sum()
                count_2_incorrect = incorrect_2.sum()
                
                # Create contingency table
                contingency_table = [[count_1_correct, count_2_correct],
                                    [count_1_incorrect, count_2_incorrect]]
                
                # Perform Chi-squared test
                chi2, p_value, _, _ = chi2_contingency(contingency_table)
                
                n1 = len(df1[column])
                n2 = len(df2[column])
                
                chi_squared_stats.loc[len(chi_squared_stats)] = [column, statistical_test, p_value, chi2, n1, n2, count_1_correct, count_2_correct, count_1_incorrect, count_2_incorrect]
        return chi_squared_stats.round(3)
    
    if statistical_test == 'mcnemar':
        # Create a dataframe to store results
        mcnemar_stats = pd.DataFrame(columns=['Question', 'Statistical_Test', 'P-value', 'n1', 'n2', 'Count_1', 'Count_2', 'Test Statistic', 'Proportion_1', 'Proportion_2', 'ci1', 'ci2'])

        for column in df1.columns:
             if column in correct_answers:
                correct_1 = df1[column].astype(str) == correct_answers[column]
                correct_2 = df2[column].astype(str) == correct_answers[column]
                count_1 = correct_1.sum()
                count_2 = correct_2.sum()
                n1 = len(df1[column])
                n2 = len(df2[column])
                
                # Create contingency table
                contingency_table = [[(correct_1 & correct_2).sum(), (~correct_1 & correct_2).sum()],
                                    [(correct_1 & ~correct_2).sum(), (~correct_1 & ~correct_2).sum()]]
                
                # Perform McNemar's test
                result = mcnemar(contingency_table)
                p_value = result.pvalue
                
                # Calculate proportions and confidence intervals
                proportion_1 = count_1 / n1
                proportion_2 = count_2 / n2
                ci1 = proportion_confint(count_1, n1, alpha=0.05, method='normal')
                ci2 = proportion_confint(count_2, n2, alpha=0.05, method='normal')
                # Round confidence intervals
                ci1 = (round(ci1[0], 3), round(ci1[1], 3))
                ci2 = (round(ci2[0], 3), round(ci2[1], 3))
                
                mcnemar_stats.loc[len(mcnemar_stats)] = [column, statistical_test, p_value, n1, n2, count_1, count_2, result.statistic, proportion_1, proportion_2, ci1, ci2]
        
        return mcnemar_stats.round(3)
    
    if statistical_test != 'wilcoxon_rank_sum' or statistical_test != 'wilcoxon_signed_rank' or statistical_test != 'proportions_ztest' or statistical_test != 'mcnemar':
        print('Invalid statistical test. Please choose from the following: wilcoxon_rank_sum, wilcoxon_signed_rank, proportions_ztest, mcnemar')

In [180]:
calculate_statistics(pre1, pre2, 'wilcoxon_rank_sum')

Unnamed: 0,Question,Statistical_Test,stat,p-value,Effect Size,average_1,average_2,sd1,sd2,se1,se2,n1,n2,ci1,ci2
0,Q2_1,wilcoxon_rank_sum,-0.683,0.494,0.057,4.127,4.178,0.852,0.942,0.084,0.094,102,101,"(3.96, 4.295)","(3.992, 4.364)"
1,Q3_1,wilcoxon_rank_sum,-1.071,0.284,0.141,3.775,3.929,1.089,1.1,0.108,0.111,102,99,"(3.561, 3.988)","(3.71, 4.149)"
2,Q3_2,wilcoxon_rank_sum,-1.117,0.264,0.129,3.922,4.061,1.059,1.101,0.105,0.111,102,98,"(3.713, 4.13)","(3.84, 4.282)"
3,Q3_3,wilcoxon_rank_sum,0.531,0.596,0.148,4.422,4.278,0.849,1.078,0.084,0.109,102,97,"(4.255, 4.588)","(4.061, 4.496)"
4,Q3_4,wilcoxon_rank_sum,-1.145,0.252,0.176,3.059,3.289,1.302,1.299,0.13,0.132,101,97,"(2.802, 3.317)","(3.027, 3.55)"
5,Q3_5,wilcoxon_rank_sum,-1.109,0.267,0.123,3.58,3.74,1.265,1.338,0.126,0.134,100,100,"(3.329, 3.831)","(3.475, 4.005)"
6,Q3_6,wilcoxon_rank_sum,-0.019,0.985,0.082,4.608,4.67,0.858,0.652,0.085,0.065,102,100,"(4.439, 4.776)","(4.541, 4.799)"
7,Q3_7,wilcoxon_rank_sum,-0.657,0.511,0.112,2.804,2.949,1.288,1.287,0.131,0.13,97,98,"(2.545, 3.064)","(2.691, 3.207)"
8,Q8_1,wilcoxon_rank_sum,-0.44,0.66,0.069,3.237,3.326,1.297,1.293,0.132,0.135,97,92,"(2.976, 3.499)","(3.058, 3.594)"
9,Q8_2,wilcoxon_rank_sum,1.38,0.168,0.195,2.674,2.396,1.432,1.413,0.147,0.148,95,91,"(2.382, 2.965)","(2.101, 2.69)"


In [181]:
calculate_statistics(pre1_categorical, pre2_categorical, 'proportions_ztest')

Unnamed: 0,Question,Statistical_Test,P-value,odds_ratio,n1,n2,Proportion_1,Proportion_2,ci1,ci2,Count_1,Count_2,Test Statistic
0,Q4_1,proportions_ztest,0.805575,0.916071,103,101,0.184466,0.19802,"(0.11, 0.259)","(0.12, 0.276)",19,20,-0.246139
1,Q5,proportions_ztest,0.202648,0.643765,103,101,0.174757,0.247525,"(0.101, 0.248)","(0.163, 0.332)",18,25,-1.274044
2,Q6,proportions_ztest,0.692082,1.134933,103,101,0.271845,0.247525,"(0.186, 0.358)","(0.163, 0.332)",28,25,0.396032
3,Q7,proportions_ztest,0.247973,2.042105,103,101,0.07767,0.039604,"(0.026, 0.129)","(0.002, 0.078)",8,4,1.155287


In [182]:
calculate_statistics(pre_unmatched, post_unmatched, 'wilcoxon_rank_sum', 'one-sided')

Unnamed: 0,Question,Statistical_Test,stat,p-value,Effect Size,average_1,average_2,sd1,sd2,se1,se2,n1,n2,ci1,ci2
0,Q2_1,wilcoxon_rank_sum,-5.094,0.0,0.633,4.153,4.643,0.896,0.633,0.063,0.053,203,143,"(4.029, 4.277)","(4.539, 4.748)"
1,Q3_1,wilcoxon_rank_sum,-3.596,0.0,0.465,3.851,4.303,1.094,0.834,0.077,0.07,201,142,"(3.699, 4.003)","(4.165, 4.441)"
2,Q3_2,wilcoxon_rank_sum,-0.559,0.288,0.028,3.99,4.021,1.08,1.166,0.076,0.097,200,143,"(3.839, 4.141)","(3.828, 4.214)"
3,Q3_3,wilcoxon_rank_sum,-0.974,0.165,0.163,4.352,4.497,0.968,0.804,0.069,0.067,199,143,"(4.216, 4.487)","(4.364, 4.629)"
4,Q3_4,wilcoxon_rank_sum,-2.913,0.002,0.329,3.172,3.603,1.302,1.32,0.093,0.111,198,141,"(2.989, 3.354)","(3.383, 3.823)"
5,Q3_5,wilcoxon_rank_sum,-1.054,0.146,0.137,3.66,3.831,1.301,1.197,0.092,0.1,200,142,"(3.479, 3.841)","(3.632, 4.03)"
6,Q3_6,wilcoxon_rank_sum,-1.688,0.046,0.327,4.639,4.838,0.762,0.406,0.054,0.034,202,142,"(4.533, 4.744)","(4.771, 4.905)"
7,Q3_7,wilcoxon_rank_sum,-2.517,0.006,0.297,2.877,3.266,1.286,1.333,0.092,0.113,195,139,"(2.695, 3.059)","(3.043, 3.49)"
8,Q8_1,wilcoxon_rank_sum,-4.41,0.0,0.513,3.28,3.913,1.293,1.168,0.094,0.099,189,138,"(3.095, 3.466)","(3.716, 4.11)"
9,Q8_2,wilcoxon_rank_sum,-6.944,0.0,0.873,2.538,3.717,1.426,1.273,0.105,0.108,186,138,"(2.331, 2.744)","(3.503, 3.932)"


In [183]:
calculate_statistics(pre_categorical_unmatched, post_categorical_unmatched, 'proportions_ztest', 'one-sided')

Unnamed: 0,Question,Statistical_Test,P-value,odds_ratio,n1,n2,Proportion_1,Proportion_2,ci1,ci2,Count_1,Count_2,Test Statistic
0,Q4_1,proportions_ztest,5.413126e-10,0.246906,237,160,0.164557,0.44375,"(0.117, 0.212)","(0.367, 0.521)",39,71,-6.096726
1,Q5,proportions_ztest,1.368085e-22,0.112916,237,160,0.181435,0.6625,"(0.132, 0.23)","(0.589, 0.736)",43,106,-9.709896
2,Q6,proportions_ztest,1.644728e-05,0.399822,237,160,0.223629,0.41875,"(0.171, 0.277)","(0.342, 0.495)",53,67,-4.152442
3,Q7,proportions_ztest,3.385679e-06,0.221935,237,160,0.050633,0.19375,"(0.023, 0.079)","(0.133, 0.255)",12,31,-4.500752


In [184]:
calculate_statistics(pre_matched, post_matched, 'wilcoxon_signed_rank')

Unnamed: 0,Question,Statistical_Test,p-value,Effect Size,average_1,average_2,sd1,sd2,se1,se2,n1,n2,ci1,ci2
0,Q2_1,wilcoxon_signed_rank,0.0,0.0,4.23,4.724,0.872,0.543,0.093,0.058,87,87,"(4.044, 4.416)","(4.608, 4.84)"
1,Q3_1,wilcoxon_signed_rank,0.002,39.251,3.895,4.349,1.106,0.837,0.119,0.09,86,86,"(3.658, 4.133)","(4.169, 4.528)"
2,Q3_2,wilcoxon_signed_rank,0.862,54.504,4.2,4.224,0.961,1.028,0.104,0.111,85,85,"(3.993, 4.407)","(4.002, 4.445)"
3,Q3_3,wilcoxon_signed_rank,0.037,11.484,4.384,4.581,0.96,0.759,0.104,0.082,86,86,"(4.178, 4.59)","(4.419, 4.744)"
4,Q3_4,wilcoxon_signed_rank,0.006,57.215,3.141,3.682,1.364,1.274,0.148,0.138,85,85,"(2.847, 3.435)","(3.407, 3.957)"
5,Q3_5,wilcoxon_signed_rank,0.347,65.94,3.733,3.907,1.192,1.113,0.129,0.12,86,86,"(3.477, 3.988)","(3.668, 4.146)"
6,Q3_6,wilcoxon_signed_rank,0.059,3.216,4.77,4.874,0.522,0.367,0.056,0.039,87,87,"(4.659, 4.881)","(4.795, 4.952)"
7,Q3_7,wilcoxon_signed_rank,0.057,86.384,2.904,3.289,1.34,1.321,0.147,0.145,83,83,"(2.611, 3.196)","(3.001, 3.578)"
8,Q8_1,wilcoxon_signed_rank,0.001,52.045,3.321,3.976,1.328,1.086,0.145,0.119,84,84,"(3.033, 3.61)","(3.74, 4.212)"
9,Q8_2,wilcoxon_signed_rank,0.0,34.082,2.554,3.747,1.399,1.177,0.154,0.129,83,83,"(2.249, 2.86)","(3.49, 4.004)"


In [185]:
calculate_statistics(pre_matched_categorical, post_matched_categorical, 'mcnemar', 'one-sided')

Unnamed: 0,Question,Statistical_Test,P-value,n1,n2,Count_1,Count_2,Test Statistic,Proportion_1,Proportion_2,ci1,ci2
0,Q4_1,mcnemar,1.0,87,87,21,45,0.0,0.241,0.517,"(0.151, 0.331)","(0.412, 0.622)"
1,Q5,mcnemar,1.0,87,87,18,65,0.0,0.207,0.747,"(0.122, 0.292)","(0.656, 0.838)"
2,Q6,mcnemar,1.0,87,87,23,43,0.0,0.264,0.494,"(0.172, 0.357)","(0.389, 0.599)"
3,Q7,mcnemar,1.0,87,87,7,18,0.0,0.08,0.207,"(0.023, 0.138)","(0.122, 0.292)"
