In [26]:
import pandas as pd
from scipy.stats import chi2_contingency
import numpy as np


In [43]:

# File paths for all CSV data
poverty_status_file = "ChiSquareFactors/poverty_status.csv"
urbanicity_file = "ChiSquareFactors/urbanicity.csv"
health_insurance_file = "ChiSquareFactors/health_insurance.csv"
disability_status_file = "ChiSquareFactors/disability_status.csv"
race_ethnicity_file = "ChiSquareFactors/race_ethnicity.csv"
Health_Insurance_18_64_file = "ChiSquareFactors/Health_Insurance_18_64.csv"
gender_identity_file = "ChiSquareFactors/gender_identity.csv"
sexual_orientation_file = "ChiSquareFactors/sexual_orientation.csv"

# Load each CSV into a DataFrame
poverty_status_df = pd.read_csv(poverty_status_file)
urbanicity_df = pd.read_csv(urbanicity_file)
health_insurance_df = pd.read_csv(health_insurance_file)
disability_status_df = pd.read_csv(disability_status_file)
race_ethnicity_df = pd.read_csv(race_ethnicity_file)
Health_Insurance_18_64_df = pd.read_csv(Health_Insurance_18_64_file)
gender_identity_df = pd.read_csv(gender_identity_file)
sexual_orientation_df = pd.read_csv(sexual_orientation_file)

# Registry of factor label -> DataFrame that the analysis cells iterate over
# via `dataframes.items()` / `dataframes.values()`. Defining it next to the
# loads keeps the notebook runnable from a fresh kernel.
# NOTE(review): the labels mirror the factor names shown in the notebook's
# saved outputs, which list only these five factors. Pairing each label with
# the similarly named DataFrame is an assumption — confirm, in particular
# that "Health Insurance" should not be Health_Insurance_18_64_df, and whether
# the gender identity / sexual orientation frames should be added.
dataframes = {
    "Poverty Status": poverty_status_df,
    "Urbanicity": urbanicity_df,
    "Health Insurance": health_insurance_df,
    "Disability Status": disability_status_df,
    "Race/Ethnicity": race_ethnicity_df,
}



In [45]:
# Define the week to filter by.
# The value is matched against the 'Week_ending' column with exact equality,
# so it must be written exactly as the column stores it.
# NOTE(review): presumably the column holds raw strings in this
# "MM/DD/YYYY hh:mm:ss AM" style — confirm against the CSV files.
week_of_interest = "01/06/2024 12:00:00 AM"  # Adjust this to match your data format

# Function to filter and create a contingency table for a specific week
def create_contingency_table(df, week, demographic_col=None):
    """Build a demographic-group by vaccination-status table for one week.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Week_ending', 'Yes Vaccination' and 'No Vaccination'
        columns, plus the column to group by.
    week : object
        Value compared for exact equality against df['Week_ending'].
    demographic_col : str, optional
        Name of the grouping column. When omitted, the third-to-last column
        of `df` is used, which is the positional convention this function
        has always relied on. Pass the name explicitly for frames whose
        column order differs.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of the grouping column, with the summed
        'No Vaccination' and 'Yes Vaccination' values as columns.

    Raises
    ------
    KeyError
        If a required column (or `demographic_col`) is missing from `df`.
    """
    # Keep only the rows belonging to the requested week
    filtered_df = df[df['Week_ending'] == week]

    # Fall back to the positional convention only when no name was supplied
    if demographic_col is None:
        demographic_col = filtered_df.columns[-3]

    # Sum both vaccination columns within each demographic group
    return filtered_df.pivot_table(
        index=demographic_col,
        values=['Yes Vaccination', 'No Vaccination'],
        aggfunc='sum',
    )

# Build one contingency table per factor for the selected week, keyed by the
# factor label, and echo every table as soon as it has been computed.
contingency_tables = {}

for factor in dataframes:
    df = dataframes[factor]
    print(f"Creating contingency table for: {factor}")
    # Store the table under its label and keep a handle for printing
    contingency_tables[factor] = table = create_contingency_table(df, week_of_interest)
    print(table)
    print("\n")

Creating contingency table for: Poverty Status
                               No Vaccination  Yes Vaccination
Poverty Status                                                
Above Poverty, Income < $75k        88960.660        62849.340
Above Poverty, Income >= $75k      104487.870       105327.130
Below Poverty                       30634.599        14218.401
Poverty Status Unknown              67509.484        44632.516


Creating contingency table for: Urbanicity
                                   No Vaccination  Yes Vaccination
Urbanicity                                                        
Rural (Non-MSA)                         59939.541        36581.459
Suburban (MSA Non-Principal City)      132340.860       106111.140
Urban MSA Principal City               103944.202        79702.798


Creating contingency table for: Health Insurance
                  No Vaccination  Yes Vaccination
Health Insurance                                 
Insured               246915.448       21634

In [46]:
import pandas as pd
from datetime import datetime

# Function to compute Cramér's V for a contingency table
def calculate_cramers_v(contingency_table):
    """Return Cramér's V for a two-way contingency table, rounded to 4 places.

    Cramér's V is sqrt(chi2 / (n * (k - 1))), where chi2 is Pearson's
    chi-square statistic, n the grand total and k the smaller of the number
    of rows and columns.

    Parameters
    ----------
    contingency_table : array-like
        Two-dimensional table of non-negative counts (NumPy array, DataFrame
        or nested sequence).

    Returns
    -------
    float or None
        The statistic rounded to four decimals, or None when it is undefined:
        the table has no cells, some row or column sums to zero, or there is
        only a single row or a single column (k - 1 would be zero).
    """
    observed = np.asarray(contingency_table, dtype=float)

    # No cells at all: nothing to test
    if observed.size == 0:
        return None

    # A zero marginal makes the expected frequencies zero; skip such tables
    if (observed.sum(axis=1) == 0).any() or (observed.sum(axis=0) == 0).any():
        return None

    k = min(observed.shape)  # Minimum of rows or columns
    # With one row or one column the denominator n * (k - 1) is zero
    if k < 2:
        return None

    # Cramér's V is defined on the uncorrected Pearson statistic, so Yates'
    # continuity correction (SciPy's default for 2x2 tables) is disabled.
    chi2_stat, _, _, _ = chi2_contingency(observed, correction=False)
    n = observed.sum()  # Total sample size
    cramers_v = np.sqrt(chi2_stat / (n * (k - 1)))
    return round(float(cramers_v), 4)  # Format to 4 decimal places

# Initialize an empty list to store results for all weeks
all_weeks_results = []

# Combine all Week_ending values from the dataframes.
# Missing values are dropped: they can never match the equality filter used
# below, and a NaN mixed with strings would make sorted() raise TypeError.
unique_weeks = set()
for df in dataframes.values():
    unique_weeks.update(df['Week_ending'].dropna().unique())  # Collect unique weeks

# Iterate through each unique week. sorted() only fixes a deterministic
# processing order over the raw values; the chronological ordering of the
# final table is applied after the datetime conversion further down.
for week in sorted(unique_weeks):
    cramers_v_results = {'Week_ending': week}

    for factor, df in dataframes.items():
        # A factor without any rows for this week gets an explicit missing value
        if not (df['Week_ending'] == week).any():
            cramers_v_results[factor] = None
            continue

        # Reuse the shared helper so the week filter and pivot live in one place;
        # convert to a NumPy array for compatibility
        contingency_table = create_contingency_table(df, week).to_numpy()

        # Skip if the pivot produced no cells at all
        if contingency_table.size == 0:
            cramers_v_results[factor] = None
            continue

        # Calculate Cramér's V and store it (calculate_cramers_v itself
        # returns None for tables with an all-zero row or column)
        cramers_v_results[factor] = calculate_cramers_v(contingency_table)

    # Append the week's results to the list
    all_weeks_results.append(cramers_v_results)

# Convert the results to a DataFrame
results_df = pd.DataFrame(all_weeks_results)

# Convert the Week_ending column to datetime format
results_df['Week_ending'] = pd.to_datetime(results_df['Week_ending'])

# Sort the DataFrame by Week_ending (this is the chronological sort)
results_df = results_df.sort_values(by='Week_ending').reset_index(drop=True)

# Display the sorted DataFrame
print("Cramér's V Results for All Weeks (Chronologically Sorted):")
results_df

Cramér's V Results for All Weeks (Chronologically Sorted):


Unnamed: 0,Week_ending,Poverty Status,Urbanicity,Health Insurance,Disability Status,Race/Ethnicity
0,2023-09-30,0.0148,0.0401,0.0685,0.0124,0.034
1,2023-10-07,0.0419,0.0412,0.084,0.0157,0.0511
2,2023-10-14,0.0553,0.0414,0.1024,0.0226,0.0781
3,2023-10-21,0.076,0.0497,0.1175,0.0135,0.0971
4,2023-10-28,0.0839,0.0483,0.1249,0.0104,0.1006
5,2023-11-04,0.1044,0.0439,0.1252,0.0132,0.1146
6,2023-11-11,0.1072,0.0504,0.1309,0.0247,0.1154
7,2023-11-18,0.1163,0.0467,0.1333,0.0179,0.126
8,2023-11-25,0.121,0.0506,0.1294,0.014,0.1204
9,2023-12-02,0.1204,0.0398,0.1483,0.0207,0.1176
