In [17]:
import pandas as pd
import re
import os
import pingouin as pg

In [18]:
# Set working directory.
# All relative paths in this notebook are resolved against the project root.
# The HAPPYDB_ROOT environment variable overrides the original author's local
# checkout so the notebook can run on other machines without editing this cell;
# when it is unset, the original hard-coded location is used unchanged.
project_root = os.environ.get('HAPPYDB_ROOT', '/Users/nsusser/Desktop/Github/happyDB/')
os.chdir(project_root)

# Load the main data and the table listing the reverse-coded items
results = pd.read_csv('data/filtered_ratings.csv')
reverse_coded_items_df = pd.read_csv('Profiles/reversed_column_names.csv')

In [19]:
# Show the column labels of the main ratings table
column_labels = results.columns
print(column_labels)

Index(['hmid', 'cleaned_hm',
       'PERMA_Accomplishment_the_speaker_felt_they_were_making_progress_towards_accomplishing_their_goals?',
       'PERMA_Engagement_the_speaker_became_absorbed_in_what_they_were_doing?',
       'PERMA_Positive_Emotion_the_speaker_felt_joyful?',
       'PERMA_Negative_emotion_the_speaker_felt_anxious?',
       'PERMA_Accomplishment_the_speaker_achieved_the_important_goals_they_set_for_themselves?',
       'PERMA_Health_the_speaker_perceived_their_health_positively?',
       'PERMA_Meaning_the_speaker_felt_their_life_was_purposeful_and_meaningful?',
       'PERMA_Relationships_the_speaker_received_help_and_support_from_others_when_needed?',
       ...
       'CIT_Optimism_the_speaker_expected_more_good_things_in_their_life_than_bad?',
       'CIT_Subjective_Well-Being_-_Life_Satisfaction_the_speaker_felt_in_most_ways_their_life_was_close_to_their_ideal?',
       'CIT_Subjective_Well-Being_-_Life_Satisfaction_the_speaker_felt_satisfied_with_their_life?',
   

In [20]:
# Load the scale/dimension lookup table
scales_df = pd.read_csv('dataframes/scales_clean.csv')

# Normalise the labels: trim surrounding whitespace, then collapse every run of
# whitespace into a single underscore.
#
# Punctuation is deliberately kept. The earlier `.str.replace(r"[^\w\s]", "")`
# call depended on the pandas default for `regex`: with regex=False it is a
# literal no-op, with regex=True it strips characters such as '-'. The item
# columns in `results` contain that punctuation (e.g. 'CIT_Autonomy_-_Control_...'),
# and they are matched by "<Scale>_<Dimension>_" prefix later on, so stripping it
# would make those dimensions unmatchable.
for label_col in ('Scale', 'Dimension'):
    scales_df[label_col] = (
        scales_df[label_col]
        .str.strip()
        .str.replace(r"\s+", "_", regex=True)
    )

# Combined identifier of the form "<Scale>_<Dimension>"
scales_df['Scale_Dimension'] = scales_df['Scale'] + "_" + scales_df['Dimension']



In [21]:
# Names of the columns to reverse-code. Duplicates are dropped (order kept)
# because reversing the same column twice would silently restore its original
# values.
reversed_columns = list(dict.fromkeys(reverse_coded_items_df['flat_name'].tolist()))

# Endpoints of the rating scale; a reversed score is min_value + max_value - score.
# NOTE(review): the lower endpoint of 1 is implied by the original formula
# (max_value + 1 - score) — confirm against the rating instrument.
min_value = 1
max_value = 7  # Max value in the dataset

# Remember on the DataFrame itself which columns were already flipped, so that
# re-running this cell on the same `results` object does not flip them back.
# A freshly loaded `results` starts with empty attrs and is processed in full.
already_reversed = set(results.attrs.get('reverse_coded', []))

# Reverse coding based on the flattened column names
for col in reversed_columns:
    if col not in results.columns:
        print(f"Column {col} not found in results DataFrame.")
        continue
    if col in already_reversed:
        print(f"Column {col} was already reversed; skipping.")
        continue
    print(f"Reversing column: {col}")
    results[col] = max_value + min_value - results[col]
    already_reversed.add(col)

results.attrs['reverse_coded'] = sorted(already_reversed)

Reversing column: CIT_Autonomy_-_Control_other_people_decided_most_of_the_speaker's_life_decisions?
Reversing column: CIT_Autonomy_-_Control_the_life_choices_the_speaker_made_were_not_really_theirs?
Reversing column: CIT_Autonomy_-_Control_other_people_decided_what_the_speaker_could_and_could_not_do?
Reversing column: CIT_Subjective_Well-Being_-_Negative_Feelings_the_speaker_felt_negative_most_of_the_time?
Reversing column: CIT_Subjective_Well-Being_-_Negative_Feelings_the_speaker_experienced_unhappy_feelings_most_of_the_time?
Reversing column: CIT_Subjective_Well-Being_-_Negative_Feelings_the_speaker_felt_bad_most_of_the_time?
Reversing column: PWB_Autonomy_the_speaker_was_not_afraid_to_voice_their_opinions,_even_when_they_were_in_opposition_to_others?
Reversing column: PWB_Personal_Growth_the_speaker_was_continuously_in_the_process_of_learning,_changing,_and_growing?
Reversing column: PWB_Environmental_Mastery_the_speaker_felt_in_charge_of_the_situation?
Reversing column: PWB_Positiv

In [22]:
# Build the sorted list of unique [scale, dimension] pairs from scales_df
unique_pairs = scales_df[['Scale', 'Dimension']].drop_duplicates()
dimensions = unique_pairs.values.tolist()
dimensions.sort()


In [23]:
# Helper that turns a label into an underscore-separated token
def clean_text(text):
    """Return ``text`` with surrounding whitespace removed and every space replaced by ``_``."""
    trimmed = text.strip()
    # Splitting on a literal space keeps empty pieces, so consecutive spaces
    # become consecutive underscores.
    return "_".join(trimmed.split(" "))

# One record per (scale, dimension) for which Cronbach's alpha was computed
alpha_results = []

# For every scale-dimension pair: export the inter-item correlation matrix and
# compute Cronbach's alpha over that dimension's item columns.
for scale, dimension in dimensions:
    # Clean names for file path consistency
    scale_name = clean_text(scale)
    dimension_name = clean_text(dimension)

    # Item columns are named "<Scale>_<Dimension>_<item text>"
    prefix = f"{scale_name}_{dimension_name}_"
    dimension_items = [col for col in results.columns if col.startswith(prefix)]

    # Both the correlation matrix and alpha need at least two items; report the
    # skip instead of dropping the dimension silently.
    if len(dimension_items) < 2:
        print(
            f"Skipping {scale_name}/{dimension_name}: "
            f"{len(dimension_items)} matching item column(s), need at least 2."
        )
        continue

    dim_data = results[dimension_items]

    # Correlations are computed on the unfiltered item data, i.e. each pair of
    # items uses every row where both are present.
    corr_matrix = dim_data.corr()

    # Save correlation matrix
    output_path = f'analysis_data/item_correlations/Intra-Scales Dimension/{scale_name}/{dimension_name}_correlation_matrix.xlsx'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    corr_matrix.to_excel(output_path)

    print(f"Saved correlation matrix: {output_path}")

    # Alpha is computed on complete cases only (rows with any missing item dropped)
    dim_data_clean = dim_data.dropna()

    # Item variances cannot be estimated from fewer than two rows, so alpha is
    # undefined in that case; skip rather than abort the whole loop.
    if len(dim_data_clean) < 2:
        print(
            f"Skipping Cronbach's alpha for {scale_name}/{dimension_name}: "
            f"only {len(dim_data_clean)} complete row(s)."
        )
        continue

    # Compute Cronbach's alpha (the second return value, the confidence interval, is not used)
    alpha_value, _ = pg.cronbach_alpha(data=dim_data_clean)

    # Store alpha result
    alpha_results.append({
        'Scale': scale_name,
        'Dimension': dimension_name,
        'Num_Items': len(dimension_items),
        'Cronbach_Alpha': round(alpha_value, 4)
    })

# After loop: save all alpha results to a single Excel workbook.
# Columns are fixed explicitly so the file still has a header row when no
# dimension produced a result.
alpha_df = pd.DataFrame(
    alpha_results,
    columns=['Scale', 'Dimension', 'Num_Items', 'Cronbach_Alpha'],
)
output_alpha_path = 'analysis_data/item_correlations/Cronbach_Alpha_Summary.xlsx'
os.makedirs(os.path.dirname(output_alpha_path), exist_ok=True)
alpha_df.to_excel(output_alpha_path, index=False)

print(f"Cronbach's alpha summary saved to: {output_alpha_path}")

Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Autonomy_-_Control_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Engagement_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Mastery_-_Accomplishment_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Mastery_-_Learning_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Mastery_-_Self-Efficacy_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Mastery_-_Self-Worth_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Mastery_-_Skills_correlation_matrix.xlsx
Saved correlation matrix: analysis_data/item_correlations/Intra-Scales Dimension/CIT/Meaning_correlat