In [1]:
import pandas as pd
import os

In [2]:
# Set working directory
os.chdir('/Users/nsusser/Desktop/Github/happyDB/')

# Load the main data and reverse-coded items
results = pd.read_csv('data/filtered_ratings.csv')  
reverse_coded_items_df = pd.read_csv('profiles/merged_reverse_coded_items_only.csv')

In [3]:
# Display the first few rows of both datasets
print(results.head())


    hmid                                         cleaned_hm  \
0  27673  I went on a successful date with someone I fel...   
1  27674  I was happy when my son got 90% marks in his e...   
2  27675       I went to the gym this morning and did yoga.   
3  27676  We had a serious talk with some friends of our...   
4  27677  I went with grandchildren to butterfly display...   

   PERMA_Accomplishment_the_speaker_felt_they_were_making_progress_towards_accomplishing_their_goals?  \
0                                                6.0                                                    
1                                                6.0                                                    
2                                                5.0                                                    
3                                                6.0                                                    
4                                                3.0                                            

In [4]:
# Clean and sanitize column names
reverse_coded_items_df['Scale'] = reverse_coded_items_df['Scale'].str.strip().str.replace(r"\s+", "_", regex=True)
reverse_coded_items_df['Dimension'] = reverse_coded_items_df['Dimension'].str.strip().str.replace(r"\s+", "_", regex=True)
reverse_coded_items_df['Items'] = reverse_coded_items_df['Items'].str.strip().str.replace(r"\s+", "_", regex=True)
print(reverse_coded_items_df.head())

  Scale                                  Dimension  \
0   CIT                         Autonomy_-_Control   
1   CIT                         Autonomy_-_Control   
2   CIT                         Autonomy_-_Control   
3   CIT  Subjective_Well-Being_-_Negative_Feelings   
4   CIT  Subjective_Well-Being_-_Negative_Feelings   

                                               Items  
0  other_people_decided_most_of_the_speaker's_lif...  
1  the_life_choices_the_speaker_made_were_not_rea...  
2  other_people_decided_what_the_speaker_could_an...  
3        the_speaker_felt_negative_most_of_the_time?  
4  the_speaker_experienced_unhappy_feelings_most_...  


In [5]:
# Create flattened column names
reversed_columns = [f"{scale}_{dimension}_{item}" for scale, dimension, item in zip(
    reverse_coded_items_df['Scale'], reverse_coded_items_df['Dimension'], reverse_coded_items_df['Items']
)]

# Get the maximum value in the dataset
max_value = 7  # Max value in the dataset

# Reverse coding based on the flattened column names
for col in reversed_columns:
    results[col] = max_value + 1 - results[col]

In [6]:
# Step 2: Define specific groupings for analysis
groupings = {
    "Accomplishment and Mastery": [
        ("PERMA", "Accomplishment"),
        ("CIT", "Mastery - Accomplishment"),
        ("CIT", "Mastery - Learning"),
        ("CIT", "Mastery - Self-Efficacy"),
        ("CIT", "Mastery - Self-Worth"),
        ("CIT", "Mastery - Skills"),
        ("PWB", "Environmental Mastery"),
        ("WBP", "Competence"),
        ("WBP", "Accomplishment"),
    ],
    "Autonomy and Control": [
        ("PWB", "Autonomy"),
        ("WBP", "Autonomy"),
        ("CIT", "Autonomy - Control"),
    ],
    "Engagement and Flow": [
        ("PERMA", "Engagement"),
        ("WBP", "Engagement"),
        ("CIT", "Engagement"),
    ],
    "Positive Emotions": [
        ("PERMA", "Positive Emotion"),
        ("WBP", "Positive Emotions"),
        ("PANAS", "Positive"),
    ],
    "Negative Emotions": [
        ("PERMA", "Negative emotion"),
        ("PANAS", "Negative"),
        ("CIT", "Subjective Well-Being - Negative Feelings"),
    ],
    "Meaning and Purpose": [
        ("PERMA", "Meaning"),
        ("WBP", "Meaning"),
        ("PWB", "Purpose in Life"),
        ("CIT", "Meaning"),
    ],
    "Positive Relationships": [
        ("PERMA", "Relationships"),
        ("PWB", "Positive Relations"),
        ("WBP", "Positive Relationships"),
        ("CIT", "Relationship - Belonging"),
        ("CIT", "Relationship - Community"),
        ("CIT", "Relationship - Respect"),
        ("CIT", "Relationship - Support"),
        ("CIT", "Relationship - Trust"),
    ],
    "Life Satisfaction and Well-Being": [
        ("SWLS", "LS"),
        ("WHO-5", "Well-Being"),
        ("CIT", "Subjective Well-Being - Life Satisfaction"),
    ]
}

In [7]:
# Step 2: Clean and sanitize groupings
# Define a function to clean and sanitize Scale and Dimension
def clean_text(text):
    return text.strip().replace(" ", "_")

# Clean the groupings dictionary
sanitized_groupings = {}
for group_name, dimensions in groupings.items():
    sanitized_dimensions = [(clean_text(scale), clean_text(dimension)) for scale, dimension in dimensions]
    sanitized_groupings[group_name] = sanitized_dimensions

# Step 3: Compute correlation matrices for each grouping
for group_name, dimensions in sanitized_groupings.items():
    group_items = []

    for scale, dimension in dimensions:
        # Match all items that belong to this scale and dimension
        # Allow for columns that start with the Scale_Dimension prefix, ignoring Items
        items = [
            col for col in results.columns
            if col.startswith(f"{scale}_{dimension}_")  # Match Scale and Dimension only
        ]
        group_items.extend(items)

    if len(group_items) > 1:  # Only calculate if there are multiple items in the group
        group_data = results[group_items]

        # Compute the correlation matrix for this group
        corr_matrix = group_data.corr()

        # Save the correlation matrix for this group
        output_path = f'analysis_data/Correlations/Inter-Scales Dimension/Group/Improved/{group_name}_correlation_matrix.xlsx'
        corr_matrix.to_excel(output_path)

        # Optionally, print the correlation matrix for review
        # print(f"{group_name} Correlation Matrix:")
        # print(corr_matrix)


OSError: Cannot save file into a non-existent directory: 'analysis_data/Correlations/Inter-Scales Dimension/Group/Improved'