In [5]:
import polars as pl
import pandas as pd

# Survey table, read straight into a polars DataFrame.
df = pl.read_csv(source="./health_survey.csv")

# Reverse-coding lookup: parsed with pandas first, then converted to polars so it
# can be joined against the survey frame.
reverse_df = pd.read_csv(filepath_or_buffer="./ReverseCodingItems.csv")
reversed_df = pl.from_pandas(data=reverse_df)

# 1. Expose the column whose header is the empty string as a string-typed
#    "Unnamed_Column" so it can serve as the join key.
# 2. Left-join the reverse-coding lookup on that key; every survey row is kept and
#    rows without a match get nulls in the lookup columns.
# 3. Derive a boolean "Needs Reverse" flag: True only where the lookup says "Yes";
#    unmatched rows (null) and any other value fall through to False.
#
# NOTE(review): the join compares the *row values* of the unnamed column with
# "Column Name", so the flag is assigned per row. If ReverseCodingItems.csv lists
# the names of survey columns (e.g. the F* response columns) instead, nothing will
# match and no item is ever reverse coded -- confirm against the data.
df = (
    df.with_columns(pl.col("").cast(pl.Utf8).alias("Unnamed_Column"))
    .join(
        reversed_df,
        left_on="Unnamed_Column",
        right_on="Column Name",
        how="left",
    )
    .with_columns(
        pl.when(pl.col("Needs Reverse Coding?").eq("Yes"))
        .then(True)
        .otherwise(False)
        .alias("Needs Reverse")
    )
)

# Forward Likert scoring (1 = Strongly Disagree ... 5 = Strongly Agree) and its
# mirror image on the same five labels (5 ... 1).
LIKERT_SCORES = {
    "Strongly Disagree": 1,
    "Somewhat Disagree": 2,
    "Neither Agree nor Disagree": 3,
    "Somewhat Agree": 4,
    "Strongly Agree": 5,
}
LIKERT_SCORES_REVERSED = {label: 6 - score for label, score in LIKERT_SCORES.items()}


def _likert_expr(column: str, scores: dict):
    """Build a when/then chain that maps the labels in `column` to their score.

    The chain has no `otherwise`, so any value that is not one of the labels in
    `scores` (including null) evaluates to null.
    """
    pairs = iter(scores.items())
    label, score = next(pairs)
    chain = pl.when(pl.col(column) == label).then(score)
    for label, score in pairs:
        chain = chain.when(pl.col(column) == label).then(score)
    return chain


# Response columns are identified purely by name: everything starting with "F".
response_columns = [name for name in df.columns if name.startswith("F")]

# Add, for each response column, a forward-coded and a reverse-coded companion
# column ("<col>_Temp_Coded" followed by "<col>_Temp_Reversed").
df = df.with_columns(
    [
        expr
        for name in response_columns
        for expr in (
            _likert_expr(name, LIKERT_SCORES).alias(f"{name}_Temp_Coded"),
            _likert_expr(name, LIKERT_SCORES_REVERSED).alias(f"{name}_Temp_Reversed"),
        )
    ]
)

# For each response column, keep the reverse-coded score on rows flagged
# "Needs Reverse" and the forward-coded score on all other rows; the result is
# stored as "<col>_Recoded".
df = df.with_columns(
    [
        pl.when(pl.col("Needs Reverse"))
        .then(pl.col(f"{name}_Temp_Reversed"))
        .otherwise(pl.col(f"{name}_Temp_Coded"))
        .alias(f"{name}_Recoded")
        for name in response_columns
    ]
)

# With an empty `on` list, unpivot falls back to *all* non-index columns, which
# would silently produce a meaningless summary -- fail loudly instead.
if not response_columns:
    raise ValueError(
        "No response columns (names starting with 'F') were found to summarise."
    )

# Reshape to long format: one row per (Unnamed_Column, recoded column) pair, with
# the recoded column's name in "Question_Type" and its score in "Recoded Value".
df_stacked = df.unpivot(
    on=[f"{col}_Recoded" for col in response_columns],
    index=["Unnamed_Column"],
    variable_name="Question_Type",
    value_name="Recoded Value",
)

# Mean recoded score per question. maintain_order=True keeps the groups in the
# order they first appear (i.e. the order of `response_columns`); without it the
# row order of the result -- and of the CSV written from it -- is not
# guaranteed to be the same from run to run.
df_aggregated = df_stacked.group_by("Question_Type", maintain_order=True).agg(
    pl.col("Recoded Value").mean().alias("Mean Recoded Value")
)

In [7]:
# Persist the per-question mean scores as a comma-separated file next to the notebook.
df_aggregated.write_csv(
    "./health_survey_summary.csv",
    separator=",",
)