# In [39]:
import polars as pl

# Load the health survey and the reverse coding items from CSV files located
# next to this script/notebook.
health_survey = pl.read_csv("./health_survey.csv")
reverse_coding_items = pl.read_csv("./ReverseCodingItems.csv")

# Keep only the two columns of the reverse coding items file that are needed.
reverse_coding_items = reverse_coding_items.select(["Question", "Needs Reverse Coding?"])

# Trim surrounding whitespace and expose the cleaned values under the column
# names used by the join and by the later "Needs Reverse Coding" comparison.
# `str.strip` was renamed to `str.strip_chars` in polars; calling the old name
# raises AttributeError on current releases. With no argument, `strip_chars`
# removes leading and trailing whitespace.
reverse_coding_items = reverse_coding_items.with_columns([
    pl.col("Question").str.strip_chars().alias("Question Identifier"),
    pl.col("Needs Reverse Coding?").str.strip_chars().alias("Needs Reverse Coding"),
])

# Reshape the survey to long format: one row per (respondent, question).
# The first column is used as the identifier and every remaining column is
# treated as a question. Taking the identifier name from the frame itself
# (instead of hard-coding '') keeps it consistent with `columns[1:]` below.
# NOTE(review): polars 1.0+ deprecates `melt` in favour of `unpivot`
# (index=/on=); `melt` is kept here because it works on older releases too.
id_column = health_survey.columns[0]
health_survey_long = health_survey.melt(
    id_vars=[id_column],
    value_vars=health_survey.columns[1:],
    variable_name="Question Identifier",
    value_name="Question Response",
)

# Attach the reverse-coding flag to every response. A left join keeps
# responses whose question has no entry in the reverse coding items file;
# those rows get null in the joined columns.
merged_data = health_survey_long.join(reverse_coding_items, on="Question Identifier", how="left")

# Likert labels in ascending order of agreement; both lookup tables below are
# derived from this single ordering.
_LIKERT_LABELS = (
    "Strongly Disagree",
    "Somewhat Disagree",
    "Neither Agree nor Disagree",
    "Somewhat Agree",
    "Strongly Agree",
)

# Regular coding: 1 (Strongly Disagree) up to 5 (Strongly Agree).
recoding_dict = dict(zip(_LIKERT_LABELS, range(1, 6)))

# Reverse coding: 5 (Strongly Disagree) down to 1 (Strongly Agree).
reverse_recoding_dict = dict(zip(_LIKERT_LABELS, range(5, 0, -1)))


def recode_values(response, reverse=False):
    """Return the numeric score for a Likert response label.

    Args:
        response: The response label to look up.
        reverse: When truthy, use the reverse-coded scale instead of the
            regular one.

    Returns:
        The integer score from the selected table, or ``None`` when
        ``response`` is not one of the known labels.
    """
    lookup = reverse_recoding_dict if reverse else recoding_dict
    return lookup.get(response)

# Score every response on both scales. `Expr.apply` was renamed to
# `Expr.map_elements` in polars and the old name is gone from current
# releases. An explicit `return_dtype` is given so the result column is an
# integer column even when every lookup returns None (nothing to infer from).
merged_data = merged_data.with_columns([
    pl.col("Question Response")
      .map_elements(lambda x: recode_values(x, reverse=False), return_dtype=pl.Int64)
      .alias("Temp Coded Value"),
    pl.col("Question Response")
      .map_elements(lambda x: recode_values(x, reverse=True), return_dtype=pl.Int64)
      .alias("Temp Reverse Coded Value"),
])

# Pick the reverse-coded score for questions flagged "Yes" and the regular
# score otherwise. Rows whose flag is null (no match in the left join) do not
# satisfy the condition and therefore fall through to the regular score.
merged_data = merged_data.with_columns(
    pl.when(pl.col("Needs Reverse Coding") == "Yes")
      .then(pl.col("Temp Reverse Coded Value"))
      .otherwise(pl.col("Temp Coded Value"))
      .alias("Recoded Value")
)

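# Output of the cell as originally written:
#   AttributeError: 'ExprStringNameSpace' object has no attribute 'strip'
# Cause: polars renamed `Expr.str.strip` to `Expr.str.strip_chars`, so the
# old method name is not available on current polars releases.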