In [105]:
import pandas as pd
import os

In [106]:
# Input workbooks, resolved relative to the current working directory.
file_2023 = "2023.xlsx"
file_2024 = "2024.xlsx"

# Load each workbook with pandas' default options into its own DataFrame.
data_2023, data_2024 = (pd.read_excel(path) for path in (file_2023, file_2024))

In [107]:
# Answer labels that are recoded, mapped to the code returned for each
# (codes are strings, not integers).
_ANSWER_CODES = {
    "Sangat Tidak Sesuai": "1",
    "Tidak Sesuai": "2",
    "Netral": "3",
    "Sesuai": "4",
    "Sangat Sesuai": "5",
}


def process_answers(row):
    """Recode one row's ``answer_text``.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row["answer_text"]`` (e.g. a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    object
        The string code ``"1"``..``"5"`` when ``answer_text`` is exactly one
        of the five labels in ``_ANSWER_CODES`` (case- and
        whitespace-sensitive); otherwise ``answer_text`` itself, unchanged.
    """
    answer = row["answer_text"]
    # Fall back to the raw value for anything that is not a known label.
    return _ANSWER_CODES.get(answer, answer)

In [108]:
# Stack the two frames row-wise; ignore_index=True discards the original row
# labels and numbers the combined rows from 0.
yearly_frames = [data_2023, data_2024]
data_combined = pd.concat(yearly_frames, ignore_index=True)

In [109]:
# Build the label used later as the pivot's column key:
# "<section_name> - <question_name>".
section_names = data_combined["section_name"]
question_names = data_combined["question_name"]
data_combined["column_name"] = section_names + " - " + question_names

In [110]:
# Call process_answers once per row and store what it returns in a new column.
data_combined["processed_answer"] = data_combined.apply(
    process_answers, axis="columns"
)

In [111]:
# Convert every answer to text while leaving missing answers missing.
# A plain .astype(str) on an object column turns NaN into the literal string
# "nan", which then slips past null checks (pd.notnull / dropna) and ends up
# in the output as if it were a real answer. na_action="ignore" skips nulls.
data_combined["processed_answer"] = data_combined["processed_answer"].map(
    str, na_action="ignore"
)

In [112]:
def _join_answers(values):
    """Concatenate the non-null entries of one pivot cell, separated by ' | '."""
    return ' | '.join(value for value in values if pd.notnull(value))


# One row per combination of the five index keys, one column per
# "column_name" label; every cell holds the joined answers for that pair.
pivot_table = data_combined.pivot_table(
    values="processed_answer",
    index=["nama_survey", "name", "nis", "period", "prodi"],
    columns="column_name",
    aggfunc=_join_answers,
)

In [113]:
# Turn the index levels back into ordinary leading columns.
pivot_table = pivot_table.reset_index()

In [115]:
target_column = "Pengalaman - 1. Saya pernah dan/atau sedang terlibat dalam organisasi"


def _split_answers(cell):
    """Return the individual answers held in one '|'-separated cell.

    Each piece is stripped of surrounding whitespace and empty pieces are
    dropped; a null cell yields an empty list.
    """
    if pd.isnull(cell):
        return []
    pieces = (piece.strip() for piece in str(cell).split('|'))
    return [piece for piece in pieces if piece]


if target_column in pivot_table.columns:
    # Step 1: parse every cell once. The same stripped tokens are used both to
    # collect the distinct answers and to test membership; comparing a
    # stripped answer against unstripped pieces ("A ", " B") would never match
    # for rows that hold more than one answer.
    answers_per_row = [_split_answers(cell) for cell in pivot_table[target_column]]

    # Sorted so the column order is the same on every run (set order is not).
    unique_values = sorted(
        {answer for answers in answers_per_row for answer in answers}
    )

    # Step 2: build all 0/1 indicator columns in one frame instead of
    # inserting them into pivot_table one at a time.
    new_columns = list(unique_values)
    indicators = pd.DataFrame(
        {
            answer: [int(answer in answers) for answers in answers_per_row]
            for answer in new_columns
        },
        index=pivot_table.index,
        columns=new_columns,
    )

    # Step 3: place the indicator columns immediately after 'prodi'.
    # Existing columns with the same names are replaced rather than
    # duplicated, so re-running this cell is safe, and an empty
    # `new_columns` leaves the table untouched (a `[:-0]` slice would not).
    replaced = set(new_columns)
    cols = [col for col in pivot_table.columns if col not in replaced]
    prodi_index = cols.index("prodi")  # ValueError if 'prodi' is absent

    pivot_table = pd.concat(
        [
            pivot_table[cols[:prodi_index + 1]],
            indicators,
            pivot_table[cols[prodi_index + 1:]],
        ],
        axis=1,
    )

In [116]:
# Write the final table to an Excel workbook in the working directory;
# index=False keeps the row labels out of the sheet.
output_path = "processed_survey_data.xlsx"
pivot_table.to_excel(output_path, index=False)