In [None]:
class SingleChoiceQuestion(Question):
    """
    Single-choice survey question.

    Handles:
    - Basic single-choice answers (stored as numbers).
    - Additional free-text responses, discovered dynamically from columns
      named ``<question_id>_<option>_TEXT``.

    Used for questions DE2, DE4, DE5, DE6, DE8, DE12, DE14, DE15, DE16, DE17,
    DE18, DE19, DE20, DE21, PL5 and PL8.

    NOTE(review): ``self.df``, ``self.question_text``, ``self.value_map``,
    ``self.metadata``, ``self.subcolumns``, ``self.question_id`` and the
    ``_plot_bar_distribution`` / ``_plot_pie_distribution`` / ``wrap_label``
    helpers are not defined in this class; presumably they are provided by
    ``Question`` -- confirm against the base class.
    """

    def __init__(
        self,
        question_id: str,
        df: pd.DataFrame,
        df_raw: pd.DataFrame,
        value_transform: Optional[Callable[[Any], Any]] = None,
        unit_hint: Optional[str] = None,
    ):
        """
        Collect the numeric answers and any free-text columns of a question.

        Args:
            question_id: Column name of the question in ``df``.
            df: Survey data; also scanned for ``<question_id>_*_TEXT`` columns.
            df_raw: Forwarded unchanged to the base-class constructor.
            value_transform: Forwarded unchanged to the base-class constructor.
            unit_hint: Stored on the instance as ``unit_hint``.
        """
        super().__init__(question_id, df, df_raw, value_transform)
        self.unit_hint = unit_hint

        # Answers that cannot be parsed as numbers become NaN and are dropped.
        self.responses = pd.to_numeric(self.df[question_id], errors="coerce").dropna()

        # Optional free-text answers (e.g. for "Other"), keyed by the option
        # number taken from the column name.
        self.extra_texts = {}
        prefix = f"{question_id}_"
        for col in df.columns:
            if col.startswith(prefix) and col.endswith("_TEXT"):
                option_number = col.split("_")[1]
                text_series = df[col].dropna().astype(str)
                if not text_series.empty:
                    self.extra_texts[option_number] = text_series

    def __repr__(self):
        """Return the question text followed by the number of responses."""
        if self.responses.empty:
            return f"{self.question_text} – No responses provided."
        return f"{self.question_text} – {len(self.responses)} responses collected."

    def plot_distribution(self, display=True):
        """
        Plot the percentage of respondents per answer option as a bar chart.

        Prints how many participants answered, then builds the figure with
        ``_plot_bar_distribution``.

        Args:
            display: When true, call ``fig.show()`` before returning.

        Returns:
            The figure produced by ``_plot_bar_distribution``.
        """
        value_counts = self.responses.value_counts().sort_index()
        total = len(self.responses)
        percentages = (value_counts / total * 100).round(2)

        # Decode numeric codes through value_map (e.g. gender 1 = woman);
        # codes without an entry fall back to "Option <n>".
        labels = [
            self.value_map.get(str(int(x)), f"Option {int(x)}")
            for x in value_counts.index
        ]

        print(f"{total} respondents out of {self.df.shape[0]} participants provided a response.")

        fig = self._plot_bar_distribution(
            labels=labels,
            percentages=percentages,
            title=self.question_text,
        )

        if display:
            fig.show()
        return fig

    def _build_distribution_figure(self, labels, values):
        """Return a pie chart for fewer than four categories, else a bar chart."""
        if len(labels) < 4:
            return self._plot_pie_distribution(labels, values, self.question_text)

        fig = self._plot_bar_distribution(labels, values, self.question_text)
        # Long category names are wrapped so the tick labels stay readable.
        wrapped_labels = [self.wrap_label(lbl, width=20) for lbl in labels]
        fig.update_xaxes(tickvals=labels, ticktext=wrapped_labels, automargin=True)
        return fig

    def distribution(self, display=True):
        """
        Plot answer counts, decoded through ``metadata["value_map"]``.

        Without sub-columns the categories follow the order of the value map
        and options nobody chose are shown with a count of 0. With sub-columns
        the answers of all sub-columns are pooled and categories are sorted by
        label.

        Args:
            display: When true and a figure was built, call ``fig.show()``.

        Returns:
            The figure, or ``None`` when there is nothing to plot (no
            sub-columns and ``self.responses`` is not a Series).
        """
        fig = None
        value_map = self.metadata.get("value_map", {})

        if not self.subcolumns and isinstance(self.responses, pd.Series):
            # NOTE(review): this lookup only matches when the value_map keys
            # have the same type as the stored responses -- confirm.
            counts = self.responses.map(value_map).value_counts()
            labels = list(value_map.values())  # preserves the value_map order
            values = [counts.get(label, 0) for label in labels]
            fig = self._build_distribution_figure(labels, values)

        elif self.subcolumns:
            # NOTE(review): assumes self.responses can be indexed by
            # sub-column name here -- confirm against how it is populated.
            combined = []
            for sub in self.subcolumns:
                combined.extend(self.responses[sub].dropna().tolist())
            responses_str = pd.Series(combined).astype(str)
            counts = responses_str.map(value_map).value_counts().sort_index()
            fig = self._build_distribution_figure(
                counts.index.tolist(), counts.values.tolist()
            )

        if display and fig is not None:
            fig.show()

        return fig

    def extract_string_responses(self):
        """
        Replace ``self.responses`` with the answers converted to strings.

        With sub-columns, the non-null values of every sub-column of
        ``self.df`` are pooled into one Series. Otherwise the question's own
        column is used, or an empty Series when that column is missing.
        """
        if self.subcolumns:
            combined = []
            for sub in self.subcolumns:
                combined.extend(self.df[sub].dropna().astype(str).tolist())
            self.responses = pd.Series(combined)
        elif self.question_id in self.df.columns:
            self.responses = self.df[self.question_id].dropna().astype(str)
        else:
            self.responses = pd.Series([], dtype=str)

In [None]:
# Matrix question class: constructor only (the enclosing class header is not part of this cell)

def __init__(self, question_id: str, question_text: str, df: pd.DataFrame,
                 response_id: str, value_map: Optional[dict] = None, row_map: Optional[dict] = None,
                 sub_map: Optional[dict] = None,
                 value_transform: Optional[Callable[[Any, str], Any]] = None,
                 format: str = "auto"):
        """
        Collect one participant's answers to a matrix question.

        Every column of ``df`` whose name starts with ``<question_id>_`` is
        split on ``_`` and interpreted as follows:

        - two parts (``<qid>_<row>``): a flat answer, stored as
          ``responses[row_label]``;
        - three parts: a nested answer, stored as
          ``responses[row_label][sub_col]``, where the meaning of the two
          trailing parts depends on ``format``;
        - any other number of parts: the column is skipped.

        Args:
            question_id: Prefix that identifies the question's columns.
            question_text: Forwarded to the base-class constructor.
            df: Survey data containing a ``responseId`` column.
            response_id: Value of ``responseId`` selecting the participant.
            value_map: Stored on the instance as ``value_map``.
            row_map: Maps row keys to row labels; keys without an entry are
                labelled ``"Row <key>"``.
            sub_map: Stored on the instance as ``sub_map``.
            value_transform: Forwarded to the base-class constructor.
                ``self.value_transform``, when truthy, is called as
                ``value_transform(raw_value, sub_col)``; ``sub_col`` is
                ``None`` for flat columns.
            format: ``"row-sub"`` (``<qid>_<row>_<sub>``), ``"attr-row"``
                (``<qid>_<attr>_<row>``) or ``"auto"`` (row-sub when both
                trailing parts are digits, otherwise attr-row order). With
                any other value, three-part columns are skipped.
        """
        super().__init__(question_id, question_text, value_transform)
        self.responses = {}
        self.value_map = value_map or {}
        self.row_map = row_map or {}
        self.sub_map = sub_map or {}
        self.format = format

        participant_data = df.loc[df["responseId"] == response_id]
        if participant_data.empty:
            # No row for this participant: leave responses empty rather than
            # failing on `.values[0]` below.
            return

        prefix = f"{question_id}_"
        for col in participant_data.columns:
            if not col.startswith(prefix):
                continue

            parts = col.strip().split("_")

            if len(parts) == 2:
                row_key, sub_col = parts[1], None
            elif len(parts) == 3:
                if self.format == "row-sub":
                    row_key, sub_col = parts[1], parts[2]
                elif self.format == "attr-row":
                    sub_col, row_key = parts[1], parts[2]
                elif self.format == "auto":
                    # Heuristic: if both look like digits, assume row-sub.
                    if parts[1].isdigit() and parts[2].isdigit():
                        row_key, sub_col = parts[1], parts[2]
                    else:
                        sub_col, row_key = parts[1], parts[2]
                else:
                    continue  # Unknown format
            else:
                continue  # Malformed column name

            raw_value = participant_data[col].values[0]
            # Unwrap list-valued cells; an empty list is passed on unchanged.
            if isinstance(raw_value, list) and raw_value:
                raw_value = raw_value[0]

            decoded_row = self.row_map.get(row_key, f"Row {row_key}")
            if self.value_transform:
                decoded_value = self.value_transform(raw_value, sub_col)
            else:
                decoded_value = raw_value

            if sub_col is None:
                # Single-answer (flat)
                self.responses[decoded_row] = decoded_value
            else:
                # Multi-answer (nested dict)
                self.responses.setdefault(decoded_row, {})[sub_col] = decoded_value


In [None]:
    # Matrix question: grouped bar-chart method (the enclosing class header is not part of this cell)

def plot_grouped_distribution_from_counts(self, count_df, title, x_label="Categories"):
        """
        Build a grouped bar chart of percentages per category and group.

        Args:
            count_df: Frame with the columns ``"Value"`` (x axis),
                ``"percentage"`` (y axis), ``"count"`` (hover data) and the
                column named by ``self.grouping_key`` (bar colour / legend).
                The frame is not modified.
            title: Chart title; it is passed through
                ``truncate_after_first_period`` and then wrapped at 40
                characters.
            x_label: Axis title for the x axis.

        Returns:
            The figure created by ``px.bar``; it is not shown here.
        """
        # Format title and labels
        truncated = self.truncate_after_first_period(title)
        wrapped_title = self.wrap_text(truncated, width=40)

        # Wrap the category labels on a copy so the caller's frame keeps its
        # original values and repeated calls do not wrap already wrapped text.
        plot_df = count_df.copy()
        plot_df["Value"] = plot_df["Value"].apply(lambda v: self.wrap_text(str(v), width=30))

        # Build colour map based on legend labels; the two-colour palette is
        # cycled when there are more than two groups.
        legend_key = self.grouping_key
        legend_labels = sorted(plot_df[legend_key].unique())
        hex_palette = ["#F9C784", "#E7E7E7"]
        color_map = {
            label: hex_palette[i % len(hex_palette)]
            for i, label in enumerate(legend_labels)
        }

        fig = px.bar(
            plot_df,
            x="Value",
            y="percentage",
            color=legend_key,
            barmode="group",
            text=plot_df["percentage"].round(1).astype(str) + " %",
            hover_data={"count": True, "percentage": True},
            labels={"Value": x_label, "percentage": "Percentage (%)"},
            color_discrete_map=color_map,
            title=wrapped_title,
        )

        fig.update_layout(
            width=1000,
            height=500,
            margin=dict(r=200),
            yaxis_title="Percentage (%)",
            xaxis_title=x_label,
            font=dict(size=14),
        )

        fig.update_traces(
            marker_line_color="black",
            marker_line_width=1,
        )

        return fig