In [1]:
import warnings

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices from pandas/altair —
# presumably done to keep cell output uncluttered; confirm it is still wanted.
warnings.filterwarnings('ignore')

In [2]:
from pluto_survey_tools import QUESTIONNAIRE
from pluto_survey_tools.model import Question
import pluto_survey_tools.stats as stats
from IPython.display import display_markdown

## Min-max ranges

### Global

Overview of the possible minimum and maximum scores for each question and section of the questionnaire.

In [3]:
# Render the score range of the whole questionnaire as a top-level heading.
display_markdown(f'# Questionnaire: {stats.score_range_questionnaire(QUESTIONNAIRE)}', raw=True)

# One sub-heading per section, followed by one line per question in it.
for section in QUESTIONNAIRE.sections:
    display_markdown(f'## {section.title}: {stats.score_range_section(section)}', raw=True)
    # Question numbers are 1-based and restart within every section.
    for i, question in enumerate(section.questions):
        display_markdown(f'Q{i+1}: {stats.score_range_question(question)}', raw=True)

# Questionnaire: (-60.0, 40.0)

## Information About the Applicant: (-8.0, 8.0)

Q1: (-1.0, 1.0)

Q2: (-2.0, 1.0)

Q3: (-1.0, 2.0)

Q4: (-2.0, 2.0)

Q5: (-2.0, 2.0)

## Benefits of the Applicant’s Activity: (-5.0, 13.0)

Q1: (0.0, 4.0)

Q2: (-1.0, 2.0)

Q3: (-2.0, 2.0)

Q4: (-2.0, 2.0)

Q5: (0.0, 3.0)

## Risks of the Applicant’s Activity: (-34.0, 6.0)

Q1: (-12.0, 0.0)

Q2: (-4.0, 4.0)

Q3: (-3.0, 0.0)

Q4: (-5.0, 0.0)

Q5: (-2.0, 2.0)

Q6: (-4.0, 0.0)

Q7: (-4.0, 0.0)

## Institutional Safeguards: (-13.0, 13.0)

Q1: (-1.0, 1.0)

Q2: (-1.0, 2.0)

Q3: (-3.0, 2.0)

Q4: (-1.0, 2.0)

Q5: (-1.0, 1.0)

Q6: (-3.0, 0.0)

Q7: (-3.0, 5.0)

### By axis

In [4]:
questions = QUESTIONNAIRE.questions

# Questions whose first impact key is 'x', with the summed lower and upper
# bounds of their individual score ranges.
x_q = list(filter(lambda question: question.impact_keys[0] == 'x', questions))
x_ranges = list(map(stats.score_range_question, x_q))
x_min = sum(bounds[0] for bounds in x_ranges)
x_max = sum(bounds[1] for bounds in x_ranges)

# Same computation for questions whose first impact key is 'y'.
y_q = list(filter(lambda question: question.impact_keys[0] == 'y', questions))
y_ranges = list(map(stats.score_range_question, y_q))
y_min = sum(bounds[0] for bounds in y_ranges)
y_max = sum(bounds[1] for bounds in y_ranges)

In [5]:
# Show the aggregated (min, max) score bounds per axis; the headings label
# the 'x' impact key as Risk and the 'y' impact key as Public Value.
display_markdown(f'## X ~ Risk: ({x_min}, {x_max})', raw=True)
display_markdown(f'## Y ~ Public Value: ({y_min}, {y_max})', raw=True)

## X ~ Risk: (-51.0, 23.0)

## Y ~ Public Value: (-9.0, 17.0)

## Score distributions

In [6]:
import pandas as pd
from collections import Counter


def sums(n: int, nums: list[float | int]) -> list[float]:
    """Collect the sums produced by ``stats.all_possible_sums_gen(n, nums)``.

    Only the element at index 1 of every yielded item is kept.
    NOTE(review): presumably each item pairs a combination with its sum —
    confirm against ``stats.all_possible_sums_gen``.
    """
    totals = []
    for item in stats.all_possible_sums_gen(n, nums):
        totals.append(item[1])
    return totals


def merge_counters(c1: Counter, c2: Counter) -> Counter:
    """Combine two score-frequency counters into the counter of their sums.

    Every key of ``c1`` is added to every key of ``c2``; the count stored for
    a resulting sum is the accumulated product of the two source counts.
    """
    # Lazily enumerate (summed key, multiplied count) for the full cross product.
    contributions = (
        (left_key + right_key, left_count * right_count)
        for left_key, left_count in c1.items()
        for right_key, right_count in c2.items()
    )
    merged = Counter()
    for total, ways in contributions:
        merged[total] += ways
    return merged


def score_count_freq(questions: list[Question]) -> Counter:
    """Count how often each total score occurs across ``questions``.

    A per-question counter is built from ``sums`` over that question's choice
    scores, then all counters are folded together with ``merge_counters``.

    Raises:
        IndexError: if ``questions`` is empty.
    """
    distributions = []
    for question in questions:
        choice_scores = [choice.score for choice in question.choices]
        # NOTE(review): presumably the maximum number of selectable choices —
        # confirm against the ``selection_range`` definition.
        max_selected = question.selection_range.end
        distributions.append(Counter(sums(max_selected, choice_scores)))

    combined = distributions[0]
    for distribution in distributions[1:]:
        combined = merge_counters(combined, distribution)
    return combined


def score_count_df(counter: Counter) -> pd.DataFrame:
    """Turn a score counter into a two-column frame: ``score`` and ``count``.

    Rows follow the counter's own iteration order.
    """
    scores = list(counter)
    counts = list(counter.values())
    return pd.DataFrame({'score': scores, 'count': counts})


def score_count_df_from_questions(questions: list[Question]) -> pd.DataFrame:
    """Build the ``score``/``count`` frame for ``questions`` in one step.

    Chains ``score_count_freq`` and ``score_count_df``.
    """
    return score_count_df(score_count_freq(questions))


def normalize_df(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` with ``columns`` min-max scaled to ``[0, 1]``.

    Each listed column has its minimum subtracted and is then divided by the
    resulting maximum. The input frame is left untouched.

    Args:
        df: Source frame.
        columns: Names of the numeric columns to rescale.

    Returns:
        A new frame in which the listed columns are rescaled and every other
        column is copied unchanged. A column whose values are all equal is
        mapped to 0 instead of NaN.
    """
    df_copy = df.copy()
    shifted = df_copy[columns] - df_copy[columns].min()
    span = shifted.max()
    # A constant column has a span of 0, and 0 / 0 would turn the whole column
    # into NaN. Divide by 1 instead so such a column normalizes to 0.
    span = span.where(span != 0, 1)
    df_copy[columns] = shifted / span
    return df_copy

In [7]:
import altair as alt
import vl_convert as vlc
from IPython.display import display as _display
from pathlib import Path

# Directory that receives the exported chart files; the path is relative to
# the kernel's working directory.
chart_output_path = Path('../out')
# Create it on first run; exist_ok makes re-running this cell harmless.
chart_output_path.mkdir(exist_ok=True)

def display(chart: alt.Chart, name: str):
    """Export ``chart`` as SVG and PNG, then show it inline.

    The chart is serialized to its Vega-Lite JSON spec and rendered through
    ``vl_convert``. Both files are written under ``chart_output_path`` using
    ``name`` as the file stem.

    Args:
        chart: Altair chart to export and display.
        name: Base file name; the ``.svg`` / ``.png`` suffix is set with
            ``Path.with_suffix``.
    """
    out_path = ((chart_output_path / name).with_suffix('.svg').resolve())
    vl_spec = chart.to_json()
    svg_str = vlc.vegalite_to_svg(vl_spec=vl_spec)
    # Pin the encoding: without it write_text uses the platform locale, which
    # can corrupt or reject non-ASCII characters in the SVG text.
    out_path.write_text(svg_str, encoding='utf-8')
    # NOTE(review): scale=10 presumably requests a high-resolution raster —
    # confirm against the vl_convert documentation.
    png_bytes = vlc.vegalite_to_png(vl_spec=vl_spec, scale=10)
    out_path.with_suffix('.png').write_bytes(png_bytes)
    _display(chart)

In [8]:
def hist(df: pd.DataFrame, title: str, titleX='Score', titleY='Count') -> alt.Chart:
    """Build a bar chart of ``count`` against ``score``.

    Args:
        df: Frame with ``score`` and ``count`` columns.
        title: Chart title.
        titleX: Axis title for the ``score`` axis.
        titleY: Axis title for the ``count`` axis.

    Returns:
        The bar chart, with a tooltip showing both fields.
    """
    score_axis = alt.X('score', title=titleX)
    count_axis = alt.Y('count', title=titleY)
    bars = alt.Chart(df).mark_bar()
    encoded = bars.encode(x=score_axis, y=count_axis, tooltip=['score', 'count'])
    return encoded.properties(title=title)

In [9]:
# (chart title, question subset) pairs to plot. The full set is read straight
# from QUESTIONNAIRE so this cell gives the same result on every re-run.
config = [
    ('All questions', QUESTIONNAIRE.questions),
    ('Risk', x_q),
    ('Public Value', y_q),
]
# The loop target is deliberately NOT named `questions`: a `for` target is a
# plain assignment in the notebook namespace, so reusing that name would leave
# the global `questions` bound to the last subset once the loop finishes.
for title, question_subset in config:
    df = score_count_df_from_questions(question_subset)
    df_norm = normalize_df(df, ['count'])
    display(chart=hist(df, title), name=f'{title} - single')
    display(chart=hist(df_norm, f'{title} (normalized)', titleY='Normalized count'), name=f'{title} - single - normalized')

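### Diff visualization

Overlays the score distribution of the original questions with that of an edited copy (drawn in semi-transparent red).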

In [10]:
from copy import deepcopy

def edited_questions(original=QUESTIONNAIRE.questions) -> list[Question]:
    """Return deep copies of ``original`` with every choice score shifted.

    Questions at even positions get +2 on each choice score, questions at odd
    positions get -3. The input questions are not modified.
    """
    copies = list(deepcopy(original))
    for position, question in enumerate(copies):
        delta = -3 if position % 2 else 2
        for choice in question.choices:
            choice.score += delta
    return copies


def hist_diff(df1: pd.DataFrame, df2: pd.DataFrame, title: str, titleX='Score', titleY='Count') -> alt.Chart:
    """Layer two ``score``/``count`` bar charts for visual comparison.

    Args:
        df1: Frame drawn first, with the default bar styling.
        df2: Frame drawn on top in red at 50% opacity.
        title: Title of the layered chart.
        titleX: Axis title for the ``score`` axis.
        titleY: Axis title for the ``count`` axis.

    Returns:
        The layered chart ``df1 + df2``.
    """
    def _bars(data: pd.DataFrame, **extra_channels) -> alt.Chart:
        # Both layers share the same x/y encoding; only the styling differs.
        return alt.Chart(data).mark_bar().encode(
            x=alt.X('score', title=titleX),
            y=alt.Y('count', title=titleY),
            **extra_channels,
        )

    base_layer = _bars(df1)
    overlay_layer = _bars(df2, color=alt.value('red'), opacity=alt.value(0.5))
    layered = base_layer + overlay_layer
    return layered.properties(title=title)

In [11]:
# (chart title, question subset) pairs to compare. The full set is read
# straight from QUESTIONNAIRE rather than from the global `questions`, which
# may have been rebound by a loop that reused that name as its target.
config = [
    ('All questions', QUESTIONNAIRE.questions),
    ('Risk', x_q),
    ('Public Value', y_q),
]
# Use a dedicated loop target so the global `questions` is never overwritten.
for title, question_subset in config:
    df_original = score_count_df_from_questions(question_subset)
    df_original_norm = normalize_df(df_original, ['count'])
    df_edited = score_count_df_from_questions(edited_questions(question_subset))
    df_edited_norm = normalize_df(df_edited, ['count'])
    display(chart=hist_diff(df_original, df_edited, title), name=f'{title} - diff')
    display(chart=hist_diff(df_original_norm, df_edited_norm, f'{title} (normalized)', titleY='Normalized count'), name=f'{title} - diff - normalized')