In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "iframe"

# ───── 1. Data inlezen ──────────────────────────────────────
MENTAL_CSV = "Data/mental_welfare_dataset.csv"
CPDS_CSV   = "Data/cpds-1960-2022-update-2024-2.csv"

mental = pd.read_csv(MENTAL_CSV).rename(columns={"Unnamed: 0": "drop"}, errors="ignore")
cpds = pd.read_csv(CPDS_CSV)
cpds.columns = [c.lower() for c in cpds.columns]

# ───── 2. Harmonisatie ──────────────────────────────────────
rename = {"United States of America": "United States",
          "Czech Republic": "Czechia", "Russian Federation": "Russia"}
for d in (mental, cpds):
    d["country"] = d["country"].replace(rename)

mental.rename(columns={"life_ladder": "ladder"}, inplace=True)

# ───── 3. Variabelen ────────────────────────────────────────
social_vars_all = [s.lower() for s in [
    "sstran", "socexp_t_pmp", "socexp_c_pmp", "socexp_k_pmp",
    "oldage_pmp", "survivor_pmp", "incapben_pmp", "health_pmp",
    "family_pmp", "fallow_pmp", "mpleave_pmp", "childcare_pmp",
    "homehelp_pmp", "unemp_pmp", "almp_pmp", "training_pmp",
    "housing_pmp", "othsocx_pmp"
]]
social_vars = [v for v in social_vars_all if v in cpds.columns]

# ───── 4. Merge ─────────────────────────────────────────────
merged = mental.merge(cpds[["country", "year"] + social_vars],
                      on=["country", "year"], how="inner").dropna()

# ───── 5. Correlatie berekenen en filteren ──────────────────
r_values = []
filtered_vars = []
for s in social_vars:
    r = pearsonr(merged["ladder"], merged[s])[0]
    if r >= 0.4:  # Alleen positieve verbanden >= 0.4
        r_values.append(r)
        filtered_vars.append(s)

# ───── 6. Labels en uitleg ─────────────────────────────────
soc_lbl = {
    "sstran": "Soc. transfers %GDP",
    "socexp_t_pmp": "Total soc. exp.",
    "socexp_c_pmp": "Cash soc. exp.",
    "socexp_k_pmp": "In-kind soc. exp.",
    "oldage_pmp": "Old-age ben.",
    "survivor_pmp": "Survivor ben.",
    "incapben_pmp": "Incapacity ben.",
    "health_pmp": "Health exp.",
    "family_pmp": "Family ben.",
    "fallow_pmp": "Family allowance",
    "mpleave_pmp": "Mat./Pat. leave",
    "childcare_pmp": "Child-care ben.",
    "homehelp_pmp": "Home-help ben.",
    "unemp_pmp": "Unemployment ben.",
    "almp_pmp": "ALMP programmes",
    "training_pmp": "Training prog.",
    "housing_pmp": "Housing ben.",
    "othsocx_pmp": "Other soc. exp."
}

soc_expl = {
    "sstran": "Overheidsuitgaven aan sociale transfers als % van het BBP.",
    "socexp_t_pmp": "Totale sociale overheidsuitgaven per hoofd van de bevolking.",
    "socexp_c_pmp": "Sociale uitgaven in geld, per persoon.",
    "socexp_k_pmp": "Sociale uitgaven in natura (zoals gezondheidszorg), per persoon.",
    "oldage_pmp": "Overheidsuitgaven aan pensioenen, per persoon.",
    "survivor_pmp": "Uitgaven aan nabestaanden, per persoon.",
    "incapben_pmp": "Uitgaven aan arbeidsongeschiktheid, per persoon.",
    "health_pmp": "Overheidsuitgaven aan gezondheidszorg, per persoon.",
    "family_pmp": "Overheidsuitgaven aan gezinsvoordelen, per persoon.",
    "fallow_pmp": "Kinderbijslag en soortgelijke voordelen, per persoon.",
    "mpleave_pmp": "Uitgaven aan ouderschapsverlof, per persoon.",
    "childcare_pmp": "Uitgaven aan kinderopvang, per persoon.",
    "homehelp_pmp": "Uitgaven aan thuishulp voor ouderen/zieken.",
    "unemp_pmp": "Uitgaven aan werkloosheidsuitkeringen, per persoon.",
    "almp_pmp": "Actief arbeidsmarktbeleid, zoals training/subsidies.",
    "training_pmp": "Training- en omscholingsprogramma's, per persoon.",
    "housing_pmp": "Uitgaven aan huisvestingssteun, per persoon.",
    "othsocx_pmp": "Overige sociale uitgaven, per persoon."
}

col_labels = [soc_lbl.get(s, s) for s in filtered_vars]

# ───── 7. Sorting toepassen ─────────────────────────────────
combined = sorted(zip(col_labels, r_values, filtered_vars), key=lambda x: x[1])
col_labels_sorted = [x[0] for x in combined]
r_values_sorted = [x[1] for x in combined]
filtered_vars_sorted = [x[2] for x in combined]

# ───── 8. Hovertekst opnieuw genereren ──────────────────────
hover_text_sorted = []
for i in range(len(r_values_sorted)):
    uitleg = soc_expl.get(filtered_vars_sorted[i], "")
    tekst = (
        f"<b>{col_labels_sorted[i]}</b><br>"
        f"{uitleg}<br><br>"
        f"Verband: {r_values_sorted[i]:.2f}<br>"
        f"Hoger → meer geluk"
    )
    hover_text_sorted.append(tekst)

# ───── 9. Heatmap tekenen ───────────────────────────────────
fig = go.Figure()

fig.add_trace(go.Heatmap(
    z=[r_values_sorted],
    x=col_labels_sorted,
    y=["Geluksscore"],
    colorscale='reds',
    zmin=0,
    zmax=1,
    text=[hover_text_sorted],
    hoverinfo="text",
    showscale=False  # Geen kleurbar
))

fig.update_layout(
    title="Sterkte van positieve verbanden tussen overheidsuitgaven en geluk",
    xaxis_title="Soort overheidsuitgave (oplopend verband)",
    xaxis_tickangle=-45,
    width=1100,
    height=300,
    font=dict(size=12),
    plot_bgcolor='white',
    margin=dict(l=100, r=100, t=100, b=120)
)

fig.show()


IndexError: list index out of range

In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Dataset inladen
df = pd.read_csv("Data/volledige_gecombineerde_dataset.csv")

# Filter op 2012 en complete data
df_happy = df[(df['year'] == 2012)].dropna(subset=[
    'socexp_c_pmp', 'socexp_k_pmp', 'ladder_y'
])

# Kleuren per land
kleuren = px.colors.qualitative.Plotly + px.colors.qualitative.D3 + px.colors.qualitative.Set3
landen = df_happy['country'].unique()
kleuren_dict = {land: kleuren[i % len(kleuren)] for i, land in enumerate(landen)}
kleur_per_land = df_happy['country'].map(kleuren_dict)

# Regressielijn functie
def bereken_regressie(x, y):
    slope, intercept = np.polyfit(x, y, 1)
    x_vals = np.linspace(x.min(), x.max(), 100)
    y_vals = slope * x_vals + intercept
    return x_vals, y_vals, round(x.corr(y), 2)

# Bepaal assen-schaal
x_min = min(df_happy['socexp_c_pmp'].min(), df_happy['socexp_k_pmp'].min())
x_max = max(df_happy['socexp_c_pmp'].max(), df_happy['socexp_k_pmp'].max())

# Regressie + correlatie berekenen
x_vals_c, y_vals_c, corr_c = bereken_regressie(df_happy['socexp_c_pmp'], df_happy['ladder_y'])
x_vals_k, y_vals_k, corr_k = bereken_regressie(df_happy['socexp_k_pmp'], df_happy['ladder_y'])

# Subplots aanmaken
fig = make_subplots(rows=1, cols=2, subplot_titles=(
    f"Cash-uitgaven versus geluk (r = {corr_c})",
    f"In-kind-uitgaven versus geluk (r = {corr_k})"
))

# Cash-uitgaven (links)
fig.add_trace(go.Scatter(
    x=df_happy['socexp_c_pmp'],
    y=df_happy['ladder_y'],
    mode='markers',
    marker=dict(size=10, color=kleur_per_land, line=dict(width=1, color='black')),
    text=df_happy['country'],
    showlegend=False,
    hovertemplate="<b>Land:</b> %{text}<br>Cash-uitgaven: %{x:.2f}% van BBP<br>Gemiddelde geluksscore: %{y:.2f} (0-10)<extra></extra>"
), row=1, col=1)

fig.add_trace(go.Scatter(
    x=x_vals_c,
    y=y_vals_c,
    mode='lines',
    line=dict(color='blue', dash='dash'),
    name='Trend cash-uitgaven'
), row=1, col=1)

# In-kind-uitgaven (rechts)
fig.add_trace(go.Scatter(
    x=df_happy['socexp_k_pmp'],
    y=df_happy['ladder_y'],
    mode='markers',
    marker=dict(size=10, color=kleur_per_land, line=dict(width=1, color='black')),
    text=df_happy['country'],
    showlegend=False,
    hovertemplate="<b>Land:</b> %{text}<br>In-kind-uitgaven: %{x:.2f}% van BBP<br>Gemiddelde geluksscore: %{y:.2f} (0-10)<extra></extra>"
), row=1, col=2)

fig.add_trace(go.Scatter(
    x=x_vals_k,
    y=y_vals_k,
    mode='lines',
    line=dict(color='orange', dash='dash'),
    name='Trend in-kind-uitgaven'
), row=1, col=2)

# Layout netjes maken
fig.update_layout(
    height=500,
    width=1000,
    title_text="Relatie tussen sociale uitgaven en geluk per land (2012)",
    template="simple_white",
    legend=dict(x=1.05, y=1),
)

# X-as schalen gelijk zetten
fig.update_xaxes(range=[x_min - 1, x_max + 1], title_text="Uitgaven als percentage van BBP", row=1, col=1)
fig.update_xaxes(range=[x_min - 1, x_max + 1], title_text="Uitgaven als percentage van BBP", row=1, col=2)
fig.update_yaxes(title_text="Gemiddelde geluksscore (0-10)", row=1, col=1)
fig.update_yaxes(title_text="Gemiddelde geluksscore (0-10)", row=1, col=2)

fig.show()


In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Dataset inladen
pad = "Data/volledige_gecombineerde_dataset.csv"  # Pas pad aan indien nodig
df = pd.read_csv(pad)

# Filteren op compleet jaar
df_2012 = df[df['year'] == 2012].dropna(subset=[
    'socexp_c_pmp',
    'socexp_k_pmp',
    'ladder_y'  # Dit is de juiste kolomnaam
])

# Functie voor regressielijn
def regressielijn(x, y):
    slope, intercept = np.polyfit(x, y, 1)
    x_vals = np.linspace(x.min(), x.max(), 100)
    y_vals = slope * x_vals + intercept
    return x_vals, y_vals

# Data voorbereiden
x_cash = df_2012['socexp_c_pmp']
x_natura = df_2012['socexp_k_pmp']
y_geluk = df_2012['ladder_y']


# Regressielijnen
x_vals_cash, y_vals_cash = regressielijn(x_cash, y_geluk)
x_vals_natura, y_vals_natura = regressielijn(x_natura, y_geluk)

# Figuur
fig = go.Figure()

# Cash-uitgaven
fig.add_trace(go.Scatter(
    x=x_cash,
    y=y_geluk,
    mode='markers',
    marker=dict(color='blue'),
    text=df_2012['country'],
    name='Cash-uitgaven (% BBP)',
    hovertemplate="<b>%{text}</b><br>Cash-uitgaven: %{x:.2f}% van BBP<br>Geluksscore: %{y:.2f}<extra></extra>"
))

# In-kind-uitgaven
fig.add_trace(go.Scatter(
    x=x_natura,
    y=y_geluk,
    mode='markers',
    marker=dict(color='orange'),
    text=df_2012['country'],
    name='In-kind-uitgaven (% BBP)',
    hovertemplate="<b>%{text}</b><br>In-kind-uitgaven: %{x:.2f}% van BBP<br>Geluksscore: %{y:.2f}<extra></extra>"
))

# Trendlijn cash
fig.add_trace(go.Scatter(
    x=x_vals_cash,
    y=y_vals_cash,
    mode='lines',
    line=dict(color='blue', dash='dash'),
    name='Trend cash-uitgaven'
))

# Trendlijn in-kind
fig.add_trace(go.Scatter(
    x=x_vals_natura,
    y=y_vals_natura,
    mode='lines',
    line=dict(color='orange', dash='dash'),
    name='Trend in-kind-uitgaven'
))

# Layout
fig.update_layout(
    title="Relatie tussen sociale uitgaven en geluk (2012)",
    xaxis_title="Sociale uitgaven als percentage van BBP",
    yaxis_title="Gemiddelde geluksscore (0–10)",
    template="simple_white"
)

fig.show()


In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Dataset inladen
pad = "Data/volledige_gecombineerde_dataset_met_family.csv"
df = pd.read_csv(pad)

# Filter op 2012
df_2012 = df[df['year'] == 2012].dropna(subset=[
    'socexp_k_pmp',
    'suicides_per_100k',
    'ladder_y',
    'total_mental_illness_prev'
])

# Functie voor regressielijn en correlatie
def bereken_regressie(x, y):
    slope, intercept = np.polyfit(x, y, 1)
    x_vals = np.linspace(x.min(), x.max(), 100)
    y_vals = slope * x_vals + intercept
    corr = round(x.corr(y), 2)
    return x_vals, y_vals, corr

# Keuzevariabelen en labels
variabelen = {
    'suicides_per_100k': 'Suïcidecijfers (per 100.000 inwoners)',
    'ladder_y': 'Geluksscore (Cantril ladder, 0–10)',
    'total_mental_illness_prev': 'Totaal mentale aandoeningen (%)'
}

# Standaard indicator
y_default = 'suicides_per_100k'

# Eerste keer plotten
x_data = df_2012['socexp_k_pmp']
y_data = df_2012[y_default]
x_vals, y_vals, corr = bereken_regressie(x_data, y_data)

fig = go.Figure()

# Scatterplot
fig.add_trace(go.Scatter(
    x=x_data,
    y=y_data,
    mode='markers',
    marker=dict(size=10, color='steelblue', line=dict(width=1, color='black')),
    text=df_2012['country'],
    hovertemplate="<b>Land:</b> %{text}<br>Sociale uitgaven in natura: %{x:.2f}% van BBP<br>Waarde: %{y:.2f}<extra></extra>",
    name='Landen'
))

# Trendlijn
fig.add_trace(go.Scatter(
    x=x_vals,
    y=y_vals,
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Trendlijn'
))

# Dropdownmenu met duidelijke labels + correlatie in titel
dropdown_buttons = []
for var, label in variabelen.items():
    y_data = df_2012[var]
    x_vals, y_vals, corr = bereken_regressie(x_data, y_data)

    dropdown_buttons.append(dict(
        label=f"{label} (r = {corr})",
        method="update",
        args=[
            {"x": [x_data, x_vals],
             "y": [y_data, y_vals]},
            {"yaxis": {"title": label},
             "title": f"{label} versus sociale uitgaven in natura (2012) — r = {corr}"}
        ]
    ))

# Layout
fig.update_layout(
    title=f"{variabelen[y_default]} versus sociale uitgaven in natura (2012) — r = {corr}",
    xaxis_title="Sociale uitgaven in natura als percentage van BBP",
    yaxis_title=variabelen[y_default],
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction="up",
            x=1.2,
            xanchor="right",
            y=-0.2,
            yanchor="top",
            showactive=True
        )
    ],
    template="simple_white"
)

fig.show()
