# Culture Explorer: World & European Values Survey Toolkit

This notebook delivers an end-to-end cultural analytics workspace that can be executed in Google Colab. It combines an interactive Leaflet.js map (powered by **folium**), comparison dashboards, a pure Python helper layer, and OpenAI-powered cultural insights to satisfy the requirements outlined in the project brief.

**What you can do here:**

* Explore World Values Survey (WVS) and European Values Survey (EVS) indicators on a global map.
* Build comparison matrices for arbitrary sets of countries, fine-tuning the weight of individual survey questions through interactive controls.
* Capture retrospective and future survey measurements directly inside the notebook or via the helper service methods.
* Create family or team groups, collect their survey responses, and match their profiles to the closest countries.
* Plug in an OpenAI API key to obtain reusable cultural briefings, collaboration advice, and conflict mediation guidance from the survey scores.

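> **Data**: The notebook reads two CSV files from the `data/` folder: `country.csv` (country-level scores) and `long_lat.csv` (local-area scores). Small representative samples are bundled inside the repository for offline execution. In a Colab environment you can replace them with the official WVS/EVS extracts, keeping the same file names and using the official question codes (for example `A001`) as column headers.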


## 1. Environment setup

The following cell installs all runtime dependencies. Comment it out if you already manage packages elsewhere.

In [None]:
!pip install -q folium ipywidgets plotly openai pandas numpy pdfplumber


## 2. Imports and global configuration

In [None]:
import json
import hashlib
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from itertools import groupby

import numpy as np
import pandas as pd

import folium
from folium import Map, CircleMarker, FeatureGroup

import ipywidgets as widgets
from IPython.display import Markdown, display, clear_output

import plotly.graph_objects as go

try:
    from openai import OpenAI
except ImportError:  # pragma: no cover - handled gracefully in environments without openai
    OpenAI = None

try:
    import pdfplumber  # type: ignore
except ImportError:  # pragma: no cover - optional dependency for parsing PDF catalogues
    pdfplumber = None

# Folder (relative to the working directory) holding the input CSVs and the JSON files below.
DATA_DIR = Path("data")
# JSON file used as the on-disk cache handed to OpenAIInsightGenerator.
CACHE_PATH = DATA_DIR / "openai_response_cache.json"
# Country-level survey scores (loaded with entity_type "country").
COUNTRY_PATH = DATA_DIR / "country.csv"
# Local-area survey scores (loaded with entity_type "location").
LOCATION_PATH = DATA_DIR / "long_lat.csv"
# Output: column names and first-row snapshot of both CSVs.
COLUMN_SNAPSHOT_PATH = DATA_DIR / "column_first_rows.json"
# Output: index of the question-code columns found in both CSVs.
QUESTION_INDEX_PATH = DATA_DIR / "question_index.json"

# A question code is letters, then digits, then any mix of letters, digits and
# hyphens (e.g. 'A001'); the pattern is anchored, so the whole string must match.
QUESTION_CODE_PATTERN = re.compile(r'^([A-Za-z]+\d+[A-Za-z0-9-]*)$')

def _sanitise_column_name(name: str) -> str:
    """Return *name* lower-cased with every non-alphanumeric character removed."""
    return re.sub(r'[^a-z0-9]', '', str(name).lower())


def derive_standard_column_mapping(columns: List[str], entity_type: str) -> Dict[str, str]:
    """Map raw spreadsheet headers onto the notebook's standard column names.

    Each standard column (``Country``, ``ISO3``, ``Latitude``, ``Longitude``,
    ``Source``, ``Year`` and, for locations, ``Area``) is associated with a
    list of keywords. Headers are compared after sanitisation (lower-case,
    alphanumerics only) in four passes of decreasing strength:

    1. the header equals a keyword,
    2. the header ends with a keyword (``geo_lat`` -> ``Latitude``),
    3. the header starts with a keyword (``latitude_deg`` -> ``Latitude``),
    4. the header merely contains a keyword (last resort).

    A pass is run for every still-unresolved target before the next, weaker
    pass starts, and a header that has been claimed is never offered again.
    This keeps a weak accidental hit (``population`` contains ``lat``) from
    beating a strong one, and guarantees that no header is mapped twice.

    Args:
        columns: Raw header names as they appear in the spreadsheet.
        entity_type: ``'location'`` additionally enables the ``Area`` target;
            any other value uses the base targets only.

    Returns:
        A ``{raw_header: standard_name}`` dict suitable for
        ``DataFrame.rename(columns=...)``. Targets without a match are absent.
    """
    patterns = {
        'Country': ['country', 'countryname', 'nation'],
        'ISO3': ['iso3', 'iso'],
        'Latitude': ['latitude', 'lat'],
        'Longitude': ['longitude', 'long', 'lng'],
        'Source': ['source', 'dataset', 'survey'],
        'Year': ['year'],
    }
    if entity_type == 'location':
        patterns['Area'] = ['area', 'region', 'state', 'province', 'location', 'admin', 'adminname']

    sanitised = {col: _sanitise_column_name(col) for col in columns}
    # Ordered from the strongest to the weakest evidence.
    match_tiers = (
        lambda norm, keyword: norm == keyword,
        lambda norm, keyword: norm.endswith(keyword),
        lambda norm, keyword: norm.startswith(keyword),
        lambda norm, keyword: keyword in norm,
    )

    resolved: Dict[str, str] = {}  # standard name -> raw header
    claimed = set()  # raw headers already assigned to a target
    for is_match in match_tiers:
        for target, keywords in patterns.items():
            if target in resolved:
                continue
            match = next(
                (
                    col
                    for keyword in keywords
                    for col, norm in sanitised.items()
                    if col not in claimed and is_match(norm, keyword)
                ),
                None,
            )
            if match is not None:
                resolved[target] = match
                claimed.add(match)

    # Emit in target-declaration order so the result is stable across runs.
    return {resolved[target]: target for target in patterns if target in resolved}

def _looks_like_pdf(path: Path) -> bool:
    """Report whether the file at *path* begins with the ``%PDF`` signature.

    Any ``OSError`` raised while opening or reading the file is treated as
    "not a PDF" and yields ``False``.
    """
    signature = b"%PDF"
    try:
        with path.open("rb") as stream:
            header = stream.read(len(signature))
    except OSError:
        return False
    return header == signature

def find_question_catalogue(data_dir: Path) -> Optional[Path]:
    """Locate the PDF question catalogue used to title the survey questions.

    Three roots are searched in priority order: *data_dir*, a ``questions``
    folder next to the notebook, and the current working directory. A root
    that is itself a file is accepted when it has a ``.pdf`` suffix or starts
    with the PDF signature. Inside a directory root, files matching ``*.pdf``
    are considered first, followed by any other file whose content starts
    with the PDF signature (non-recursive).

    Directory listings are returned by the file system in arbitrary order, so
    each listing is sorted; the selected catalogue is therefore the same on
    every run and every platform.

    Args:
        data_dir: Primary folder (or file) to look at first.

    Returns:
        The resolved path of the first candidate whose file name contains
        ``question`` (case-insensitive); otherwise the first candidate found;
        ``None`` when there is no candidate at all.
    """
    search_roots = [data_dir, Path("questions"), Path.cwd()]
    candidates: List[Path] = []
    for root in search_roots:
        if root.is_file():
            if root.suffix.lower() == ".pdf" or _looks_like_pdf(root):
                candidates.append(root.resolve())
            continue
        if not root.is_dir():  # also covers roots that do not exist
            continue
        by_extension = sorted(entry for entry in root.glob("*.pdf") if entry.is_file())
        by_content = sorted(
            entry for entry in root.glob("*") if entry.is_file() and _looks_like_pdf(entry)
        )
        candidates.extend(entry.resolve() for entry in by_extension + by_content)

    # dict.fromkeys drops duplicates while keeping first-seen (priority) order.
    unique_candidates = list(dict.fromkeys(candidates))
    for candidate in unique_candidates:
        if "question" in candidate.name.lower():
            return candidate
    return unique_candidates[0] if unique_candidates else None

def extract_official_question_titles(pdf_path: Path) -> Dict[str, str]:
    """Extract a ``{question_code: title}`` mapping from a PDF catalogue.

    Words on each page are grouped into visual rows by their ``top``
    coordinate (rounded to one decimal). Within a row, a token that matches
    ``QUESTION_CODE_PATTERN`` (after trailing whitespace, dots and colons are
    removed) starts an entry; the tokens that follow it, up to the next code
    token or the end of the row, form the title.

    Args:
        pdf_path: Path of the PDF to parse.

    Returns:
        The mapping collected so far. It is empty when ``pdfplumber`` is not
        installed, and may be partial when parsing fails midway, because any
        exception is reported as a Markdown warning instead of being raised.
    """
    if pdfplumber is None:
        return {}
    question_titles: Dict[str, str] = {}
    try:
        with pdfplumber.open(str(pdf_path)) as pdf:
            for page in pdf.pages:
                words = page.extract_words(use_text_flow=True, keep_blank_chars=False)
                if not words:
                    continue
                # groupby only merges adjacent items, so sort by the same row key first.
                words.sort(key=lambda w: (round(w['top'], 1), w['x0']))
                for _, group in groupby(words, key=lambda w: round(w['top'], 1)):
                    # Left-to-right order within the row.
                    row = sorted(group, key=lambda w: w['x0'])
                    idx = 0
                    while idx < len(row):
                        raw_code = row[idx]['text'].strip()
                        # Drop trailing punctuation such as "A001." or "A001:".
                        candidate = re.sub(r'[\s.:]+$', '', raw_code)
                        if QUESTION_CODE_PATTERN.match(candidate):
                            code = candidate
                            idx += 1
                            text_parts: List[str] = []
                            # Collect title tokens until the next code token on this row.
                            while idx < len(row):
                                next_text = row[idx]['text'].strip()
                                cleaned = re.sub(r'[\s.:]+$', '', next_text)
                                if QUESTION_CODE_PATTERN.match(cleaned):
                                    break
                                text_parts.append(next_text)
                                idx += 1
                            question = ' '.join(text_parts).strip()
                            # Remove trailing whitespace-separated number tokens
                            # (presumably page or column numbers - TODO confirm against a real catalogue).
                            question = re.sub(r'(?:\s+\d+)+$', '', question).strip()
                            question = re.sub(r'\s{2,}', ' ', question)
                            # The first title seen for a code wins; later repeats are ignored.
                            if question and code not in question_titles:
                                question_titles[code] = question
                        else:
                            idx += 1
    except Exception as exc:  # pragma: no cover - parsing quality depends on PDF formatting
        display(Markdown(f'⚠️ **Warning:** Unable to parse `{pdf_path.name}` for question titles ({exc}).'))
    return question_titles

def normalise_headers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose column labels are whitespace-stripped strings.

    The input frame is left untouched.
    """
    tidy_labels = [str(label).strip() for label in df.columns]
    result = df.copy()
    result.columns = tidy_labels
    return result

def ensure_required_columns(df: pd.DataFrame, entity_type: str) -> pd.DataFrame:
    """Return a copy of *df* with standard names and fallback columns filled in.

    Headers are first renamed through ``derive_standard_column_mapping``.
    Columns that are still absent afterwards receive defaults:

    * ``Source`` -> ``'World Values Survey'``
    * ``Year``   -> the current calendar year
    * ``ISO3``   -> first three characters of ``Country`` upper-cased, or
      ``'UNK'`` when there is no ``Country`` column
    * ``Area`` (only for ``entity_type == 'location'``) -> the first of
      ``Location`` / ``Region`` / ``State`` / ``Province`` that exists, else
      ``'Unknown'``
    """
    frame = df.copy()
    renames = derive_standard_column_mapping(frame.columns.tolist(), entity_type)
    if renames:
        frame = frame.rename(columns=renames)

    def _lacks(column: str) -> bool:
        return column not in frame.columns

    if _lacks('Source'):
        frame['Source'] = 'World Values Survey'
    if _lacks('Year'):
        frame['Year'] = pd.Timestamp.today().year
    if _lacks('ISO3'):
        if _lacks('Country'):
            frame['ISO3'] = 'UNK'
        else:
            frame['ISO3'] = frame['Country'].astype(str).str[:3].str.upper()
    if entity_type == 'location' and _lacks('Area'):
        fallback = next(
            (name for name in ('Location', 'Region', 'State', 'Province') if name in frame.columns),
            None,
        )
        frame['Area'] = 'Unknown' if fallback is None else frame[fallback]
    return frame

def summarise_spreadsheet(path: Path, titles: Dict[str, str]) -> Dict[str, object]:
    """Describe a CSV file: its headers, its first row and its question columns.

    Args:
        path: CSV file to inspect.
        titles: ``{question_code: title}`` lookup used to annotate the index.

    Returns:
        A dict with three keys: ``columns`` (stripped header names),
        ``first_row`` (header -> first-row value, with missing values as
        ``None`` and NumPy scalars converted to plain Python values) and
        ``question_index`` (one entry per header matching
        ``QUESTION_CODE_PATTERN``, carrying its 1-based column number, code,
        title and whether the code was found in *titles*). All three are empty
        when the file does not exist or contains no parseable data.
    """

    def _to_builtin(value: object) -> object:
        if pd.isna(value):
            return None
        if isinstance(value, np.generic):
            try:
                return value.item()
            except Exception:
                pass
        if hasattr(value, "item"):
            try:
                return value.item()
            except Exception:
                return value
        return value

    nothing_found: Dict[str, object] = {"columns": [], "first_row": {}, "question_index": []}
    if not path.exists():
        return nothing_found
    try:
        frame = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        return nothing_found

    frame = normalise_headers(frame)
    headers = [str(label).strip() for label in frame.columns]
    frame.columns = headers

    first_row: Dict[str, object] = {}
    if not frame.empty:
        top = frame.iloc[0]
        first_row = {header: _to_builtin(top[header]) for header in headers}

    question_index = []
    for position, header in enumerate(headers, start=1):
        if not QUESTION_CODE_PATTERN.match(str(header)):
            continue
        question_index.append(
            {
                "column_number": position,
                "code": header,
                "title": titles.get(header),
                "found_in_pdf": header in titles,
            }
        )
    return {"columns": headers, "first_row": first_row, "question_index": question_index}

def load_culture_frame(path: Path, entity_type: str, titles: Dict[str, str]) -> pd.DataFrame:
    """Load a wide survey CSV and reshape it into one row per entity and question.

    Args:
        path: CSV file whose question columns are named with question codes
            (anything matching ``QUESTION_CODE_PATTERN``).
        entity_type: ``'country'`` or ``'location'``; stored in the
            ``EntityType`` column and used to decide how names are built.
        titles: ``{question_code: title}`` lookup; codes without a title keep
            the code itself as the ``Question`` text.

    Returns:
        A long-format frame with the columns ``Country``, ``ParentCountry``,
        ``ISO3``, ``Latitude``, ``Longitude``, ``Year``, ``Source``,
        ``QuestionGroup``, ``Question``, ``QuestionCode``, ``Score`` and
        ``EntityType`` (plus ``Area`` when present). An empty frame with those
        columns is returned when the file is missing, empty, or has no usable
        rows.

    Raises:
        ValueError: For ``entity_type == 'country'`` when no question-code
            column is found. Location files without question columns return
            an empty frame instead.
    """
    base_columns = [
        'Country',
        'ParentCountry',
        'ISO3',
        'Latitude',
        'Longitude',
        'Year',
        'Source',
        'QuestionGroup',
        'Question',
        'QuestionCode',
        'Score',
        'EntityType',
    ]
    if entity_type == 'location':
        base_columns.append('Area')

    def _empty_frame() -> pd.DataFrame:
        # Zero-row frame that still exposes the standard column set.
        frame = pd.DataFrame(columns=base_columns)
        frame['EntityType'] = entity_type
        return frame

    if not path.exists() or path.stat().st_size == 0:
        return _empty_frame()
    try:
        raw = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        return _empty_frame()
    if raw.empty:
        return _empty_frame()
    raw = normalise_headers(raw)
    raw = ensure_required_columns(raw, entity_type)

    # Coerce coordinates and year to numbers; unparseable values become NaN
    # and the affected rows are dropped.
    numeric_columns = ['Latitude', 'Longitude', 'Year']
    for column in numeric_columns:
        if column in raw.columns:
            raw[column] = pd.to_numeric(raw[column], errors='coerce')
    raw = raw.dropna(subset=[col for col in ['Latitude', 'Longitude', 'Year'] if col in raw.columns])
    if raw.empty:
        return _empty_frame()
    raw['Year'] = raw['Year'].astype(int)

    question_columns = [col for col in raw.columns if QUESTION_CODE_PATTERN.match(str(col))]
    if not question_columns:
        if entity_type == 'country':
            raise ValueError(
                f"No survey question columns detected in `{path.name}`. Columns must use official codes like 'A001'."
            )
        return _empty_frame()
    # Every non-question column is carried along as an identifier.
    id_columns = [col for col in raw.columns if col not in question_columns]

    # Wide -> long: one row per (entity, question code).
    melted = raw.melt(id_vars=id_columns, value_vars=question_columns, var_name='QuestionCode', value_name='Score')
    melted['Score'] = pd.to_numeric(melted['Score'], errors='coerce')
    # NOTE(review): this subset names 'Latitude' and 'Longitude' unconditionally,
    # unlike the guarded dropna above - confirm every input CSV provides them.
    melted = melted.dropna(subset=['Score', 'Latitude', 'Longitude', 'Year'])
    if melted.empty:
        return _empty_frame()
    melted['Score'] = melted['Score'].astype(float)

    melted['QuestionCode'] = melted['QuestionCode'].astype(str)
    # The question group is the leading letters of the code, upper-cased.
    melted['QuestionGroup'] = (
        melted['QuestionCode'].str.extract(r'^([A-Za-z]+)', expand=False).str.upper().fillna('GENERAL')
    )
    melted['Question'] = [titles.get(code, code) for code in melted['QuestionCode']]
    melted['EntityType'] = entity_type

    # NOTE(review): both branches read melted['Country'], so the CSV must
    # yield a 'Country' column (directly or through the header mapping).
    if entity_type == 'country':
        melted['ParentCountry'] = melted['Country'].astype(str)
        melted['DisplayName'] = melted['Country'].astype(str)
    else:
        area_column = None
        for candidate in ['Area', 'Region', 'Location', 'State', 'Province']:
            if candidate in melted.columns:
                area_column = candidate
                break
        if area_column is None:
            melted['Area'] = melted['Country'].astype(str)
            area_column = 'Area'
        melted['ParentCountry'] = melted['Country'].astype(str)
        melted['DisplayName'] = (
            melted[area_column].astype(str).str.strip() + ' — ' + melted['ParentCountry'].astype(str).str.strip()
        )
        # For locations 'Country' becomes the "<area> — <parent country>" label,
        # while the original country name is kept in 'ParentCountry'.
        melted['Country'] = melted['DisplayName']

    # Safety net: ensure_required_columns adds 'ISO3' when it is missing.
    if 'ISO3' not in melted.columns:
        base = melted.get('ParentCountry', melted.get('Country'))
        melted['ISO3'] = base.astype(str).str[:3].str.upper() if base is not None else 'UNK'

    columns = [
        'Country',
        'ParentCountry',
        'ISO3',
        'Latitude',
        'Longitude',
        'Year',
        'Source',
        'QuestionGroup',
        'Question',
        'QuestionCode',
        'Score',
        'EntityType',
    ]
    if 'Area' in melted.columns:
        columns.append('Area')

    # Keep only the standard columns; helper columns such as 'DisplayName'
    # are not in the list and are therefore dropped here.
    melted = melted[[col for col in columns if col in melted.columns]].copy()
    melted['Country'] = melted['Country'].astype(str)
    melted['ParentCountry'] = melted['ParentCountry'].astype(str)
    return melted.reset_index(drop=True)

# Resolve the optional PDF catalogue and turn it into a code -> title lookup.
QUESTION_PDF = find_question_catalogue(DATA_DIR)
OFFICIAL_TITLES = extract_official_question_titles(QUESTION_PDF) if QUESTION_PDF else {}

# The loaders below tolerate missing CSV files, but the cache and snapshot
# files are written unconditionally, so the data folder has to exist first.
DATA_DIR.mkdir(parents=True, exist_ok=True)
if not CACHE_PATH.exists():
    CACHE_PATH.write_text("{}", encoding="utf-8")

country_records = load_culture_frame(COUNTRY_PATH, "country", OFFICIAL_TITLES)
location_records = load_culture_frame(LOCATION_PATH, "location", OFFICIAL_TITLES)

country_summary = summarise_spreadsheet(COUNTRY_PATH, OFFICIAL_TITLES)
location_summary = summarise_spreadsheet(LOCATION_PATH, OFFICIAL_TITLES)

COLUMN_SUMMARIES = {
    "country": {"columns": country_summary["columns"], "first_row": country_summary["first_row"]},
    "location": {"columns": location_summary["columns"], "first_row": location_summary["first_row"]},
}
QUESTION_INDEX = {
    "country": country_summary["question_index"],
    "location": location_summary["question_index"],
}

# Persist the snapshots so they can be inspected outside the notebook.
COLUMN_SNAPSHOT_PATH.write_text(
    json.dumps(COLUMN_SUMMARIES, indent=2, ensure_ascii=False),
    encoding="utf-8",
)
QUESTION_INDEX_PATH.write_text(
    json.dumps(QUESTION_INDEX, indent=2, ensure_ascii=False),
    encoding="utf-8",
)

display(
    Markdown(
        "\n".join(
            [
                f"Loaded **{len(country_records)}** country-level responses from `{COUNTRY_PATH.name}` covering **{country_records['Country'].nunique()}** countries.",
                f"Loaded **{len(location_records)}** local observations from `{LOCATION_PATH.name}` across **{location_records['Country'].nunique()}** areas.",
                "Mapped question codes to official titles." if OFFICIAL_TITLES else "Using question codes directly (no PDF detected).",
                f"Captured first-row snapshots in `{COLUMN_SNAPSHOT_PATH.name}` and question index in `{QUESTION_INDEX_PATH.name}`.",
            ]
        )
    )
)

if country_summary["columns"]:
    display(Markdown("**Country data — first row preview (first 10 columns):**"))
    preview_cols = country_summary["columns"][:10]
    display(pd.DataFrame([country_summary["first_row"]])[preview_cols])

if QUESTION_INDEX["country"]:
    display(Markdown("**Sample question index mapping (country data):**"))
    display(pd.DataFrame(QUESTION_INDEX["country"]).head())

# A notebook cell only echoes its final expression, so the country preview
# must be displayed explicitly; the location preview is echoed as the last line.
display(country_records.head())
location_records.head()


## 3. Data management utilities

The `CulturalDataset` orchestrates filtering, aggregation, and persistence. `GroupManager` stores survey responses for family/team groups. `OpenAIInsightGenerator` wraps the OpenAI Responses API with a lightweight caching layer that prevents repeated prompts from consuming tokens.

In [3]:
class CulturalDataset:
    """In-memory store of country-level and local-area survey scores.

    Three frames are maintained:

    * ``country_data``  - every country-level record,
    * ``location_data`` - every local-area record,
    * ``data``          - the comparison set: all countries plus the local
      areas that were explicitly made available (see
      ``ensure_entity_available``) or added through ``add_record``.

    ``location_lookup`` maps an area's display name to its records,
    ``question_lookup`` maps ``(QuestionGroup, Question)`` to the question
    code, and ``metadata`` holds the mean coordinates, parent country and
    entity type per entity name.
    """

    # Columns every input frame and every added record must provide.
    required_columns = {
        "Country",
        "ParentCountry",
        "ISO3",
        "Latitude",
        "Longitude",
        "Year",
        "Source",
        "QuestionGroup",
        "Question",
        "QuestionCode",
        "Score",
        "EntityType",
    }

    def __init__(self, country_df: pd.DataFrame, location_df: pd.DataFrame):
        """Validate both frames and build the lookup structures.

        Raises:
            ValueError: If either frame lacks one of ``required_columns``.
        """
        for label, frame in (("country", country_df), ("location", location_df)):
            missing = self.required_columns - set(frame.columns)
            if missing:
                raise ValueError(f"{label.capitalize()} data is missing required columns: {missing}")
        self.country_data = country_df.copy()
        self.location_data = location_df.copy()
        # The comparison set starts with countries only.
        self.data = self.country_data.copy()
        self.base_countries = sorted(self.country_data["Country"].unique())
        self.location_lookup = {
            name: group.copy()
            for name, group in self.location_data.groupby("Country")
        }
        self.question_lookup: Dict[Tuple[str, str], str] = {}
        combined = pd.concat([self.country_data, self.location_data], ignore_index=True)
        distinct = combined[["QuestionGroup", "Question", "QuestionCode"]].drop_duplicates()
        for group, question, code in distinct.itertuples(index=False, name=None):
            self.question_lookup[(group, question)] = code
        self._refresh_metadata()

    def _refresh_metadata(self) -> None:
        """Rebuild ``metadata`` and ``available_entities`` from the current frames."""
        records: List[pd.DataFrame] = []
        for frame in [self.country_data, self.location_data]:
            if frame.empty:
                continue
            meta = (
                frame.groupby("Country")
                .agg(
                    Latitude=("Latitude", "mean"),
                    Longitude=("Longitude", "mean"),
                    ParentCountry=("ParentCountry", "first"),
                    EntityType=("EntityType", "first"),
                )
                .reset_index()
            )
            records.append(meta)
        if records:
            # Country rows come first, so they win when a name exists in both frames.
            combined = pd.concat(records, ignore_index=True).drop_duplicates("Country", keep="first")
            self.metadata = {row["Country"]: row.to_dict() for _, row in combined.iterrows()}
        else:
            self.metadata = {}
        self.available_entities = sorted(self.data["Country"].unique())

    def get_countries(self) -> List[str]:
        """Return the sorted names of all entities in the comparison set."""
        return self.available_entities.copy()

    def get_locations(self) -> List[str]:
        """Return the sorted display names of all local areas."""
        return sorted(self.location_data["Country"].unique())

    def get_years(self) -> List[int]:
        """Return the sorted survey years present in the comparison set."""
        years = self.data["Year"].unique().tolist()
        return sorted(int(year) for year in years)

    def get_group_questions(self) -> Dict[str, List[str]]:
        """Return ``{question_group: sorted question texts}`` for the comparison set."""
        grouped = self.data.groupby("QuestionGroup")["Question"].unique()
        return {group: sorted(values.tolist()) for group, values in grouped.items()}

    def get_sources(self) -> List[str]:
        """Return the sorted survey sources present in the comparison set."""
        return sorted(self.data["Source"].unique())

    def get_question_code(self, group: str, question: str) -> str:
        """Return the code for ``(group, question)``, or *question* itself if unknown."""
        return self.question_lookup.get((group, question), question)

    def filter_records(self, countries: List[str], year: int) -> pd.DataFrame:
        """Return comparison-set records for *countries* in *year*.

        Raises:
            ValueError: If no record matches.
        """
        filtered = self.data[self.data["Country"].isin(countries) & (self.data["Year"] == year)]
        if filtered.empty:
            raise ValueError("No records match the current filter. Try a different year or entity selection.")
        return filtered

    def get_question_matrix(self, countries: List[str], year: int, group: Optional[str] = None) -> pd.DataFrame:
        """Return a question-by-entity matrix of mean scores.

        *group* restricts the rows to one question group; ``None`` or
        ``"All Groups"`` keeps every group.
        """
        subset = self.filter_records(countries, year)
        if group and group != "All Groups":
            subset = subset[subset["QuestionGroup"] == group]
        matrix = subset.pivot_table(index="Question", columns="Country", values="Score")
        return matrix.sort_index()

    def compute_weighted_group_scores(self, countries: List[str], year: int, weights: Dict[Tuple[str, str], float]) -> pd.DataFrame:
        """Return the weighted mean score per entity and question group.

        Args:
            countries: Entity names to include.
            year: Survey year to include.
            weights: ``{(QuestionGroup, Question): weight}``; questions that
                are not listed get a weight of ``1.0``.

        Returns:
            A frame indexed by entity with one column per question group. A
            cell is NaN when the weights of that group sum to zero.

        Raises:
            ValueError: If no record matches (propagated from ``filter_records``).
        """
        subset = self.filter_records(countries, year).copy()
        subset["Weight"] = [
            float(weights.get(pair, 1.0))
            for pair in zip(subset["QuestionGroup"], subset["Question"])
        ]
        subset["WeightedScore"] = subset["Score"] * subset["Weight"]

        totals = subset.groupby(["Country", "QuestionGroup"])[["WeightedScore", "Weight"]].sum()
        weight_sum = totals["Weight"]
        # Masking zero totals turns the division result into NaN instead of +/-inf.
        scores = totals["WeightedScore"] / weight_sum.where(weight_sum != 0)
        return scores.unstack("QuestionGroup")

    def ensure_entity_available(self, entity_name: str, entity_type: str = "country") -> None:
        """Make sure *entity_name* is part of the comparison set.

        Local areas are pulled in from ``location_lookup`` on first use.

        Raises:
            ValueError: If the entity is unknown.
        """
        if entity_name in self.available_entities:
            return
        if entity_type == "location":
            if entity_name not in self.location_lookup:
                raise ValueError(f"Unknown location '{entity_name}'.")
            self.data = pd.concat([self.data, self.location_lookup[entity_name]], ignore_index=True)
        else:
            raise ValueError(f"Unknown entity '{entity_name}'.")
        self._refresh_metadata()

    def get_map_view(self, entity_type: str, year: int, question_group: str, question: str) -> pd.DataFrame:
        """Return a copy of the records for one question and year.

        ``entity_type == "country"`` reads the country frame; any other value
        reads the location frame.
        """
        frame = self.country_data if entity_type == "country" else self.location_data
        subset = frame[
            (frame["Year"] == year)
            & (frame["QuestionGroup"] == question_group)
            & (frame["Question"] == question)
        ]
        return subset.copy()

    def get_entity_profile(self, entity_name: str, entity_type: str, year: int) -> pd.DataFrame:
        """Return a copy of every record of one entity in *year*.

        ``"country"`` and ``"location"`` read their own frames; any other
        *entity_type* reads the comparison set.
        """
        if entity_type == "country":
            frame = self.country_data
        elif entity_type == "location":
            frame = self.location_data
        else:
            frame = self.data
        subset = frame[(frame["Country"] == entity_name) & (frame["Year"] == year)]
        return subset.copy()

    def add_record(self, record: Dict[str, object]) -> None:
        """Append one survey measurement.

        ``EntityType`` defaults to ``"country"``, ``ParentCountry`` to the
        record's ``Country`` and ``QuestionCode`` to the known code of the
        question (or the question text when it is new). The record always
        becomes part of the comparison set.

        Raises:
            ValueError: If a required field is still missing after defaults.
        """
        enriched = record.copy()
        enriched.setdefault("EntityType", "country")
        enriched.setdefault("ParentCountry", enriched.get("Country", ""))
        enriched.setdefault(
            "QuestionCode",
            self.get_question_code(enriched.get("QuestionGroup", ""), enriched.get("Question", "")),
        )
        missing = self.required_columns - set(enriched.keys())
        if missing:
            raise ValueError(f"Record is missing required fields: {missing}")
        new_row = pd.DataFrame([enriched])
        entity_name = enriched["Country"]
        if enriched["EntityType"] == "country":
            self.country_data = pd.concat([self.country_data, new_row], ignore_index=True)
            comparison_rows = new_row
        else:
            self.location_data = pd.concat([self.location_data, new_row], ignore_index=True)
            # Copy so later edits of location_data cannot leak into the lookup.
            area_rows = self.location_data[self.location_data["Country"] == entity_name].copy()
            self.location_lookup[entity_name] = area_rows
            # An area that is not in the comparison set yet must enter it with
            # its complete history. Adding only the new row would mark it as
            # available and make ensure_entity_available skip the older records.
            comparison_rows = new_row if entity_name in self.available_entities else area_rows
        self.data = pd.concat([self.data, comparison_rows], ignore_index=True)
        self.question_lookup[(enriched["QuestionGroup"], enriched["Question"])] = enriched["QuestionCode"]
        self._refresh_metadata()

    def bulk_add(self, records: List[Dict[str, object]]) -> None:
        """Append several measurements, one ``add_record`` call each."""
        for record in records:
            self.add_record(record)

    def persist(self, country_path: Optional[Path] = None, location_path: Optional[Path] = None) -> None:
        """Optionally export in-memory data to alternate CSV files.

        Each frame is written only when its path is given; with no path the
        call does nothing.
        """
        if country_path is None and location_path is None:
            return
        if country_path is not None:
            self.country_data.to_csv(country_path, index=False)
        if location_path is not None:
            self.location_data.to_csv(location_path, index=False)


class GroupManager:
    """Collect survey responses of family/team members and compare them to entities.

    ``groups`` maps a group name to a dict with the keys ``type``,
    ``members`` (member name -> responses) and ``reference_year``. Group and
    member names are stored without surrounding whitespace, and every lookup
    strips its argument the same way, so ``" fam "`` and ``"fam"`` address the
    same group.
    """

    def __init__(self, dataset: "CulturalDataset"):
        self.dataset = dataset
        self.groups: Dict[str, Dict[str, object]] = {}

    def _validate_responses(self, responses: Dict[Tuple[str, str], float]) -> None:
        """Check that every response targets a known question and lies in [0, 1].

        Raises:
            ValueError: On an unknown group, an unknown question, or a score
                outside the closed interval 0..1.
        """
        valid_questions = self.dataset.get_group_questions()
        for (group, question), value in responses.items():
            if group not in valid_questions:
                raise ValueError(f"Unknown question group: {group}")
            if question not in valid_questions[group]:
                raise ValueError(f"Unknown question '{question}' for group '{group}'.")
            if not (0.0 <= value <= 1.0):
                raise ValueError("Scores must be normalised between 0 and 1.")

    def add_member(
        self,
        group_name: str,
        member_name: str,
        responses: Dict[Tuple[str, str], float],
        group_type: str = "team",
        reference_year: Optional[int] = None,
    ) -> None:
        """Store (or replace) the responses of one member of a group.

        The group is created on first use with *group_type*. A non-``None``
        *reference_year* overwrites the group's stored reference year.

        Raises:
            ValueError: If a name is blank or the responses are invalid.
        """
        group_name = group_name.strip()
        if not group_name:
            raise ValueError("Group name cannot be empty.")
        # Store the stripped name so "Ann" and " Ann " are the same member.
        member_name = member_name.strip()
        if not member_name:
            raise ValueError("Member name cannot be empty.")
        self._validate_responses(responses)
        record = self.groups.setdefault(
            group_name,
            {
                "type": group_type,
                "members": {},
                "reference_year": reference_year,
            },
        )
        record["members"][member_name] = responses
        if reference_year is not None:
            record["reference_year"] = reference_year

    def get_group_members(self, group_name: str) -> Dict[str, Dict[Tuple[str, str], float]]:
        """Return the ``member -> responses`` mapping of a group.

        Raises:
            KeyError: If the group does not exist.
        """
        group = self.groups.get(group_name.strip())
        if group is None:
            raise KeyError(f"Group '{group_name}' not found.")
        return group["members"]

    def compute_group_profile(self, group_name: str) -> pd.Series:
        """Return the mean response per ``(QuestionGroup, Question)`` across members.

        Raises:
            KeyError: If the group does not exist.
            ValueError: If the group has no members.
        """
        members = self.get_group_members(group_name)
        if not members:
            raise ValueError("Group has no members recorded yet.")
        df = pd.DataFrame(members).T
        profile = df.mean(axis=0)
        profile.name = group_name
        return profile

    def reference_year(self, group_name: str) -> Optional[int]:
        """Return the group's reference year, or ``None`` if unset or unknown group."""
        group = self.groups.get(group_name.strip())
        return group.get("reference_year") if group else None

    def match_closest_country(self, group_name: str, year: Optional[int] = None) -> pd.DataFrame:
        """Rank the dataset's comparison entities by distance to the group profile.

        The year falls back to the group's reference year and then to the
        latest year in the dataset. Only questions answered by the group are
        compared; for an entity lacking some of them, the missing differences
        are replaced by the mean of its available differences.

        Returns:
            A frame indexed by entity, sorted ascending, with the columns
            ``EuclideanDistance`` and ``Similarity (lower is better)``
            (identical values).

        Raises:
            ValueError: If no entity has a score for any of the group's
                questions in the selected year.
        """
        profile = self.compute_group_profile(group_name)
        # A set gives O(1) membership tests while filtering the records.
        question_pairs = set(profile.index)
        year = year or self.reference_year(group_name) or max(self.dataset.get_years())
        entities = self.dataset.get_countries()
        subset = self.dataset.filter_records(entities, year)
        keep = [
            pair in question_pairs
            for pair in zip(subset["QuestionGroup"], subset["Question"])
        ]
        subset = subset[keep]
        if subset.empty:
            raise ValueError(
                f"No entity has scores in {year} for the questions answered by group '{group_name}'."
            )
        matrix = subset.pivot_table(index="Country", columns=["QuestionGroup", "Question"], values="Score")

        def distance(row: pd.Series) -> float:
            diff = row - profile
            diff = diff.fillna(diff.mean())
            return float(np.sqrt((diff ** 2).sum()))

        scores = matrix.apply(distance, axis=1).sort_values()
        result = scores.to_frame(name="EuclideanDistance")
        result["Similarity (lower is better)"] = result["EuclideanDistance"]
        return result


class OpenAIInsightGenerator:
    """Generate cultural insights through OpenAI, caching every answer on disk.

    Prompts are rendered from ``prompt_templates``. The SHA-256 hash of the
    rendered prompt is the cache key, so an identical request is answered
    from the cache without contacting the API.
    """

    prompt_templates = {
        "question_basis": """You are a cultural analyst. Explain why communities scoring {score} on '{question}'\nfrom the {source} might interpret situations in that way. Keep the answer under 250 words.""",
        "situation_lens": """Given the cultural score(s) {scores} from the {source}, describe the likely lens through\nwhich a person would interpret the following situation: {situation}. Limit to 250 words.""",
        "score_delta": """Compare scores {score_a} and {score_b} from the {source}. Explain why these perspectives might clash or align and provide actionable advice in under 200 words.""",
        "collaboration": """Two or more people have scores {scores} from the {source}. Provide practical guidance on how they can work together effectively. 200 words maximum.""",
        "family": """Family members with cultural profile {scores} from the {source} are seeking harmony. Offer strategies to improve their dynamics in under 200 words.""",
    }

    def __init__(self, cache_path: Path):
        """Load the cache from *cache_path*.

        A missing, unreadable or malformed cache file, or one whose top-level
        JSON value is not an object, results in an empty cache rather than an
        exception.
        """
        self.cache_path = cache_path
        self.cache: Dict[str, str] = self._load_cache(cache_path)
        self.client: Optional["OpenAI"] = None
        self.api_key: Optional[str] = None

    @staticmethod
    def _load_cache(cache_path: Path) -> Dict[str, str]:
        """Read the JSON cache, falling back to ``{}`` on any read or parse problem."""
        try:
            loaded = json.loads(cache_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):  # ValueError covers JSON and decoding errors
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def set_api_key(self, api_key: str) -> None:
        """Create the OpenAI client for *api_key*.

        Raises:
            ImportError: If the ``openai`` package could not be imported.
        """
        if OpenAI is None:
            raise ImportError("The openai package is not installed. Re-run the setup cell to install it.")
        self.api_key = api_key
        self.client = OpenAI(api_key=api_key)

    def _cache_key(self, prompt: str) -> str:
        """Return the hexadecimal SHA-256 digest of *prompt*."""
        return hashlib.sha256(prompt.encode("utf-8")).hexdigest()

    def _render_prompt(self, template_key: str, **kwargs) -> str:
        """Fill the chosen template and append the personal-data notice.

        Raises:
            KeyError: If *template_key* is unknown or a placeholder of the
                template has no matching keyword argument.
        """
        if template_key not in self.prompt_templates:
            raise KeyError(f"Unknown template '{template_key}'")
        prompt = self.prompt_templates[template_key].format(**kwargs)
        prompt += "\n\nDo not request or rely on personal data."
        return prompt

    def generate(self, template_key: str, **kwargs) -> str:
        """Return the insight for a template, using the cache when possible.

        When no client is configured and the prompt is not cached, a hint on
        how to set the API key is returned instead of raising. A fresh answer
        is added to the cache and the cache file is rewritten.
        """
        prompt = self._render_prompt(template_key, **kwargs)
        cache_key = self._cache_key(prompt)
        if cache_key in self.cache:
            return self.cache[cache_key]
        if self.client is None:
            return "OpenAI client not configured. Set your API key with `insight_generator.set_api_key('sk-...')`."
        response = self.client.responses.create(
            model="gpt-4o-mini",
            input=[{"role": "user", "content": prompt}],
            max_output_tokens=500,
        )
        message = response.output_text.strip()
        self.cache[cache_key] = message
        # The cache folder may not exist yet; create it so the answer is not lost.
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        self.cache_path.write_text(json.dumps(self.cache, indent=2), encoding="utf-8")
        return message


# Build the shared service objects used by the rest of the notebook.
dataset = CulturalDataset(country_records, location_records)
group_manager = GroupManager(dataset)
insight_generator = OpenAIInsightGenerator(CACHE_PATH)

# Bullet-list overview of what the dataset currently offers.
display(
    Markdown(
        "\n".join(
            [
                f"* Comparison-ready entities: {', '.join(dataset.get_countries())}",
                f"* Local areas available: {', '.join(dataset.get_locations()) or 'None yet'}",
                f"* Years available: {', '.join(str(year) for year in dataset.get_years())}",
                f"* Survey sources: {', '.join(dataset.get_sources())}",
            ]
        )
    )
)


* Countries available: Canada, Japan, Sweden, United States
* Years available: 2010, 2020
* Survey sources: EVS, WVS

## 4. Interactive cultural map (Leaflet.js via folium)

In [4]:
import json
from branca.element import Element


def _score_to_color(score: float) -> str:
    """Convert a score into a ``#rrggbb`` colour on a blue-to-red ramp.

    The score is clamped to the range 0..1. Red grows with the score, blue
    shrinks with it, and green peaks (at 180) for a score of 0.5.
    """
    level = max(0.0, min(1.0, float(score)))
    channels = (
        int(255 * level),
        int(180 * (1 - abs(level - 0.5) * 2)),
        int(255 * (1 - level)),
    )
    return "#" + "".join(f"{channel:02x}" for channel in channels)


def _build_profile_html(df: pd.DataFrame) -> str:
    """Render an entity's scores as an HTML table for the map sidebar.

    Rows are question groups, columns are questions and cells hold the mean
    score rounded to three decimals; combinations without a score show an em
    dash. An empty frame yields a short "no responses" paragraph instead.
    """
    if df.empty:
        return "<p>No responses available for the selected year.</p>"
    table = pd.pivot_table(df, values="Score", index="QuestionGroup", columns="Question")
    table = table.sort_index()
    table = table.round(3)
    table = table.fillna("—")
    return table.to_html(border=0, classes="culture-profile-table")


def _add_entity_markers(
    rows: pd.DataFrame,
    layer: folium.FeatureGroup,
    entity_type: str,
    year: int,
    question_group: str,
    question: str,
    radius: float,
    color: str,
    weight: float,
) -> List[Dict[str, object]]:
    """Add one circle marker per row to ``layer`` and return sidebar metadata.

    Each metadata dict carries the marker's JavaScript variable name
    (``layerName``) so the injected script can attach click handlers, plus the
    text and pre-rendered profile table shown in the side panel.
    """
    metadata: List[Dict[str, object]] = []
    for row in rows.itertuples():
        tooltip = f"{row.Country}: {row.Score:.2f}"
        marker = CircleMarker(
            location=(row.Latitude, row.Longitude),
            radius=radius,
            color=color,
            weight=weight,
            fill=True,
            fill_color=_score_to_color(row.Score),
            # Keep low-scoring markers visible: opacity never drops below 0.2.
            fill_opacity=max(min(row.Score, 1.0), 0.2),
            tooltip=tooltip,
        ).add_to(layer)
        profile_df = dataset.get_entity_profile(row.Country, row.EntityType, year)
        metadata.append(
            {
                "layerName": marker.get_name(),
                "title": row.Country,
                "parent": row.ParentCountry,
                "score": f"{row.Score:.2f}",
                "entityType": entity_type,
                "comparisonKey": row.Country,
                "question": question,
                "questionGroup": question_group,
                "year": year,
                "source": row.Source,
                "profileHtml": _build_profile_html(profile_df),
                "tooltip": tooltip,
            }
        )
    return metadata


def render_culture_map(
    year: int,
    question_group: str,
    question: str,
    comparison_command: str = "comparison_dashboard.add_from_map",
    zoom_threshold: int = 5,
) -> folium.Map:
    """Build the interactive folium map for one question and year.

    Country markers are always visible; local-area markers live in a second
    layer that the injected script shows only once the map zoom reaches
    ``zoom_threshold``. Clicking a marker fills the side panel, and the panel's
    "Add to comparison" button executes
    ``<comparison_command>(<entity>, <entity type>)`` in the notebook kernel
    when ``window.Jupyter`` is available in the page.

    Args:
        year: Survey year to display (coerced with ``int``).
        question_group: Question group the question belongs to.
        question: Question whose scores colour the markers.
        comparison_command: Python callable expression invoked by the button.
        zoom_threshold: Minimum zoom level at which local areas are shown.

    Returns:
        The configured ``folium.Map``.

    Raises:
        ValueError: If neither countries nor local areas have data for the
            requested question and year.
    """
    year = int(year)
    country_subset = dataset.get_map_view("country", year, question_group, question)
    location_subset = dataset.get_map_view("location", year, question_group, question)
    if country_subset.empty and location_subset.empty:
        raise ValueError("No data available for the selected question and year.")

    fmap = folium.Map(location=[20, 0], zoom_start=2, tiles="cartodbpositron", control_scale=True)
    sidebar_html = """
    <style>
    #culture-sidebar {position:absolute;top:10px;right:10px;width:340px;max-height:80vh;overflow:auto;background:#ffffff;border-radius:12px;box-shadow:0 6px 20px rgba(0,0,0,0.15);z-index:9999;font-family:'Helvetica Neue', Arial, sans-serif;}
    #culture-sidebar header {padding:12px 16px;border-bottom:1px solid #e5e7eb;}
    #culture-sidebar header h3 {margin:0;font-size:18px;color:#1f2937;}
    #culture-sidebar-content {padding:16px;font-size:13px;line-height:1.5;color:#374151;}
    #culture-sidebar-add {margin:0 16px 16px 16px;padding:10px 16px;background:#2563eb;color:#ffffff;border:none;border-radius:8px;font-weight:600;cursor:pointer;display:block;width:calc(100% - 32px);}
    #culture-sidebar-add:disabled {background:#9ca3af;cursor:not-allowed;}
    .culture-profile-table {width:100%;border-collapse:collapse;margin-top:12px;font-size:12px;}
    .culture-profile-table thead th {position:sticky;top:0;background:#f9fafb;color:#111827;padding:6px;border-bottom:1px solid #e5e7eb;}
    .culture-profile-table td {padding:6px;border-bottom:1px solid #f3f4f6;text-align:center;}
    </style>
    <div id="culture-sidebar">
        <header><h3>Selection details</h3></header>
        <div id="culture-sidebar-content">Hover a marker to preview scores, then click to explore full details.</div>
        <button id="culture-sidebar-add" disabled>Add to comparison</button>
    </div>
    """
    fmap.get_root().html.add_child(Element(sidebar_html))

    country_layer = folium.FeatureGroup(name="Countries", show=True)
    location_layer = folium.FeatureGroup(name="Local areas", show=False)
    country_layer.add_to(fmap)
    location_layer.add_to(fmap)

    # Countries first, then local areas, so the metadata order matches the
    # order in which the markers were created.
    markers_metadata: List[Dict[str, object]] = _add_entity_markers(
        country_subset, country_layer, "country", year, question_group, question,
        radius=12, color="#1d4ed8", weight=2,
    )
    markers_metadata += _add_entity_markers(
        location_subset, location_layer, "location", year, question_group, question,
        radius=7, color="#f97316", weight=1.5,
    )

    folium.LayerControl(collapsed=False).add_to(fmap)

    # The JSON is embedded in an inline <script>; a literal "</script>" inside
    # any string would end the element early. "<\/" is the same character
    # sequence to the JavaScript parser but is inert to the HTML parser.
    metadata_json = json.dumps(markers_metadata).replace("</", "<\\/")
    command_json = json.dumps(comparison_command).replace("</", "<\\/")
    script = """
    <script>
    (function() {
        const map = __MAP_NAME__;
        const markers = __MARKERS__;
        const addTarget = __COMMAND__;
        const sidebar = document.getElementById('culture-sidebar');
        const content = document.getElementById('culture-sidebar-content');
        const addBtn = document.getElementById('culture-sidebar-add');
        if (!sidebar || !content || !addBtn) {
            return;
        }
        function renderDetail(item) {
            const parentLine = item.entityType === 'location' ? `<p><strong>Parent country:</strong> ${item.parent}</p>` : '';
            content.innerHTML = `<h4 style="margin:0 0 8px 0;">${item.title}</h4>` +
                `<p style=\"margin:0 0 8px 0;\"><strong>Question:</strong> ${item.question} (${item.questionGroup})</p>` +
                `<p style=\"margin:0 0 12px 0;\"><strong>Score:</strong> ${item.score} &nbsp;·&nbsp; ${item.source} (${item.year})</p>` +
                parentLine +
                item.profileHtml;
            addBtn.disabled = false;
            addBtn.dataset.entity = item.comparisonKey;
            addBtn.dataset.entityType = item.entityType;
        }
        addBtn.addEventListener('click', function() {
            const entity = this.dataset.entity;
            const entityType = this.dataset.entityType || 'country';
            if (!entity || !window.Jupyter) {
                return;
            }
            const command = addTarget + '(' + JSON.stringify(entity) + ', ' + JSON.stringify(entityType) + ')';
            window.Jupyter.notebook.kernel.execute(command);
        });
        markers.forEach(function(item) {
            const layer = window[item.layerName];
            if (!layer) {
                return;
            }
            layer.on('click', function() {
                renderDetail(item);
            });
        });
        const locationLayer = window['__LOCATION_LAYER__'];
        function syncLayers() {
            if (!locationLayer) {
                return;
            }
            if (map.getZoom() >= __ZOOM_THRESHOLD__) {
                if (!map.hasLayer(locationLayer)) {
                    map.addLayer(locationLayer);
                }
            } else {
                if (map.hasLayer(locationLayer)) {
                    map.removeLayer(locationLayer);
                }
            }
        }
        map.on('zoomend', syncLayers);
        syncLayers();
    })();
    </script>
    """
    # Substitute the data payloads last so that placeholder-like text inside
    # the dataset can never be rewritten by a later replacement.
    script = (
        script.replace('__MAP_NAME__', fmap.get_name())
        .replace('__LOCATION_LAYER__', location_layer.get_name())
        .replace('__ZOOM_THRESHOLD__', str(zoom_threshold))
        .replace('__COMMAND__', command_json)
        .replace('__MARKERS__', metadata_json)
    )
    fmap.get_root().html.add_child(Element(script))
    return fmap


def display_default_map() -> None:
    """Show the map for the latest year and the first question of the first group.

    A one-line Markdown caption naming the question, group and year is
    displayed ahead of the map itself.
    """
    latest_year = max(dataset.get_years())
    first_group = next(iter(dataset.get_group_questions()))
    first_question = dataset.get_group_questions()[first_group][0]
    caption = (
        f"Interactive map for **{first_question}** ({first_group}, {latest_year}). Hover to preview, click to open the side panel, then add the entity to the comparison dashboard."
    )
    display(Markdown(caption))
    display(render_culture_map(latest_year, first_group, first_question))


# Render the default view as this cell's output.
display_default_map()


Interactive map for **Environmental Concern** (Survival vs Self-Expression, 2020).

## 5. Comparison matrix with adjustable weights

Use the widget below to select any combination of countries and tune the relative importance of each survey question. The accordion exposes per-question weights while the dashboard summarises question-level and group-level standings.

In [5]:
class ComparisonDashboard:
    """Widget dashboard comparing selected entities with adjustable question weights.

    The dashboard owns a multi-select of entities, year and group-focus
    dropdowns, one weight slider per (group, question), and two output areas:
    a table view and a heatmap of weighted group scores. Any control change
    re-renders both outputs.
    """

    def __init__(self, dataset: CulturalDataset):
        """Create all controls, wire their observers and render once."""
        self.dataset = dataset
        # group -> question -> slider holding that question's weight
        self.weight_controls: Dict[str, Dict[str, widgets.FloatSlider]] = {}

        self.country_select = widgets.SelectMultiple(
            options=dataset.get_countries(),
            value=tuple(dataset.get_countries()[:3]),
            description='Countries',
            layout=widgets.Layout(width='300px', height='200px'),
        )
        self.year_dropdown = widgets.Dropdown(
            options=dataset.get_years(),
            value=max(dataset.get_years()),
            description='Year',
        )
        group_options = ['All Groups'] + list(dataset.get_group_questions().keys())
        self.group_dropdown = widgets.Dropdown(
            options=group_options,
            value='All Groups',
            description='Focus',
        )
        self.output_table = widgets.Output()
        self.output_heatmap = widgets.Output()

        # One accordion page per question group, each holding that group's sliders.
        weight_children = []
        for group, questions in dataset.get_group_questions().items():
            sliders = []
            controls = {}
            for question in questions:
                slider = widgets.FloatSlider(
                    value=1.0,
                    min=0.0,
                    max=2.0,
                    step=0.05,
                    # Long question labels are truncated to keep sliders aligned.
                    description=question[:22] + ('…' if len(question) > 22 else ''),
                    readout=True,
                    readout_format='.2f',
                    style={'description_width': 'initial'},
                )
                sliders.append(slider)
                controls[question] = slider
            self.weight_controls[group] = controls
            weight_children.append(widgets.VBox(sliders))
        self.weight_accordion = widgets.Accordion(children=weight_children)
        for idx, group in enumerate(dataset.get_group_questions().keys()):
            self.weight_accordion.set_title(idx, group)

        self.country_select.observe(self._update_dashboard, names='value')
        self.year_dropdown.observe(self._update_dashboard, names='value')
        self.group_dropdown.observe(self._update_dashboard, names='value')
        for controls in self.weight_controls.values():
            for slider in controls.values():
                slider.observe(self._update_dashboard, names='value')

        self.container = widgets.VBox([
            widgets.HBox([self.country_select, widgets.VBox([self.year_dropdown, self.group_dropdown])]),
            self.weight_accordion,
            self.output_table,
            self.output_heatmap,
        ])
        self._update_dashboard()

    def _collect_weights(self) -> Dict[Tuple[str, str], float]:
        """Return the current slider values keyed by ``(group, question)``."""
        weights = {}
        for group, controls in self.weight_controls.items():
            for question, slider in controls.items():
                weights[(group, question)] = slider.value
        return weights

    def _update_dashboard(self, *_):
        """Re-render the table and heatmap from the current control values.

        Does nothing while no entity is selected. Errors raised while building
        either view are shown inside the corresponding output area instead of
        propagating.
        """
        if not self.country_select.value:
            return
        countries = list(self.country_select.value)
        year = self.year_dropdown.value
        group_focus = self.group_dropdown.value
        weights = self._collect_weights()

        with self.output_table:
            clear_output(wait=True)
            try:
                matrix = self.dataset.get_question_matrix(countries, year, group_focus)
                display(Markdown("### Question-level comparison"))
                display(matrix.round(3))
                if group_focus != 'All Groups':
                    # Weighted mean over the focused group's questions; the
                    # floor on the denominator avoids dividing by zero when
                    # every weight is set to 0.
                    focus_weights = {q: weights[(group_focus, q)] for q in matrix.index}
                    summary = (
                        matrix.mul(pd.Series(focus_weights), axis=0).sum(axis=0) /
                        max(sum(focus_weights.values()), 1e-9)
                    )
                    display(Markdown("**Weighted group score (per country):**"))
                    display(summary.round(3).to_frame(name='Score'))
                else:
                    aggregated = self.dataset.compute_weighted_group_scores(countries, year, weights)
                    display(Markdown("### Group-level weighted scores"))
                    display(aggregated.round(3))
            except Exception as exc:
                display(Markdown(f"**Error:** {exc}"))

        with self.output_heatmap:
            clear_output(wait=True)
            try:
                aggregated = self.dataset.compute_weighted_group_scores(countries, year, weights)
                heatmap = go.Figure(
                    data=go.Heatmap(
                        z=aggregated.values,
                        x=list(aggregated.columns),
                        y=list(aggregated.index),
                        colorscale='Viridis',
                        zmin=0,
                        zmax=1,
                    )
                )
                heatmap.update_layout(
                    title='Weighted cultural proximity',
                    xaxis_title='Question Group',
                    yaxis_title='Country',
                    height=400,
                )
                heatmap.show()
            except Exception as exc:
                display(Markdown(f"**Unable to render heatmap:** {exc}"))

    def add_from_map(self, entity_name: str, entity_type: str = "country") -> None:
        """Add an entity chosen on the map to the comparison selection.

        Args:
            entity_name: Name of the country or local area to add.
            entity_type: Entity kind passed to ``ensure_entity_available``.

        If the dataset rejects the entity with ``ValueError`` the message is
        shown in the table output and the selection is left untouched.
        """
        try:
            self.dataset.ensure_entity_available(entity_name, entity_type)
        except ValueError as exc:
            with self.output_table:
                clear_output(wait=True)
                display(Markdown(f"**Map selection error:** {exc}"))
            return
        # Snapshot the selection BEFORE touching options: assigning new options
        # to a SelectMultiple clears its value, which previously discarded every
        # entity the user had already selected.
        selection = list(self.country_select.value)
        options = list(self.country_select.options)
        if entity_name not in options:
            options.append(entity_name)
            options.sort(key=str.lower)
            self.country_select.options = options
        if entity_name not in selection:
            selection.append(entity_name)
        if tuple(selection) != tuple(self.country_select.value):
            self.country_select.value = tuple(selection)
        self._update_dashboard()

    def display(self):
        """Render the dashboard container in the current cell output."""
        display(self.container)

# Build the dashboard on the shared dataset and render it as this cell's output.
# The variable name matters: the map's "Add to comparison" button defaults to
# executing `comparison_dashboard.add_from_map(...)` in the kernel.
comparison_dashboard = ComparisonDashboard(dataset)
comparison_dashboard.display()

VBox(children=(HBox(children=(SelectMultiple(description='Countries', index=(0, 1, 2), layout=Layout(height='2…

## 6. Family and team survey capture

Record member scores (normalised 0–1) for a family or team group below. After adding members you can finalise the profile to discover the nearest national culture using Euclidean similarity across the shared question space.

In [6]:
class GroupSurveyWidget:
    """Form for recording member survey scores and matching a group to countries.

    Members are stored through the supplied ``GroupManager``; finalising a
    group shows its averaged profile and the five closest national cultures
    reported by the manager.
    """

    def __init__(self, dataset: CulturalDataset, manager: GroupManager):
        self.dataset = dataset
        self.manager = manager
        self.output = widgets.Output()
        self._build_widgets()

    def _build_widgets(self):
        """Create the input controls, per-question sliders, buttons and layout."""
        self.group_type = widgets.ToggleButtons(
            options=[('Family', 'family'), ('Team', 'team')],
            description='Group type',
        )
        self.group_name = widgets.Text(description='Group name', placeholder='e.g. Rivera Family')
        self.member_name = widgets.Text(description='Member name', placeholder='e.g. Ana')
        self.year_dropdown = widgets.Dropdown(
            options=self.dataset.get_years(),
            value=max(self.dataset.get_years()),
            description='Ref. year',
        )

        # One slider per (group, question); each group becomes an accordion page.
        self.question_sliders: Dict[Tuple[str, str], widgets.FloatSlider] = {}
        pages = []
        for group, questions in self.dataset.get_group_questions().items():
            page_items = [widgets.HTML(f"<h4>{group}</h4>")]
            for question in questions:
                label = question[:28]
                if len(question) > 28:
                    label += '…'
                control = widgets.FloatSlider(
                    value=0.5,
                    min=0.0,
                    max=1.0,
                    step=0.01,
                    description=label,
                    style={'description_width': 'initial'},
                    readout=True,
                    readout_format='.2f',
                )
                self.question_sliders[(group, question)] = control
                page_items.append(control)
            pages.append(widgets.VBox(page_items))
        self.slider_accordion = widgets.Accordion(children=pages)
        for position, title in enumerate(self.dataset.get_group_questions()):
            self.slider_accordion.set_title(position, title)

        self.add_member_button = widgets.Button(description='Add member responses', button_style='success')
        self.finalise_button = widgets.Button(description='Finalise & match', button_style='primary')
        self.add_member_button.on_click(self._handle_add_member)
        self.finalise_button.on_click(self._handle_finalise)

        header_row = widgets.HBox([self.group_type, self.group_name, self.member_name, self.year_dropdown])
        button_row = widgets.HBox([self.add_member_button, self.finalise_button])
        self.form = widgets.VBox([header_row, self.slider_accordion, button_row, self.output])

    def _collect_scores(self) -> Dict[Tuple[str, str], float]:
        """Return the current slider values keyed by ``(group, question)``."""
        scores = {}
        for key, control in self.question_sliders.items():
            scores[key] = control.value
        return scores

    def _handle_add_member(self, _):
        """Store the current slider values as one member's responses.

        On success a confirmation is shown and the member-name field is
        cleared; any exception is shown as a warning in the output area.
        """
        try:
            member = self.member_name.value
            self.manager.add_member(
                group_name=self.group_name.value,
                member_name=member,
                responses=self._collect_scores(),
                group_type=self.group_type.value,
                reference_year=self.year_dropdown.value,
            )
            with self.output:
                clear_output(wait=True)
                display(Markdown(f"✅ Added survey for **{member}**."))
            self.member_name.value = ''
        except Exception as exc:
            with self.output:
                clear_output(wait=True)
                display(Markdown(f"⚠️ {exc}"))

    def _handle_finalise(self, _):
        """Show the group's averaged profile and its five closest countries.

        Any exception raised by the manager is shown as a warning instead of
        propagating.
        """
        try:
            group_name = self.group_name.value
            profile = self.manager.compute_group_profile(group_name)
            matches = self.manager.match_closest_country(group_name)
            with self.output:
                clear_output(wait=True)
                display(Markdown(f"### Profile for **{group_name}**"))
                display(profile.round(3).to_frame(name='Average score'))
                display(Markdown("### Closest national cultures"))
                display(matches.head(5).round(3))
        except Exception as exc:
            with self.output:
                clear_output(wait=True)
                display(Markdown(f"⚠️ {exc}"))

    def display(self):
        """Render the form in the current cell output."""
        display(self.form)

# Build the survey form on the shared dataset and group manager, then render it
# as this cell's output.
group_widget = GroupSurveyWidget(dataset, group_manager)
group_widget.display()

VBox(children=(HBox(children=(ToggleButtons(description='Group type', options=(('Family', 'family'), ('Team', …

## 7. Pure Python backend helpers
The previous FastAPI layer has been replaced with lightweight utility functions so that everything runs natively within this notebook.


In [None]:
class CultureExplorerService:
    """Thin facade over the dataset, group manager and insight generator.

    Every method delegates to one of the three collaborators, so the analytics
    can be driven from plain Python without a web framework.
    """

    def __init__(self, dataset: CulturalDataset, group_manager: GroupManager, insight_generator: OpenAIInsightGenerator):
        self.dataset = dataset
        self.group_manager = group_manager
        self.insight_generator = insight_generator

    def get_countries(self) -> List[str]:
        """Return the dataset's country list."""
        countries = self.dataset.get_countries()
        return countries

    def get_years(self) -> List[int]:
        """Return the years known to the dataset."""
        years = self.dataset.get_years()
        return years

    def get_groups(self) -> Dict[str, List[str]]:
        """Return the dataset's mapping of question group to questions."""
        groups = self.dataset.get_group_questions()
        return groups

    def get_score_matrix(self, countries: List[str], year: int, group: Optional[str] = None) -> pd.DataFrame:
        """Return the dataset's question matrix for the given countries and year."""
        return self.dataset.get_question_matrix(countries, year, group)

    def add_record(self, record: Dict[str, object]) -> Dict[str, object]:
        """Add a record to the dataset and report the resulting totals."""
        self.dataset.add_record(record)
        snapshot: Dict[str, object] = {"status": "ok"}
        snapshot["records"] = len(self.dataset.data)
        snapshot["entities"] = self.dataset.get_countries()
        return snapshot

    def add_group_member(
        self,
        group_name: str,
        member_name: str,
        responses: Dict[str, Dict[str, float]],
        group_type: str = "team",
        reference_year: Optional[int] = None,
    ) -> Dict[str, object]:
        """Register one member's responses and return the group's member names.

        ``responses`` is nested as ``{group: {question: score}}`` and is
        flattened to ``{(group, question): score}`` before being handed to the
        group manager.
        """
        flat_scores = {}
        for group, answers in responses.items():
            for question, score in answers.items():
                flat_scores[(group, question)] = score
        self.group_manager.add_member(
            group_name=group_name,
            member_name=member_name,
            responses=flat_scores,
            group_type=group_type,
            reference_year=reference_year,
        )
        roster = self.group_manager.get_group_members(group_name)
        return {"status": "ok", "members": sorted(roster.keys())}

    def get_group_profile(self, group_name: str) -> pd.Series:
        """Return the group manager's profile for ``group_name``."""
        return self.group_manager.compute_group_profile(group_name)

    def match_group_to_countries(self, group_name: str, year: Optional[int] = None) -> pd.DataFrame:
        """Return the group manager's country match table for ``group_name``."""
        return self.group_manager.match_closest_country(group_name, year)

    def generate_insight(self, template_key: str, **payload: object) -> str:
        """Forward the template key and payload to the insight generator."""
        return self.insight_generator.generate(template_key, **payload)

# Wire the facade to the shared dataset, group manager and insight generator
# created earlier in the notebook, then announce that it is ready.
service = CultureExplorerService(dataset, group_manager, insight_generator)
display(Markdown("Pure Python helper `service` initialised. Use its methods to explore and manipulate the dataset without running a web server."))


## 8. Backend helper verification
Quick smoke tests confirm that the lightweight service behaves as expected when called directly from Python.


In [None]:
# Smoke test 1: read-only accessors and the score matrix for two countries.
latest_year = max(service.get_years())
sample_countries = service.get_countries()[:2]
matrix = service.get_score_matrix(sample_countries, latest_year)

display(Markdown(
    f"Validated helper service with {len(service.get_countries())} countries and {len(service.get_years())} years of data."
))
display(matrix.head())

# A throw-away record reusing the first known question group and question.
sample_record = {
    'Country': 'Testland',
    'ISO3': 'TST',
    'Latitude': 10.0,
    'Longitude': 20.0,
    'Year': latest_year,
    'Source': 'WVS',
    'QuestionGroup': list(service.get_groups().keys())[0],
    'Question': list(service.get_groups().values())[0][0],
    'Score': 0.5,
}

# Smoke tests 2 and 3 mutate the shared dataset and group manager. The
# try/finally guarantees the temporary artefacts are removed even when a step
# raises, so later cells never see 'Testland' or 'Demo Helpers'.
try:
    snapshot = service.add_record(sample_record)
    display(Markdown(f"✅ Added record via helper service; dataset now tracks {snapshot['records']} entries."))

    service.add_group_member(
        group_name='Demo Helpers',
        member_name='Analyst',
        responses={sample_record['QuestionGroup']: {sample_record['Question']: sample_record['Score']}},
        reference_year=latest_year,
    )
    profile = service.get_group_profile('Demo Helpers')
    display(Markdown(f"Computed profile for Demo Helpers with {len(profile)} question entries."))
finally:
    # Clean up temporary artefacts
    dataset.data = dataset.data[dataset.data['Country'] != 'Testland']
    dataset.country_data = dataset.country_data[dataset.country_data['Country'] != 'Testland']
    group_manager.groups.pop('Demo Helpers', None)
    dataset._refresh_metadata()


## 9. OpenAI cultural insights

In [None]:
# To enable live OpenAI calls, uncomment the next line and supply a real key
# (left commented out so no key is configured by accident):
# insight_generator.set_api_key('sk-...')
# The request below runs as-is; as the display label notes, without a key the
# result is expected to be a cached response or a placeholder message.
sample_response = service.generate_insight(
    'question_basis',
    score=0.62,
    question='Generalized Trust',
    source='World Values Survey',
)
display(Markdown(f"Sample response (cached or placeholder): {sample_response}"))


## 10. Adding retrospective or future data

In [10]:
def add_future_record(
    country: str,
    iso3: str,
    latitude: float,
    longitude: float,
    year: int,
    source: str,
    question_group: str,
    question: str,
    score: float,
    entity_type: str = 'country',
    parent_country: Optional[str] = None,
) -> None:
    """Append a single survey measurement to the shared ``dataset``.

    The question code is looked up from the dataset for the given group and
    question. When ``parent_country`` is omitted (or empty) the entity's own
    name is stored as its parent country.
    """
    record = dict(
        Country=country,
        ISO3=iso3,
        Latitude=latitude,
        Longitude=longitude,
        Year=year,
        Source=source,
        QuestionGroup=question_group,
        Question=question,
        QuestionCode=dataset.get_question_code(question_group, question),
        Score=score,
        EntityType=entity_type,
        ParentCountry=parent_country or country,
    )
    dataset.add_record(record)


# Demonstration: append a 2025 placeholder measurement for the first question
# of the first question group.
add_future_record(
    country='Futuria',
    iso3='FTR',
    latitude=12.34,
    longitude=56.78,
    year=2025,
    source='EVS',
    question_group=list(dataset.get_group_questions())[0],
    question=list(dataset.get_group_questions().values())[0][0],
    score=0.67,
)
display(Markdown("Appended a placeholder future data point for Futuria (2025)."))

# Remove the placeholder again so the demo leaves the shared dataset unchanged.
dataset.data = dataset.data.loc[dataset.data['Country'] != 'Futuria']
dataset.country_data = dataset.country_data.loc[dataset.country_data['Country'] != 'Futuria']
dataset._refresh_metadata()

Appended a placeholder future data point for Futuria (2025).