In [1]:
import pandas as pd

# Raw loan export; the path is resolved relative to the notebook's working directory.
LOANS_CSV = "../data/loans.csv"

df = pd.read_csv(LOANS_CSV)
df.head()


Unnamed: 0,Loan ID,Status,Client ID,Debtor Identifier,Debtor Identifier Type,Debtor Country,Trust ID,Amount,Created At,Accepted At,Refused At,Repaid At,Due Date,Insurance Status
0,ccba7b5961ac84c6bc09ba20b0497bd666ac10a7ecf123...,REPAID,2477304f4867e7ea86fd4414e0f845c0e4bd06516fe687...,17c277f8d264ccec868dc55add915dc93fd4ae4bd779cf...,siren,FR,e98a46aea01b6f55341744cdddbba3d6a88ab1e7d366eb...,13643.52,2025-01-24T09:52:41.912,2025-01-24T09:52:54.08,,2025-03-06T05:05:38.362,2025-03-25T08:00:00,SETTLED
1,ea43f967166a7809d0e0f27fff33a3732fa600513d1b5f...,REPAID,a57af78c8caa2a9c6efbf8d9535f34441319c66ba56803...,494f60d81e3f8e6effbf093a1c2c6d0a99ca9d561bf624...,siren,FR,e98a46aea01b6f55341744cdddbba3d6a88ab1e7d366eb...,58.97,2025-08-27T07:51:48.154,2025-08-27T09:59:54.827,,2025-09-30T10:30:19,01/10/2025,
2,1d39f1f6c61cef090d31c552b0d3e511280ba64c2fb69a...,REPAID,dd3a54bd37885757d4c4b3dbd85c5fdcffa351fe2d1680...,8ab1efec5e005f628c0e5793276ddaf1e3693cf880a0a9...,cif,ES,e98a46aea01b6f55341744cdddbba3d6a88ab1e7d366eb...,6654.03,2025-02-24T09:13:01.171,2025-02-24T16:51:01.077,,2025-06-03T18:32:50,2025-05-20T09:12:33.344,REFUSED
3,f03b65936792e9d35e66db0572aa43cdc5f2d33d75fcd0...,REPAID,dd3a54bd37885757d4c4b3dbd85c5fdcffa351fe2d1680...,8563f72a004fcba1d5ae23410ccf82a6d8bcc85fb6ccfd...,kvk,NL,c4ed1a68f3ad7b3c85c4400e688e4dd3dcfe4da53d171d...,4452.8,2025-10-07T06:19:38.206,2025-10-07T06:19:47.087,,2025-10-29T15:34:56,2025-11-01T06:19:09.316,SETTLED
4,70290cf7ced390115c4443cab5f519fcf8ac52a11dafbd...,REPAID,dd3a54bd37885757d4c4b3dbd85c5fdcffa351fe2d1680...,17c277f8d264ccec868dc55add915dc93fd4ae4bd779cf...,siret,FR,e98a46aea01b6f55341744cdddbba3d6a88ab1e7d366eb...,966.08,2025-07-29T12:21:45.349,2025-07-29T12:21:54.306,,2025-09-05T14:43:14,2025-09-22T12:21:41.397,SETTLED


In [2]:
# Normalise the header names to snake_case in three explicit steps:
# trim surrounding whitespace, lower-case, then turn inner spaces into underscores.
trimmed_names = df.columns.str.strip()
lowercase_names = trimmed_names.str.lower()
df.columns = lowercase_names.str.replace(" ", "_")

df.columns.tolist()

['loan_id',
 'status',
 'client_id',
 'debtor_identifier',
 'debtor_identifier_type',
 'debtor_country',
 'trust_id',
 'amount',
 'created_at',
 'accepted_at',
 'refused_at',
 'repaid_at',
 'due_date',
 'insurance_status']

In [3]:
import numpy as np
import pandas as pd

# Monthly insurance rate of production per debtor country.
#
# A loan's amount counts as "insured" when its insurance status is one of
# `insured_statuses`, and as explicitly not insured when it is one of
# `not_insured_statuses`.
insured_statuses = {"ACTIVATED", "CLAIMED", "EXPIRED", "SETTLED", "TIME_BARRED"}
not_insured_statuses = {"REFUSED", "REJECTED"}

# The export writes ISO 8601 timestamps with a varying number of
# fractional-second digits (e.g. ".08", ".827", and none at all in some
# columns). pandas >= 2.0 infers a single format from the first value and,
# with errors="coerce", silently turns every value that does not match it
# into NaT -- those loans would then vanish from production. Asking for
# format="ISO8601" accepts any ISO 8601 variant. The keyword does not exist
# before pandas 2.0, where per-element parsing is the default anyway.
_iso_kwargs = {"format": "ISO8601"} if int(pd.__version__.split(".")[0]) >= 2 else {}

df["accepted_at"] = pd.to_datetime(df["accepted_at"], errors="coerce", **_iso_kwargs)
df["amount"] = pd.to_numeric(df["amount"], errors="coerce")

# "Production" = loans that have both an acceptance timestamp and an amount.
prod = df.loc[
    df["accepted_at"].notna() & df["amount"].notna(),
    ["accepted_at", "debtor_country", "insurance_status", "amount"]
].copy()

# Bucket every loan into the first day of its acceptance month.
prod["month"] = prod["accepted_at"].dt.to_period("M").dt.to_timestamp()

# NOTE: loans whose status is missing or in neither set get NaN here. The
# groupby "sum" below skips NaN, so such loans add nothing to insured
# production while still counting in total production (the denominator).
prod["insured_amount"] = np.select(
    [
        prod["insurance_status"].isin(insured_statuses),
        prod["insurance_status"].isin(not_insured_statuses),
    ],
    [
        prod["amount"],
        0.0,
    ],
    default=np.nan
)

insurance_rate = (
    prod.groupby(["month", "debtor_country"], as_index=False)
        .agg(
            total_production=("amount", "sum"),
            insured_production=("insured_amount", "sum")
        )
)

# Share of the month's accepted amount that is insured (a 0-1 fraction).
insurance_rate["insurance_rate_of_production"] = (
    insurance_rate["insured_production"]
    / insurance_rate["total_production"]
)

insurance_rate = insurance_rate.sort_values(
    ["month", "debtor_country"]
).reset_index(drop=True)

insurance_rate.head()

Unnamed: 0,month,debtor_country,total_production,insured_production,insurance_rate_of_production
0,2025-01-01,AE,258346.59,82203.52,0.318191
1,2025-01-01,AT,19236.94,19236.94,1.0
2,2025-01-01,BE,2033606.65,1837390.81,0.903513
3,2025-01-01,CH,376640.47,80481.42,0.213682
4,2025-01-01,DE,353997.9,321702.35,0.908769


In [4]:
import pandas as pd
import numpy as np
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output

# Fail fast when the aggregated frame lacks a column the dashboard reads.
required_cols = {
    "month",
    "debtor_country",
    "insurance_rate_of_production",
    "total_production",
    "insured_production",
}
missing = required_cols - set(insurance_rate.columns)
if missing:
    raise ValueError(f"insurance_rate is missing columns: {missing}")

# Make sure `month` is a real datetime, then keep only rows that can be plotted.
insurance_rate["month"] = pd.to_datetime(insurance_rate["month"], errors="coerce")
viz_df = insurance_rate.dropna(subset=["month", "debtor_country"]).copy()

# The date pickers start on the full span of the available months.
month_column = viz_df["month"]
min_date = month_column.min().date()
max_date = month_column.max().date()

from_date = widgets.DatePicker(description="From:", value=min_date)
to_date = widgets.DatePicker(description="To:", value=max_date)

# Shared sizing for the two wider controls.
chart_type = widgets.Dropdown(
    description="View:",
    options=[
        "Line (Rate %)",
        "Bar (Rate %)",
        "Heatmap (Rate %)",
        "Line (Production Amount)",
    ],
    value="Line (Rate %)",
    layout=widgets.Layout(width="320px"),
)

# Alphabetical list of every country present in the plottable data.
unique_countries = viz_df["debtor_country"].dropna().unique().tolist()
countries = sorted(unique_countries)

# Only the first five countries are preselected so the initial chart stays readable.
country_select = widgets.SelectMultiple(
    description="Countries:",
    options=countries,
    value=tuple(countries[:5]),
    layout=widgets.Layout(width="320px", height="160px"),
)

select_all = widgets.Checkbox(description="Select all countries", value=False)

# Every chart and message is drawn into this output area.
output = widgets.Output()

def apply_month_axis(fig):
    """Give ``fig`` one x-axis tick per month, labelled like ``Jan 2025``.

    The figure is updated in place and returned so calls can be chained.
    """
    monthly_ticks = {
        "tickmode": "linear",
        "dtick": "M1",          # one tick every month
        "tickformat": "%b %Y",
    }
    fig.update_layout(xaxis=monthly_ticks)
    return fig

def get_filtered_df():
    """Return the rows of ``viz_df`` that match the current widget selection.

    Reads the ``from_date`` / ``to_date`` pickers and the ``country_select``
    list. An empty country selection means "no country filter".

    Returns:
        A ``(frame, error)`` pair. On success ``frame`` is a filtered copy of
        ``viz_df`` and ``error`` is ``None``; otherwise ``frame`` is ``None``
        and ``error`` is a message suitable for showing to the user.
    """
    if from_date.value is None or to_date.value is None:
        return None, "Please select both From and To dates."

    start = pd.Timestamp(from_date.value)
    end = pd.Timestamp(to_date.value)

    if start > end:
        return None, "From date must be earlier than To date."

    # Rows are monthly buckets stamped on the first day of the month. Snap the
    # lower bound to the start of its month so that a mid-month From date keeps
    # that month, exactly as a mid-month To date already does.
    month_start = start.replace(day=1)

    in_range = (viz_df["month"] >= month_start) & (viz_df["month"] <= end)
    d = viz_df.loc[in_range].copy()

    selected = list(country_select.value)
    if selected:
        d = d.loc[d["debtor_country"].isin(selected)].copy()

    if d.empty:
        return None, "No data for the selected timeframe / countries."

    return d, None

def _finish_rate_chart(fig):
    """Apply the layout and hover text shared by the line and bar rate charts."""
    fig.update_layout(
        yaxis_title="Insurance Rate of Production (%)",
        yaxis_tickformat=".0%",
        xaxis_title="Month",
        legend_title_text="Country"
    )
    # The country comes from `custom_data`; "%{legendgroup}" is not a
    # hovertemplate variable and would not be substituted.
    fig.update_traces(
        hovertemplate="<b>%{x|%b %Y}</b><br>Country: %{customdata[0]}<br>Rate: %{y:.1%}<extra></extra>"
    )
    return apply_month_axis(fig)


def render(_=None):
    """Redraw the chart in ``output`` from the current widget state.

    Used both directly and as an ipywidgets observer, hence the ignored
    positional argument. If the current selection yields no data, the message
    from ``get_filtered_df`` is printed instead of a chart.
    """
    with output:
        clear_output(wait=True)

        d, err = get_filtered_df()
        if err:
            print(err)
            return

        start = pd.Timestamp(from_date.value)
        end = pd.Timestamp(to_date.value)
        selected = list(country_select.value)

        subtitle = f"({start.date()} to {end.date()})"
        if selected:
            subtitle += f" — {len(selected)} country(ies)"

        view = chart_type.value

        if view in ("Line (Rate %)", "Bar (Rate %)"):
            # Both rate views plot the same columns; only the mark type differs.
            rate_kwargs = dict(
                x="month",
                y="insurance_rate_of_production",
                color="debtor_country",
                custom_data=["debtor_country"],
                title=f"Insurance Rate of Production by Month {subtitle}"
            )
            if view == "Line (Rate %)":
                fig = px.line(d, markers=True, **rate_kwargs)
            else:
                fig = px.bar(d, barmode="group", **rate_kwargs)
            _finish_rate_chart(fig).show()

        elif view == "Heatmap (Rate %)":
            # Countries on the rows, months on the columns, rate as the colour.
            heat = d.pivot_table(
                index="debtor_country",
                columns="month",
                values="insurance_rate_of_production",
                aggfunc="mean"
            ).sort_index()

            fig = px.imshow(
                heat,
                aspect="auto",
                title=f"Insurance Rate of Production Heatmap {subtitle}",
                labels=dict(x="Month", y="Country", color="Rate")
            )
            fig.update_layout(
                xaxis=dict(tickformat="%b %Y"),
            )
            fig.update_traces(
                hovertemplate="Country: %{y}<br>Month: %{x|%b %Y}<br>Rate: %{z:.1%}<extra></extra>"
            )
            fig.show()

        elif view == "Line (Production Amount)":
            amt = (d.groupby(["month", "debtor_country"], as_index=False)
                     .agg(total_production=("total_production", "sum"),
                          insured_production=("insured_production", "sum")))

            # Long format: one row per (month, country, series) so that total
            # and insured amounts become separate dashed/solid lines.
            long_df = amt.melt(
                id_vars=["month", "debtor_country"],
                value_vars=["total_production", "insured_production"],
                var_name="series",
                value_name="amount"
            )

            long_df["series"] = long_df["series"].replace({
                "total_production": "Total Production",
                "insured_production": "Insured Production"
            })

            fig = px.line(
                long_df,
                x="month",
                y="amount",
                color="debtor_country",
                line_dash="series",
                markers=True,
                custom_data=["debtor_country", "series"],
                title=f"Production Amounts (Total vs Insured) {subtitle}"
            )
            fig.update_layout(
                yaxis_title="Amount",
                yaxis_tickformat="~s",
                xaxis_title="Month",
                legend_title_text="Country / Series"
            )
            fig.update_traces(
                hovertemplate=(
                    "<b>%{x|%b %Y}</b><br>Country: %{customdata[0]}"
                    "<br>Series: %{customdata[1]}<br>Amount: %{y:,.0f}<extra></extra>"
                )
            )
            apply_month_axis(fig).show()

def on_select_all_change(change):
    """Select every country when the "Select all countries" box is ticked.

    Unticking the box leaves the current selection untouched.

    No explicit ``render()`` call is made here: assigning a new value to
    ``country_select`` notifies its own ``value`` observer, which redraws the
    chart. Calling ``render()`` as well drew every chart twice, and redrew an
    unchanged chart when the box was unticked.
    """
    if change["new"]:
        country_select.value = tuple(countries)

# The checkbox has its own handler; every other control simply redraws the chart.
select_all.observe(on_select_all_change, names="value")
for control in (from_date, to_date, chart_type, country_select):
    control.observe(render, names="value")

# Layout: date range and view type on one row, country selection stacked below.
controls = widgets.HBox([from_date, to_date, chart_type])
selectors = widgets.VBox([select_all, country_select])

display(controls, selectors, output)

# Draw the initial chart for the default selection.
render()

HBox(children=(DatePicker(value=datetime.date(2025, 1, 1), description='From:', step=1), DatePicker(value=date…

VBox(children=(Checkbox(value=False, description='Select all countries'), SelectMultiple(description='Countrie…

Output()