In [1]:
import polars as pl
import json
import shap
import numpy as np
from great_tables import GT, md, html

import plotly.graph_objects as go

from utils.visual_utils import aggregate_frequency_df, create_frequency_table, save_table_to_html, plot_aggregated_data


In [2]:
# Load the frequency predictions stored in holdout_frequency_predictions.parquet.
# `holdout` is the frame every table cell and the PDF report below aggregate from.
holdout = pl.read_parquet('./processed-data/holdout_frequency_predictions.parquet')

In [3]:
# Load the visual configuration that is handed to aggregate_frequency_df for
# every feature below.
# JSON files are UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default (which is not UTF-8 on Windows).
with open('./config/continous_feature_visuals.json', 'r', encoding='utf-8') as f:
    continous_feature_config = json.load(f)

In [4]:
# Feature handled by this cell. `feature` and `aggregated_data` are notebook
# globals: the plot cell that follows reads both of them.
feature = 'Area'

# Aggregate the holdout frame for this feature, then build the table from it.
aggregated_data = aggregate_frequency_df(holdout, feature, continous_feature_config)
frequency_table = create_frequency_table(aggregated_data, feature = feature)
# Write an HTML copy whose file name carries the feature name.
save_table_to_html(frequency_table, path=f'frequency-table-{feature}.html')
# Bare last expression so the notebook renders the table as the cell output.
frequency_table

Frequency - Actual vs Predicted - Area,Frequency - Actual vs Predicted - Area,Frequency - Actual vs Predicted - Area,Frequency - Actual vs Predicted - Area,Frequency - Actual vs Predicted - Area,Frequency - Actual vs Predicted - Area
Area,Exposure,ClaimCount,ClaimCountPrediction,Frequency,FrequencyPrediction
A,12432,978,999.9109209343683,7.9%,8.0%
B,8572,728,741.7203320277499,8.5%,8.7%
C,21036,1931,1989.154182705145,9.2%,9.5%
D,15436,1587,1650.587332277531,10.3%,10.7%
E,12701,1601,1518.6238345612092,12.6%,12.0%
F,1622,251,223.2713080991092,15.5%,13.8%
Trained on experiment: Not Logged,Trained on experiment: Not Logged,Trained on experiment: Not Logged,Trained on experiment: Not Logged,Trained on experiment: Not Logged,Trained on experiment: Not Logged


In [5]:
# Plot the aggregate built in the previous cell, using the 'Frequency' column as
# target, 'FrequencyPrediction' as prediction and 'Exposure' as exposure.
plot_aggregated_data(
    visual_data=aggregated_data,
    feature=feature,
    target='Frequency',
    prediction='FrequencyPrediction',
    exposure='Exposure',
)

In [6]:
# Feature handled by this cell. This rebinds the notebook globals `feature`,
# `aggregated_data` and `frequency_table`, which the following cells read.
feature = 'DrivAge'

# Aggregate the holdout frame for this feature, then build the table from it.
aggregated_data = aggregate_frequency_df(holdout, feature, continous_feature_config)
frequency_table = create_frequency_table(aggregated_data, feature = feature)
# Write an HTML copy whose file name carries the feature name.
save_table_to_html(frequency_table, path=f'frequency-table-{feature}.html')
# Bare last expression so the notebook renders the table as the cell output.
frequency_table

Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge
DrivAge,Exposure,ClaimCount,ClaimCountPrediction,Frequency,FrequencyPrediction
20.0,2514,460,468.8260415069985,18.3%,18.6%
25.0,5111,538,574.5780922850029,10.5%,11.2%
30.0,7872,673,723.5405412197442,8.5%,9.2%
35.0,8771,789,768.8686048259742,9.0%,8.8%
40.0,8794,838,843.7578851454269,9.5%,9.6%
45.0,8836,927,893.8520362774489,10.5%,10.1%
50.0,9183,967,908.2257399769513,10.5%,9.9%
55.0,6699,567,614.9529984202571,8.5%,9.2%
60.0,4397,426,406.4234748336292,9.7%,9.2%
65.0,3579,322,335.97835230696063,9.0%,9.4%


In [7]:
# Save the current table to ./frequency-table.png. `frequency_table` is whatever
# the last table cell assigned - the DrivAge table when the notebook is run in
# order - and the file name does not include the feature, so each run overwrites it.
frequency_table.save('./frequency-table.png')


Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge,Frequency - Actual vs Predicted - DrivAge
DrivAge,Exposure,ClaimCount,ClaimCountPrediction,Frequency,FrequencyPrediction
20.0,2514,460,468.8260415069985,18.3%,18.6%
25.0,5111,538,574.5780922850029,10.5%,11.2%
30.0,7872,673,723.5405412197442,8.5%,9.2%
35.0,8771,789,768.8686048259742,9.0%,8.8%
40.0,8794,838,843.7578851454269,9.5%,9.6%
45.0,8836,927,893.8520362774489,10.5%,10.1%
50.0,9183,967,908.2257399769513,10.5%,9.9%
55.0,6699,567,614.9529984202571,8.5%,9.2%
60.0,4397,426,406.4234748336292,9.7%,9.2%
65.0,3579,322,335.97835230696063,9.0%,9.4%


In [8]:
# Plot the aggregate currently held in `aggregated_data`, using the 'Frequency'
# column as target, 'FrequencyPrediction' as prediction and 'Exposure' as exposure.
plot_aggregated_data(
    visual_data=aggregated_data,
    feature=feature,
    target='Frequency',
    prediction='FrequencyPrediction',
    exposure='Exposure',
)

In [9]:
from reportlab.platypus import SimpleDocTemplate, Image, Spacer, Paragraph
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
import plotly.express as px
import io

In [10]:
# Features to include in the PDF report. The list is passed to
# generate_pdf_report, which emits one section per entry in this order.
features = [
        "VehPower",
        "VehAge",
        "DrivAge",
        "BonusMalus",
        "VehBrand",
        "VehGas",
        "Area",
        "Density",
        "Region"
    ]

In [22]:
from reportlab.platypus import SimpleDocTemplate, Image, Spacer, Paragraph, PageBreak, KeepTogether
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
import io
import tempfile
import os
from PIL import Image as PILImage

# Page and margin configuration
# Values are in points, ReportLab's native unit (72 pt = 1 inch), so every
# margin below is one inch.
PAGE_WIDTH, PAGE_HEIGHT = letter
TOP_MARGIN = 72
BOTTOM_MARGIN = 72
LEFT_MARGIN = 72
RIGHT_MARGIN = 72
# Vertical space left on a page once the top and bottom margins are removed;
# generate_pdf_report splits this between the table image and the chart image.
AVAILABLE_HEIGHT = PAGE_HEIGHT - TOP_MARGIN - BOTTOM_MARGIN

# --- Helper: Save GT table to PNG in memory (Windows-safe) ---
def render_gt_table_as_image(gt_obj, scale=2.0):
    """Render a table object to PNG and return the bytes in an in-memory buffer.

    The table is written to a temporary file through its ``save`` method, the
    file is read back into memory, and the temporary location is removed
    before the function returns.

    Args:
        gt_obj: Object exposing ``save(path, scale=...)`` that writes an image
            file to ``path`` (in this notebook, the table returned by
            ``create_frequency_table``).
        scale: Forwarded unchanged to ``gt_obj.save``.

    Returns:
        io.BytesIO: Buffer positioned at offset 0 holding the file contents.

    Raises:
        Any exception raised by ``gt_obj.save`` or by reading the file back;
        the temporary directory is removed in that case as well.
    """
    # A private temporary directory replaces the deprecated tempfile.mktemp():
    # mktemp only returns a name, leaving a window in which another process can
    # create that path first. The directory is created atomically and is only
    # accessible to the current user, so the target path cannot be hijacked.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "table.png")
        gt_obj.save(tmp_path, scale=scale)
        # The file handle is closed before the directory is cleaned up; an
        # open handle would block deletion on Windows.
        with open(tmp_path, "rb") as f:
            # A freshly constructed BytesIO already starts at offset 0.
            return io.BytesIO(f.read())

# --- Helper: Scale images proportionally ---
def get_scaled_image(buf, max_width, max_height):
    """Wrap an image buffer in a ReportLab ``Image`` sized to fit a bounding box.

    The pixel size is read with PIL and one factor is applied to both
    dimensions, so the aspect ratio is preserved. The factor is the smaller of
    the width ratio and the height ratio; it is not clamped, so an image
    smaller than the box is enlarged to fill it.

    Args:
        buf: Seekable binary buffer containing an image PIL can open.
        max_width: Width of the bounding box.
        max_height: Height of the bounding box.

    Returns:
        A ReportLab ``Image`` flowable reading from ``buf`` with the scaled
        width and height.
    """
    # Rewind first: the caller may have left the cursor at the end of the data.
    buf.seek(0)
    source_width, source_height = PILImage.open(buf).size

    fit_ratio = min(max_width / source_width, max_height / source_height)

    # Rewind again so ReportLab reads the image from the start of the buffer.
    buf.seek(0)
    return Image(
        buf,
        width=source_width * fit_ratio,
        height=source_height * fit_ratio,
    )

def generate_pdf_report(features, output_file="report.pdf"):
    """Write a PDF containing one section per feature.

    For every feature the notebook globals ``holdout`` and
    ``continous_feature_config`` are aggregated, the aggregate is turned into
    a chart image and a table image, and the section is laid out as:
    heading + table (kept together), then the chart. Sections are separated
    by a page break; no page break follows the last one.

    Args:
        features: Sized sequence of feature names; one section is produced
            per entry, in order.
        output_file: Path of the PDF to write.

    Returns:
        None. The PDF is written to ``output_file`` and a confirmation line
        is printed.
    """
    document = SimpleDocTemplate(
        output_file,
        pagesize=letter,
        topMargin=TOP_MARGIN,
        bottomMargin=BOTTOM_MARGIN,
        leftMargin=LEFT_MARGIN,
        rightMargin=RIGHT_MARGIN,
    )
    story = []
    styles = getSampleStyleSheet()

    # Size budget per page: full width between the margins, 60% of the height
    # for the chart and 35% for the table.
    usable_width = PAGE_WIDTH - LEFT_MARGIN - RIGHT_MARGIN
    chart_height_cap = AVAILABLE_HEIGHT * 0.6
    table_height_cap = AVAILABLE_HEIGHT * 0.35  # small buffer

    for position, feature in enumerate(features):
        aggregated = aggregate_frequency_df(holdout, feature, continous_feature_config)

        # Chart: export the figure to an in-memory PNG and scale it to fit.
        figure = plot_aggregated_data(
            visual_data=aggregated,
            feature=feature,
            target='Frequency',
            prediction='FrequencyPrediction',
            exposure='Exposure',
        )
        chart_png = io.BytesIO()
        figure.write_image(chart_png, format="png")
        chart_png.seek(0)
        chart_image = get_scaled_image(
            chart_png, max_width=usable_width, max_height=chart_height_cap
        )

        # Table: render the table to PNG and scale it to fit.
        table_png = render_gt_table_as_image(
            create_frequency_table(aggregated, feature=feature), scale=2.0
        )
        table_image = get_scaled_image(
            table_png, max_width=usable_width, max_height=table_height_cap
        )

        # The heading must never be separated from its table by a page boundary.
        heading_and_table = [
            Paragraph(f"Report for: {feature}", styles['Heading2']),
            Spacer(1, 6),
            table_image,
            Spacer(1, 6),
        ]
        story.append(KeepTogether(heading_and_table))

        # The chart is a separate flowable placed after the table.
        story.extend([chart_image, Spacer(1, 6)])

        # Start the next feature on a new page; nothing follows the last one.
        if position + 1 < len(features):
            story.append(PageBreak())

    document.build(story)
    print(f"PDF report saved to {output_file}")


In [23]:
# Build report.pdf with one section for each entry in `features`.
generate_pdf_report(features, output_file="report.pdf")

PDF report saved to report.pdf
