In [38]:
import polars as pl
import numpy as np
import hvplot.pandas

In [4]:
# auxiliar functions
def show_polars(
    df, all_columns=True, all_rows=False, number_of_rows=None, width_chars=1000
):
    """This function allows to print polars dataframe with a cool style

    Args:
        df (_type_): _description_
        all_columns (bool, optional): _description_. Defaults to True.
        all_rows (bool, optional): _description_. Defaults to False.
        number_of_rows (_type_, optional): _description_. Defaults to None.
    """
    if all_columns:
        tbl_cols = -1
    else:
        tbl_cols = None
    if all_rows:
        tbl_rows = -1
    elif number_of_rows != None:
        tbl_rows = number_of_rows
    else:
        tbl_rows = None

    with pl.Config(
        set_fmt_float="full",
        thousands_separator=",",
        decimal_separator=".",
        float_precision=2,
        tbl_rows=tbl_rows,
        tbl_cols=tbl_cols,
        tbl_width_chars=width_chars,
    ):

        print(df)


def summary(df, width_chars=1000):

    with pl.Config(
        set_fmt_float="full",
        thousands_separator=",",
        decimal_separator=".",
        float_precision=3,
        tbl_rows=-1,
        tbl_width_chars=width_chars,
    ):
        empty = (
            df.fill_nan(None)
            .describe()
            .filter(pl.col("statistic").is_in(["count", "null_count", "min", "max"]))
            .transpose(
                include_header=True, column_names=["count", "null_count", "min", "max"]
            )
        ).filter(pl.col("column") != "statistic")
        print(empty)


def value_counts_df(df, column_list):
    with pl.Config(
        set_fmt_float="full",
        thousands_separator=",",
        decimal_separator=".",
        float_precision=3,
        tbl_rows=-1,
    ):
        for i in column_list:
            with pl.Config(tbl_rows=-1):
                h = (
                    df.get_column(i)
                    .value_counts(normalize=True)
                    .sort(by="proportion", descending=True)
                )
                print(h)

In [42]:
path = r"C:\Users\abguerr\OneDrive - Chubb\2024\Ratemaking-Brazil\Archivo\parquet_data"
name = "ELECTRICAL DAMAGE"

final_df = pl.read_parquet(source=path + f"\\{name}.parquet")
show_polars(final_df)

shape: (477_111, 37)
┌─────────────────┬───────────────────┬──────────────┬─────────────────┬─────────┬─────────────────────────────────┬─────────────────────────────────┬──────────────┬────────────┬───────────┬────────────┬───────────────────────────────┬──────────────┬─────────────────────┬──────────────┬────────────────────────────┬─────────────────────┬────────────┬───────┬──────┬───────────────┬────────────┬────────────┬───────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┬───────────────────┬────────────────────┬───────┬─────────┬────────────┬─────────────┬──────────────────┬─────────────────────┬──────────────┬──────────────────┐
│ cobertura       ┆ coverage          ┆ TIPO_EMISION ┆ PRODUCT_NAME    ┆ RUBRICA ┆ INDUSTRY                        ┆ OCCUPANCE                       ┆ MACRO_REGION ┆ Extintores ┆ Hidrantes ┆ Sprinklers ┆ Detectores_de_Fumaça_ou_Calor ┆ CO2_ou_FM200 ┆ Brigada_de_Incendio ┆ Al

In [43]:
for col in ["PRODUCT_NAME", "region", "Extintores", "Hidrantes", "Sprinklers"]:

    by_column = (
        final_df.lazy()
        .group_by(col)
        .agg(pl.col(["exposition", "claim_count", "ultimate_il_orig"]).sum())
        .with_columns(
            frecuencia=pl.col("claim_count") / pl.col("exposition"),
            severidad=pl.col("ultimate_il_orig") / pl.col("claim_count"),
            burningCost=pl.col("ultimate_il_orig") / pl.col("exposition"),
        )
        .sort(["exposition"])
        .collect()
    )

    # we create the graph, we have three, scatter plot, line and bar. Then we
    # join them with the * operator hollowview
    severity_freq = []

    for variable in ["frecuencia", "severidad"]:

        Y = (
            by_column.to_pandas()
            .hvplot.bar(
                title=f"{variable} vs exposición",
                x=f"{col}",
                y="exposition",
                c="exposition",
                legend="right",
                width=600,
                grid=True,
                cmap="Blues",
                rot=45,
            )
            .opts(xrotation=45)
        )

        W = by_column.to_pandas().hvplot.line(
            x=f"{col}",
            y=[f"{variable}"],
            shared_axes=False,
            width=600,
            color="teal",
            grid=True,
            line_width=1.5,
        )

        R = (
            by_column.to_pandas()
            .hvplot.scatter(x=f"{col}", y=[f"{variable}"], grid=True)
            .opts(
                color="black",
                size=5,
                marker="o",
            )
        )

        H = Y * W * R  # .opts(multi_y=True)
        severity_freq.append(H.opts(multi_y=True))

    display(severity_freq[0] + severity_freq[1])

In [32]:
df_random = final_df.with_columns(
    random_number=np.random.randint(1, 10, size=final_df.shape[0])
)

df_random.select(pl.col("random_number")).hvplot.hist(
    y=None,
    grid=True,
    alpha=0.5,
    width=1000,
    legend="right",
    bin_range=(0, 10),
    bins=10,
    title="random numbers",
)

training_frequency = df_random.filter(pl.col("random_number") <= 8)  # 80%
show_polars(training_frequency)
test_frequency = df_random.filter(pl.col("random_number") > 8)
show_polars(test_frequency)

training_frequency.shape[0] + test_frequency.shape[0] == df_random.shape[0]

shape: (424_325, 38)
┌─────────────────┬───────────────────┬──────────────┬─────────────────┬─────────┬─────────────────────────────────┬─────────────────────────────────┬──────────────┬────────────┬───────────┬────────────┬───────────────────────────────┬──────────────┬─────────────────────┬──────────────┬────────────────────────────┬─────────────────────┬────────────┬───────┬──────┬───────────────┬────────────┬────────────┬───────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┬───────────────────┬────────────────────┬───────┬─────────┬────────────┬─────────────┬──────────────────┬─────────────────────┬──────────────┬──────────────────┬───────────────┐
│ cobertura       ┆ coverage          ┆ TIPO_EMISION ┆ PRODUCT_NAME    ┆ RUBRICA ┆ INDUSTRY                        ┆ OCCUPANCE                       ┆ MACRO_REGION ┆ Extintores ┆ Hidrantes ┆ Sprinklers ┆ Detectores_de_Fumaça_ou_Calor ┆ CO2_ou_FM200 ┆ Brigada_

True

## Red neuronal

In [None]:
from sklearn.model_selection import train_test_split

# split out the validation set
train_x, val_x, train_y, val_y, train_evy, val_evy = train_test_split(
    df.drop(labels=["claim_nb", "exposure"], axis=1),
    df["claim_nb"],
    df["exposure"],
    test_size=0.15,
    random_state=0,
    shuffle=True,
)

# now split train and test
train_x, test_x, train_y, test_y, train_evy, test_evy = train_test_split(
    train_x, train_y, train_evy, test_size=0.2, random_state=0, shuffle=True
)

In [None]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()

        # linear
        self.F_hidden_one = torch.nn.Linear(75 + 1, 250)
        self.F_hidden_two = torch.nn.Linear(250, 250)
        self.F_output = torch.nn.Linear(250, 1)

        # add in drop out
        self.dropout_one = torch.nn.Dropout(p=0.25)
        self.dropout_two = torch.nn.Dropout(p=0.25)

        # initialise weights
        # He initialisation
        torch.nn.init.kaiming_uniform_(self.F_hidden_one.weight)
        torch.nn.init.kaiming_uniform_(self.F_hidden_two.weight)
        torch.nn.init.kaiming_uniform_(self.F_output.weight)

        # initialise the final bias
        torch.nn.init.constant_(self.F_output.bias, y_hat)

    def forward(self, x):
        # ELU activations
        elu = torch.nn.ELU(alpha=1)

        # calculate F
        F = self.dropout_one(x)
        F = self.F_hidden_one(F)
        F = elu(F)
        F = self.dropout_two(F)
        F = self.F_hidden_two(F)
        F = elu(F)
        F = self.F_output(F)
        F = torch.exp(F)

        return F