In [1]:
import pandas as pd

In [3]:
# Read both worksheets of the pipeline workbook in a single call.
# Passing a list of sheet names makes read_excel return a dict keyed by sheet name.
file_path = "MunichRe_ClimateClaims_Analytics_Pipeline.xlsx"
sheets = pd.read_excel(file_path, sheet_name=["Raw_Data", "Data Cleaning"])
raw_df = sheets["Raw_Data"]
cleaning_df = sheets["Data Cleaning"]

In [4]:
# Handle missing values
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# a column selection: that chained in-place form emits a FutureWarning on
# pandas 2.x and leaves raw_df unchanged once Copy-on-Write is enabled.
# Claim amounts: impute with the column median.
raw_df['Claim_Amount (€)'] = raw_df['Claim_Amount (€)'].fillna(raw_df['Claim_Amount (€)'].median())
# Temperatures: impute with the column mean.
raw_df['Temperature (°C)'] = raw_df['Temperature (°C)'].fillna(raw_df['Temperature (°C)'].mean())
# Rainfall: a missing reading is replaced with 0.
raw_df['Rainfall (mm)'] = raw_df['Rainfall (mm)'].fillna(0)

In [5]:
# Normalise Event_Type labels: lower-case them, then trim surrounding whitespace,
# so variants that differ only in case or padding collapse to one value.
event_type_lower = raw_df['Event_Type'].str.lower()
raw_df['Event_Type'] = event_type_lower.str.strip()

In [6]:
# Derive calendar features from the Date column.
# The DatetimeIndex is built once and reused for both components.
date_index = pd.DatetimeIndex(raw_df['Date'])
raw_df['Year'] = date_index.year
raw_df['Month'] = date_index.month

In [7]:
def temp_range(temp, cold_below=10, hot_above=25):
    """Bucket a temperature reading into "Cold", "Moderate" or "Hot".

    Args:
        temp: Temperature value to classify.
        cold_below: Readings strictly below this value are "Cold".
        hot_above: Readings strictly above this value are "Hot"; values from
            ``cold_below`` up to and including ``hot_above`` are "Moderate".

    Returns:
        One of the strings "Cold", "Moderate" or "Hot".

    Note:
        A NaN input fails both comparisons and therefore falls through to "Hot".
    """
    if temp < cold_below:
        return "Cold"
    if temp <= hot_above:
        return "Moderate"
    return "Hot"

In [8]:
def rain_category(rain, heavy_from=20):
    """Bucket a rainfall amount into "None", "Light" or "Heavy".

    Args:
        rain: Rainfall value to classify.
        heavy_from: Amounts at or above this value are "Heavy".

    Returns:
        "None" when ``rain`` equals 0, "Light" when it is below ``heavy_from``
        (this includes any negative value), otherwise "Heavy".

    Note:
        A NaN input fails both comparisons and therefore falls through to "Heavy".
    """
    if rain == 0:
        return "None"
    if rain < heavy_from:
        return "Light"
    return "Heavy"

In [9]:
def claim_risk(amount, low_below=7000, high_above=13000):
    """Bucket a claim amount into a "Low", "Medium" or "High" risk level.

    Args:
        amount: Claim amount to classify.
        low_below: Amounts strictly below this value are "Low".
        high_above: Amounts strictly above this value are "High"; amounts from
            ``low_below`` up to and including ``high_above`` are "Medium".

    Returns:
        One of the strings "Low", "Medium" or "High".

    Note:
        A NaN input fails both comparisons and therefore falls through to "High".
    """
    if amount < low_below:
        return "Low"
    if amount <= high_above:
        return "Medium"
    return "High"

In [10]:
# Derive the categorical columns: each target column is produced by applying
# its bucketing function element-wise to the corresponding source column.
derived_columns = {
    'Temp_Range': ('Temperature (°C)', temp_range),
    'Rain_Category': ('Rainfall (mm)', rain_category),
    'Claim_Risk_Level': ('Claim_Amount (€)', claim_risk),
}
for target_col, (source_col, categorize) in derived_columns.items():
    raw_df[target_col] = raw_df[source_col].apply(categorize)

In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [12]:
# Encode features
# Work on a copy so raw_df keeps its human-readable string categories.
df = raw_df.copy()

# Fit one LabelEncoder per column. Refitting a single shared encoder overwrites
# its classes_ each time, so only the last column's mapping would remain and the
# other columns could no longer be decoded with inverse_transform.
categorical_columns = ['Event_Type', 'Temp_Range', 'Rain_Category', 'Claim_Risk_Level']
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound to the Claim_Risk_Level encoder, which is what it held after
# the last fit in the previous version of this cell.
le = encoders['Claim_Risk_Level']

In [13]:
# Model inputs and target.
# X holds the predictor columns; y is the encoded Claim_Risk_Level to predict.
feature_columns = ['Event_Type', 'Temperature (°C)', 'Rainfall (mm)', 'Year', 'Month']
target_column = 'Claim_Risk_Level'
X = df[feature_columns]
y = df[target_column]

In [14]:
# Train model
# Fix the seed for both the split and the forest so that a fresh
# Restart & Run All reproduces the same partition, model and score.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation.
# NOTE(review): consider stratify=y once every class is known to have at least
# two rows; it is not set here because it raises on singleton classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
clf = RandomForestClassifier(random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

In [15]:
# Mean accuracy of the fitted classifier on the held-out test split.
test_accuracy = clf.score(X_test, y_test)
print("Model Accuracy:", test_accuracy)

Model Accuracy: 0.5


In [16]:
from transformers import pipeline

In [17]:
# Build a zero-shot text classifier; it is used further down to tag a claim
# description with a severity label.
zero_shot_task = "zero-shot-classification"
nli_checkpoint = "facebook/bart-large-mnli"
classifier = pipeline(zero_shot_task, model=nli_checkpoint)

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


In [18]:
pip install ipywidgets==7.7.5

Defaulting to user installation because normal site-packages is not writeable
Collecting ipywidgets==7.7.5
  Downloading ipywidgets-7.7.5-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting widgetsnbextension~=3.6.4 (from ipywidgets==7.7.5)
  Downloading widgetsnbextension-3.6.10-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting jupyterlab-widgets<3,>=1.0.0 (from ipywidgets==7.7.5)
  Downloading jupyterlab_widgets-1.1.11-py3-none-any.whl.metadata (3.7 kB)
Downloading ipywidgets-7.7.5-py2.py3-none-any.whl (123 kB)
   ---------------------------------------- 0.0/123.9 kB ? eta -:--:--
   --------- ------------------------------ 30.7/123.9 kB 1.3 MB/s eta 0:00:01
   ---------------------------------------- 123.9/123.9 kB 1.8 MB/s eta 0:00:00
Downloading jupyterlab_widgets-1.1.11-py3-none-any.whl (246 kB)
   ---------------------------------------- 0.0/246.9 kB ? eta -:--:--
   ---------------------------------------- 246.9/246.9 kB 7.6 MB/s eta 0:00:00
Downloading widgetsnbextension-3.6.10-

In [22]:
# Sentiment classification or severity tagging
# Builds the zero-shot classification pipeline from the facebook/bart-large-mnli model.
# NOTE(review): this cell repeats the earlier pipeline-construction cell verbatim
# (presumably re-run after the ipywidgets install) — consider keeping only one.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cpu


In [23]:
# Example: score one claim description against three candidate severity labels.
claim_text = "Basement filled with rainwater and short-circuited the heater."
severity_labels = ["severe", "moderate", "minor"]
result = classifier(claim_text, candidate_labels=severity_labels)

In [24]:
# Print the classifier's raw output for the example sentence.
print(result)

{'sequence': 'Basement filled with rainwater and short-circuited the heater.', 'labels': ['severe', 'moderate', 'minor'], 'scores': [0.8088679909706116, 0.15254174172878265, 0.0385902039706707]}
