# Breakpoint analysis for damaging winds or rain

Here, we explore the idea that wind/rain damage occurs above some threshold of wind speed, rain rate or rain accumulation. 

The damage survey results are classified into damaged/not damaged, and the rate of damaged buildings for a given wind speed/rain rate/rain accumulation is determined by binning the number of damaged buildings per wind speed interval. 

We then attempt to determine the threshold at which the damage rate increases significantly, using a Bayesian approach.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

import seaborn as sns
# Global plot styling; these calls affect every figure drawn later in the notebook.
sns.set_context("poster")    # seaborn's "poster" scaling preset
sns.set_style("whitegrid")   # white background with grid lines
sns.set_palette("hls")       # default colour cycle taken from the "hls" palette

Read in the damage dataset

In [None]:
# Alternative (network share) location, kept for reference -- presumably the
# same cleaned survey file; confirm before switching.
#filename = "//nas/gemd/georisk/HaRIA_B_Wind/projects/impact_forecasting/data/exposure/NSW/April_2015_Impact_Assessment/Property_Damage_cleaned.csv"
# Local copy of the cleaned property damage survey. The path is hard-coded, so
# edit it to match the machine the notebook runs on.
filename = "C:/Workspace/data/derived/exposure/NSW/Property_Damage_cleaned.csv"
# One row per surveyed record; columns are used by name in the cells below.
df = pd.read_csv(filename)

There are a number of blank fields throughout the data where a value was not entered into the dataset by the assessor. We need to keep track of the missing data, as well as the entered data, so we will find all 'NaN' values in the dataset, and change these to 'Not given' so we can include them in subsequent analyses.

In [None]:
# Replace every missing entry with the label 'Not given' so that blank survey
# fields can be treated as their own category later on.
# NOTE(review): this applies to all columns, including numeric ones; any
# numeric column that contains gaps will end up holding a mix of numbers and
# strings -- confirm the wind/rain columns used below have no missing values.
df = df.fillna(value='Not given')

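Now we add a column that indicates whether the building was damaged or not. Any building whose `EICU_Degdamage` value is one of the 'Minor Impact', 'Major Impact', 'Severe Impact' or 'Destroyed' categories is tagged as damaged (1); every other building, including those where no damage level was given, is tagged as not damaged (0).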

In [None]:
# Values of the 'EICU_Degdamage' column that count as "damaged".
damage_categories = [
    'Destroyed - 76-100%',
    'Severe Impact - 51-75%',
    'Major Impact - 26-50%',
    'Minor Impact - 1-25%',
]
is_damaged = df['EICU_Degdamage'].isin(damage_categories)

# Float flag per row: 1.0 where the damage category matches, 0.0 otherwise.
damaged = np.where(is_damaged, 1.0, 0.0)
df['Damaged'] = damaged

Determine the maximum wind speed among the damaged buildings, and set up 0.5 m/s wide bins from zero up to that maximum in which to count the damaged buildings.

In [None]:
# Wind speeds recorded for the buildings flagged as damaged.
wind_of_damaged = df.loc[df['Damaged'] > 0, "combined_alltimes_maxwind_stage4_ens12"]

# Highest wind speed among damaged buildings; upper limit of the analysis.
vmax = wind_of_damaged.max()

# Bin positions every 0.5 units from 0 up to (but excluding) vmax.
bins = np.arange(0, vmax, 0.5)

In [None]:
# Count damaged buildings in each wind speed interval.
#
# `bins` holds the left edge of every interval. Passing the integer
# `len(bins)` to np.histogram makes numpy spread that many equal-width bins
# between the smallest and largest observed wind speed, so the counts would
# not correspond to the entries of `bins`. Explicit edges (the left edges
# plus one closing right edge) give exactly one count per entry of `bins`.
# Assumes `bins` is evenly spaced and has at least two entries.
damaged_wind = df[df['Damaged']==1]["combined_alltimes_maxwind_stage4_ens12"].values
bin_edges = np.append(bins, bins[-1] + (bins[1] - bins[0]))
hist, edges = np.histogram(damaged_wind,
                           bins=bin_edges,
                           density=False)

In [None]:
# Draw each count over the interval it was actually computed for: `edges` are
# the bin boundaries returned by np.histogram, so bar i spans
# edges[i]..edges[i+1]. (Plotting against `bins` with centred bars would
# shift the bars relative to the counted intervals.)
plt.bar(edges[:-1], hist, width=np.diff(edges), align='edge')
_ = plt.xlabel("Model forecast wind speed (m/s)")
_ = plt.ylabel("Number of damaged buildings")


Now we will explore the onset of damage as a function of wind speed.

In [None]:
import pymc

In [None]:
# Priors for the change-point model.
#
# `switchpoint` is used as an *index* into the binned counts (see the slicing
# in `rate`), so its range must be the valid bin indices 0 .. len(bins) - 1.
# Using the maximum wind speed as the upper bound would restrict the
# breakpoint to the first `vmax` bins only.
switchpoint = pymc.DiscreteUniform('switchpoint', lower=0, upper=len(bins) - 1)
# Expected damage count per bin below / above the breakpoint.
early_mean = pymc.Exponential('early_mean', beta=1)
late_mean = pymc.Exponential('late_mean', beta=1)

In [None]:
@pymc.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    """Return the expected damage count for every bin.

    The result has one entry per element of ``bins``: entries before index
    ``s`` are set to ``e`` and entries from index ``s`` onwards to ``l``.
    """
    # Start with the "late" level everywhere, then overwrite the leading part.
    per_bin = np.full(len(bins), l, dtype=float)
    per_bin[:s] = e
    return per_bin

In [None]:
# Likelihood: the observed per-bin counts (`hist`) are modelled as Poisson
# draws whose mean is given by `rate`.
damage = pymc.Poisson(
    'damage',
    mu=rate,
    value=hist,
    observed=True,
)

# Collect every node of the model in one container for the sampler.
model_nodes = [switchpoint, early_mean, late_mean, rate, damage]
model = pymc.Model(model_nodes)

In [None]:
# Sample the posterior: 10000 iterations, the first 1000 discarded as burn-in,
# and every 10th remaining sample kept.
sampler_settings = {'iter': 10000, 'burn': 1000, 'thin': 10}
mcmc = pymc.MCMC(model)
mcmc.sample(**sampler_settings)

In [None]:
# Trace plots (sampled value against sample number), one row per parameter.
trace_panels = [
    ('switchpoint', "Switch point"),
    ('early_mean', "Early mean"),
    ('late_mean', "Late mean"),
]

plt.figure(figsize=(12, 12))
for row, (node, label) in enumerate(trace_panels, start=1):
    plt.subplot(len(trace_panels), 1, row)
    plt.plot(mcmc.trace(node)[:])
    plt.ylabel(label)
# Only the bottom panel carries the shared x-axis label.
plt.xlabel("Iteration")
plt.tight_layout()

In [None]:
# Histograms (15 bins each) of the sampled values, one column per parameter.
posterior_panels = [
    ('switchpoint', "Switch point"),
    ('early_mean', "Early mean"),
    ('late_mean', "Late mean"),
]

plt.figure(figsize=(14, 3))
for col, (node, label) in enumerate(posterior_panels, start=1):
    plt.subplot(1, len(posterior_panels), col)
    plt.hist(mcmc.trace(node)[:], bins=15)
    plt.xlabel(label)
    if col == 1:
        # Only the left-most panel is given a y-axis label.
        plt.ylabel("Distribution")
plt.tight_layout()

In [None]:
# Posterior summaries of the three model parameters.
switchpoint_samples = mcmc.trace('switchpoint')[:]
early_samples = mcmc.trace('early_mean')[:]
late_samples = mcmc.trace('late_mean')[:]

# Mean breakpoint, rounded to the nearest whole bin index.
yp = np.round(switchpoint_samples.mean(), 0)
em = early_samples.mean()   # mean count per bin below the breakpoint
es = early_samples.std()
lm = late_samples.mean()    # mean count per bin above the breakpoint
ls = late_samples.std()

# Convert the bin index to a wind speed using `edges`, the boundaries that
# np.histogram actually used to produce `hist`: bin i starts at edges[i].
# `edges` has one more entry than `hist`, so any index 0..len(hist) is valid.
print((edges[int(yp)], em, es, lm, ls))

In [None]:
# Damage counts per wind speed bin with the fitted breakpoint overlaid.

# Wind speed at which the bin with index `yp` starts; `edges` are the
# boundaries np.histogram used for `hist`.
threshold_speed = edges[int(yp)]

plt.figure(figsize=(12, 6))
# Each bar spans the interval its count was computed for.
plt.bar(edges[:-1], hist, width=np.diff(edges), align='edge')
plt.axvline(threshold_speed, color='k', ls='--', label="Mean breakpoint")
plt.plot([0, threshold_speed], [em, em], '-b', lw=3,
         label="Average damage count below threshold")
# The upper segment ends at the maximum wind speed. The x axis is in m/s, so
# the number of bins (len(bins)) is not a meaningful end point.
plt.plot([threshold_speed, vmax], [lm, lm], '-r', lw=3,
         label="Average damage count above threshold")
# Legend centred underneath the axes, entries side by side.
plt.legend(loc=10, bbox_to_anchor=(0.5, -0.2), ncol=3)
plt.xlim(0, vmax)
plt.xlabel("Model forecast wind speed (m/s)")
plt.ylabel("Number damaged buildings");

Repeat this process, using rainfall rate as the predictor.

In [None]:
# Bin the damaged buildings by modelled rainfall rate.
rain_of_damaged = df[df['Damaged']==1]["combined_alltimes_accum_ls_rainrate_stage4_ens00"].values

# Largest rainfall rate among damaged buildings; upper limit of the analysis.
rmax = df[df['Damaged']>0]["combined_alltimes_accum_ls_rainrate_stage4_ens00"].max()

# 100 equal-width bins covering 0..rmax. `bins` keeps the left edge of each
# bin so that len(bins) == len(hist) and bins[i] is where bin i starts.
# (Passing the integer len(bins) to np.histogram would instead spread the bins
# between the smallest and largest observation, which does not match `bins`.)
bin_edges = np.linspace(0, rmax, 101)
bins = bin_edges[:-1]
hist, edges = np.histogram(rain_of_damaged,
                           bins=bin_edges,
                           density=False)

# Bars start at their left edge and span exactly one bin.
plt.bar(bins, hist, width=(bins[1]-bins[0]), align='edge')
_ = plt.xlabel("Modelled precipitation rate (kg/m^2/s)")
_ = plt.ylabel("Number of damaged buildings")

In [None]:
# Priors for the change-point model (rainfall rate version).
#
# `switchpoint` is used as an *index* into the binned counts (see the slicing
# in `rate`), so its range must be the valid bin indices 0 .. len(bins) - 1.
# The maximum rainfall rate `rmax` is a physical value, not an index, and is
# therefore not a usable upper bound here.
switchpoint = pymc.DiscreteUniform('switchpoint', lower=0, upper=len(bins) - 1)
# Expected damage count per bin below / above the breakpoint.
early_mean = pymc.Exponential('early_mean', beta=1)
late_mean = pymc.Exponential('late_mean', beta=1)

@pymc.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    """Return the expected damage count for every bin.

    The result has one entry per element of ``bins``: entries before index
    ``s`` are set to ``e`` and entries from index ``s`` onwards to ``l``.
    """
    # Start with the "late" level everywhere, then overwrite the leading part.
    per_bin = np.full(len(bins), l, dtype=float)
    per_bin[:s] = e
    return per_bin

# Likelihood: the observed per-bin counts (`hist`) are modelled as Poisson
# draws whose mean is given by `rate`.
damage = pymc.Poisson(
    'damage',
    mu=rate,
    value=hist,
    observed=True,
)

# Collect every node of the model in one container for the sampler.
model_nodes = [switchpoint, early_mean, late_mean, rate, damage]
model = pymc.Model(model_nodes)

In [None]:
# Sample the posterior: 10000 iterations, the first 1000 discarded as burn-in,
# and every 10th remaining sample kept.
sampler_settings = {'iter': 10000, 'burn': 1000, 'thin': 10}
mcmc = pymc.MCMC(model)
mcmc.sample(**sampler_settings)

In [None]:
# Trace plots (sampled value against sample number), one row per parameter.
trace_panels = [
    ('switchpoint', "Switch point"),
    ('early_mean', "Early mean"),
    ('late_mean', "Late mean"),
]

plt.figure(figsize=(12, 12))
for row, (node, label) in enumerate(trace_panels, start=1):
    plt.subplot(len(trace_panels), 1, row)
    plt.plot(mcmc.trace(node)[:])
    plt.ylabel(label)
# Only the bottom panel carries the shared x-axis label.
plt.xlabel("Iteration")
plt.tight_layout()

In [None]:
# Histograms (15 bins each) of the sampled values, one column per parameter.
posterior_panels = [
    ('switchpoint', "Switch point"),
    ('early_mean', "Early mean"),
    ('late_mean', "Late mean"),
]

plt.figure(figsize=(14, 3))
for col, (node, label) in enumerate(posterior_panels, start=1):
    plt.subplot(1, len(posterior_panels), col)
    plt.hist(mcmc.trace(node)[:], bins=15)
    plt.xlabel(label)
    if col == 1:
        # Only the left-most panel is given a y-axis label.
        plt.ylabel("Distribution")
plt.tight_layout()

In [None]:
# Posterior summaries of the three model parameters.
switchpoint_samples = mcmc.trace('switchpoint')[:]
early_samples = mcmc.trace('early_mean')[:]
late_samples = mcmc.trace('late_mean')[:]

# Mean breakpoint, rounded to the nearest whole bin index.
yp = np.round(switchpoint_samples.mean(), 0)
em = early_samples.mean()   # mean count per bin below the breakpoint
es = early_samples.std()
lm = late_samples.mean()    # mean count per bin above the breakpoint
ls = late_samples.std()

# Convert the bin index to a rainfall rate using `edges`, the boundaries that
# np.histogram actually used to produce `hist`: bin i starts at edges[i].
# `edges` has one more entry than `hist`, so any index 0..len(hist) is valid.
print((edges[int(yp)], em, es, lm, ls))

In [None]:
# Damage counts per rainfall rate bin with the fitted breakpoint overlaid.

# Rainfall rate at which the bin with index `yp` starts; `edges` are the
# boundaries np.histogram used for `hist`.
threshold_rate = edges[int(yp)]

plt.figure(figsize=(12, 6))
# Each bar spans the interval its count was computed for.
plt.bar(edges[:-1], hist, width=np.diff(edges), align='edge')
plt.axvline(threshold_rate, color='k', ls='--', label="Mean breakpoint")
plt.plot([0, threshold_rate], [em, em], '-b', lw=3,
         label="Average damage count below threshold")
# The upper segment ends at the maximum rainfall rate. The x axis is a
# rainfall rate, so the number of bins (len(bins)) is not a meaningful end
# point.
plt.plot([threshold_rate, rmax], [lm, lm], '-r', lw=3,
         label="Average damage count above threshold")
# Legend centred underneath the axes, entries side by side.
plt.legend(loc=10, bbox_to_anchor=(0.5, -0.2), ncol=3)
plt.xlim(0, rmax)
plt.xlabel("Rainfall rate (kg/m^2/s)")
plt.ylabel("Number damaged buildings");

TODO:

* Compare to NEXIS building points per bin (wind speed/rainfall rate) for the region
