/
pertloss.py
114 lines (86 loc) · 4.75 KB
/
pertloss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""A loss model based on a single loss scenario with
* low_loss = Low loss amount
* high_loss = High loss amount
* min_freq: The lowest number of times a loss will occur
* max_freq: The highest number of times a loss will occur
* most_likely_freq: The most likely number of times a loss will occur over some interval of time.
* kurtosis: Defaults to 4. Controls the shape of the distribution. Higher values cause a sharper peak.
The range low_loss -> high_loss should represent the 90% confidence interval
that the loss will fall in that range.
These values are then fit to a lognormal
distribution so that they fall at the 5% and 95% cumulative probability points.
The range min_freq -> max_freq should represent the 90% confidence interval
that the frequency will fall in that range.
The most_likely_freq will be used to skew the PERT distribution so that more of these values occur in the simulation.
The kurtosis will be used to control the shape of the distribution; even more of the most_likely_freq values will
occur in the simulation with higher kurtosis.
These values are then used to create Modified PERT distribution.
"""
import math
import numpy as np
from scipy.stats import lognorm, mode, norm
import tensorflow_probability as tfp
tfp = tfp.experimental.substrates.numpy
tfd = tfp.distributions
factor = -0.5 / norm.ppf(0.05)
class PERTLoss:
def __init__(self, low_loss, high_loss, min_freq, max_freq, most_likely_freq, kurtosis=4):
if min_freq >= max_freq:
# Min frequency must exceed max frequency
raise AssertionError
if not min_freq <= most_likely_freq <= max_freq:
# Most likely should be between min and max frequencies.
raise AssertionError
if low_loss >= high_loss:
# High loss must exceed low loss
raise AssertionError
# Set up the lognormal distribution
mu = (math.log(low_loss) + math.log(high_loss)) / 2. # Average of the logn of low/high
shape = factor * (math.log(high_loss) - math.log(low_loss)) # Standard deviation
self.magnitude_distribution = lognorm(shape, scale=math.exp(mu))
# Set up the PERT distribution
# From FAIR: the most likely frequency will set the skew/peak, and
# the "confidence" in the most likely frequency will set the kurtosis/temp of the distribution.
self.frequency_distribution = tfd.PERT(low=min_freq, peak=most_likely_freq, high=max_freq, temperature=kurtosis)
def annualized_loss(self):
"""Expected mean loss per year as scaled by the most likely frequency
:returns: Scalar of expected mean loss on an annualized basis."""
return self.frequency_distribution.mode().flat[0] * self.magnitude_distribution.mean()
def single_loss(self):
"""Draw a single loss amount. Not scaled by probability of occurrence.
:returns: Scalar value of a randomly generated single loss amount."""
return self.magnitude_distribution.rvs()
def simulate_losses_one_year(self):
"""Generate a random frequency and random magnitude from distributions.
:returns: Scalar value of one sample loss exposure."""
sample_frequency = self.frequency_distribution.sample(1)[0]
sample_magnitude = self.single_loss()
loss = sample_frequency * sample_magnitude
return loss
def simulate_years(self, n):
"""Draw randomly to simulate n years of possible losses.
:arg: n = Number of years to simulate
:returns: Numpy array of shape (n,) with loss amounts per year."""
# Create array of possible frequencies
frequency_array = self.frequency_distribution.sample(n)
# The loss amounts for all the losses across all the years, generated all at once.
# This is much higher performance than generating one year at a time.
magnitude_array = self.magnitude_distribution.rvs(size=n)
# Multiply frequency times magnitude from each array.
loss_array = frequency_array * magnitude_array
return loss_array
@staticmethod
def summarize_loss(loss_array):
"""Get statistics about a numpy array.
Risk is a range of possibilities, not just one outcome.
:arg: loss_array = Numpy array of simulated losses
:returns: Dictionary of statistics about the loss
"""
percentiles = np.percentile(loss_array, [10, 50, 90]).astype(int)
loss_summary = {'minimum': loss_array.min().astype(int),
'tenth_percentile': percentiles[0],
'mode': mode(loss_array)[0][0].astype(int),
'median': percentiles[1],
'ninetieth_percentile': percentiles[2],
'maximum': loss_array.max().astype(int)}
return loss_summary