Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Shannon Morrison
committed
Jan 31, 2020
1 parent
d921d5f
commit fd36325
Showing
5 changed files
with
246 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
# 1.0.4 - January 2020 | ||
|
||
* Added PERT loss from FAIR methodology | ||
|
||
# 1.0 - January 2020 | ||
|
||
* Open source release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
"""A loss model based on a single loss scenario with | ||
* low_loss = Low loss amount | ||
* high_loss = High loss amount | ||
* min_freq: The lowest number of times a loss will occur | ||
* max_freq: The highest number of times a loss will occur | ||
* most_likely_freq: The most likely number of times a loss will occur over some interval of time. | ||
* kurtosis: Defaults to 4. Controls the shape of the distribution. Higher values cause a sharper peak. | ||
The range low_loss -> high_loss should represent the 90% confidence interval | ||
that the loss will fall in that range. | ||
These values are then fit to a lognormal | ||
distribution so that they fall at the 5% and 95% cumulative probability points. | ||
The range min_freq -> max_freq sets the lower and upper bounds of the frequency: | ||
the PERT distribution only produces frequencies that fall in that range. | ||
The most_likely_freq will be used to skew the PERT distribution so that more of these values occur in the simulation. | ||
The kurtosis will be used to control the shape of the distribution; even more of the most_likely_freq values will | ||
occur in the simulation with higher kurtosis. | ||
These values are then used to create a Modified PERT distribution. | ||
""" | ||
import math | ||
|
||
import numpy as np | ||
from scipy.stats import lognorm, mode, norm | ||
import tensorflow_probability as tfp | ||
|
||
|
||
# Rebind `tfp` (imported above) to TensorFlow Probability's NumPy substrate so the
# distributions below operate on NumPy values.
# NOTE(review): newer TFP releases appear to expose this as
# `tensorflow_probability.substrates.numpy` -- confirm against the pinned version.
tfp = tfp.experimental.substrates.numpy
tfd = tfp.distributions
# norm.ppf(0.05) is the standard-normal z-score of the 5th percentile (about -1.645),
# so `factor` is roughly 1 / (2 * 1.645). PERTLoss multiplies the log-width of the
# low/high loss interval by it to obtain the lognormal standard deviation.
factor = -0.5 / norm.ppf(0.05)
|
||
|
||
class PERTLoss:
    """Loss model pairing a lognormal loss magnitude with a PERT loss frequency.

    The magnitude distribution is a lognormal fitted so that ``low_loss`` and
    ``high_loss`` sit at the 5% and 95% cumulative probability points. The
    frequency distribution is a PERT distribution bounded by ``min_freq`` and
    ``max_freq`` and peaked at ``most_likely_freq``.

    Attributes:
        magnitude_distribution: Frozen ``scipy.stats.lognorm`` for loss size.
        frequency_distribution: ``tfd.PERT`` distribution for loss frequency.
    """

    def __init__(self, low_loss, high_loss, min_freq, max_freq, most_likely_freq, kurtosis=4):
        """Validate the inputs and build both distributions.

        :arg: low_loss = Low loss amount (5% point); must be positive
        :arg: high_loss = High loss amount (95% point); must exceed low_loss
        :arg: min_freq = Lowest frequency; must be below max_freq
        :arg: max_freq = Highest frequency
        :arg: most_likely_freq = Peak frequency, within [min_freq, max_freq]
        :arg: kurtosis = Passed to PERT as ``temperature``; defaults to 4
        :raises AssertionError: if the frequencies or losses are mis-ordered
        :raises ValueError: if the loss amounts are not positive
        """
        if min_freq >= max_freq:
            raise AssertionError("max_freq must exceed min_freq")
        if not min_freq <= most_likely_freq <= max_freq:
            raise AssertionError("most_likely_freq must lie between min_freq and max_freq")
        if low_loss >= high_loss:
            raise AssertionError("high_loss must exceed low_loss")
        if low_loss <= 0:
            # math.log would fail with an opaque "math domain error"; say why instead.
            raise ValueError("low_loss and high_loss must be positive to fit a lognormal distribution")

        # Set up the lognormal distribution.
        log_low = math.log(low_loss)
        log_high = math.log(high_loss)
        mu = (log_low + log_high) / 2.  # Midpoint of the log-interval
        shape = factor * (log_high - log_low)  # Standard deviation in log space
        self.magnitude_distribution = lognorm(shape, scale=math.exp(mu))

        # Set up the PERT distribution.
        # From FAIR: the most likely frequency sets the skew/peak, and the
        # "confidence" in that value sets the kurtosis/temperature.
        self.frequency_distribution = tfd.PERT(low=min_freq, peak=most_likely_freq, high=max_freq,
                                               temperature=kurtosis)

    def annualized_loss(self):
        """Expected mean loss per year as scaled by the most likely frequency.

        :returns: Mode of the frequency distribution (first element of the
            returned array) multiplied by the mean of the magnitude distribution.
        """
        return self.frequency_distribution.mode().flat[0] * self.magnitude_distribution.mean()

    def single_loss(self):
        """Draw a single loss amount. Not scaled by frequency of occurrence.

        :returns: Scalar value of a randomly generated single loss amount.
        """
        return self.magnitude_distribution.rvs()

    def simulate_losses_one_year(self):
        """Generate a random frequency and random magnitude from the distributions.

        :returns: Scalar value of one sample loss exposure (frequency * magnitude).
        """
        sample_frequency = self.frequency_distribution.sample(1)[0]
        sample_magnitude = self.single_loss()
        return sample_frequency * sample_magnitude

    def simulate_years(self, n):
        """Draw randomly to simulate n years of possible losses.

        :arg: n = Number of years to simulate
        :returns: Numpy array of shape (n,) with loss amounts per year.
        """
        # Draw all frequencies and all magnitudes at once; this is much faster
        # than generating one year at a time.
        frequency_array = self.frequency_distribution.sample(n)
        magnitude_array = self.magnitude_distribution.rvs(size=n)
        # Element-wise product pairs each year's frequency with its magnitude.
        return frequency_array * magnitude_array

    @staticmethod
    def summarize_loss(loss_array):
        """Get statistics about a numpy array of losses.

        Risk is a range of possibilities, not just one outcome.

        :arg: loss_array = Numpy array of simulated losses
        :returns: Dictionary of integer-truncated statistics with keys
            'minimum', 'tenth_percentile', 'mode', 'median',
            'ninetieth_percentile' and 'maximum'.
        """
        percentiles = np.percentile(loss_array, [10, 50, 90]).astype(int)
        # scipy.stats.mode returns a length-1 array on older SciPy and a scalar
        # on newer releases; np.atleast_1d makes the indexing work on both.
        most_common = np.atleast_1d(mode(loss_array)[0])[0]
        loss_summary = {'minimum': loss_array.min().astype(int),
                        'tenth_percentile': percentiles[0],
                        'mode': most_common.astype(int),
                        'median': percentiles[1],
                        'ninetieth_percentile': percentiles[2],
                        'maximum': loss_array.max().astype(int)}
        return loss_summary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import unittest | ||
|
||
from riskquant import pertloss | ||
|
||
|
||
class Test(unittest.TestCase):
    """Unit tests for pertloss.PERTLoss using a small fixed scenario."""

    def setUp(self):
        # Scenario: losses between 1 and 10, frequency between 0.1 and 0.7
        # with a peak at 0.3 and a kurtosis of 1.
        # Positional order: low_loss, high_loss, min_freq, max_freq, most_likely_freq.
        self.s = pertloss.PERTLoss(1, 10, 0.1, .7, .3, kurtosis=1)

    def testAnnualized(self):
        # Mean of the magnitude distribution scaled by the mode of the
        # frequency distribution.
        expected = 1.2120038962444237
        self.assertAlmostEqual(self.s.annualized_loss(), expected)

    def testDistribution(self):
        # low/high were placed at cdf ~ 0.05 and cdf ~ 0.95, i.e. the bounds
        # of the 90% confidence interval. Check that this holds.
        magnitude = self.s.magnitude_distribution
        cdf_low = magnitude.cdf(1)
        self.assertTrue(0.049 < cdf_low < 0.051)
        cdf_high = magnitude.cdf(10)
        self.assertTrue(0.949 < cdf_high < 0.951)

    def testSingleLoss(self):
        # Averaging many single draws should land near the mean of the
        # magnitude distribution; frequency plays no part here.
        iterations = 10000
        total = sum(self.s.single_loss() for _ in range(iterations))
        mean_loss = total / iterations
        self.assertGreater(mean_loss, 3.9)
        self.assertLess(mean_loss, 4.2)

    def testSimulateLossesOneYear(self):
        # Each call yields one simulated yearly loss; the sum over 100 calls
        # should stay inside a loose sanity range.
        losses = [self.s.simulate_losses_one_year() for _ in range(100)]
        total = sum(losses)
        self.assertGreater(total, 0.05)
        self.assertLess(total, 10000)

    def testSimulateYears(self):
        # One entry per simulated year, with a mean close to the
        # annualized loss.
        years = 10000
        losses = self.s.simulate_years(years)
        self.assertEqual(len(losses), years)
        mean_loss = sum(losses) / years
        self.assertGreater(mean_loss, 0.5)
        self.assertLess(mean_loss, 1.5)

    def testMinMaxFrequency(self):
        # min_freq above max_freq must be rejected.
        self.assertRaises(AssertionError, pertloss.PERTLoss, 10, 100, .7, .1, .3)

    def testMostLikelyFrequency(self):
        # most_likely_freq above max_freq must be rejected.
        self.assertRaises(AssertionError, pertloss.PERTLoss, 10, 100, .1, .7, .8)

    def testLowHighLoss(self):
        # low_loss above high_loss must be rejected.
        self.assertRaises(AssertionError, pertloss.PERTLoss, 100, 10, .1, .7, .3)

    def testSummary(self):
        # Summary statistics are truncated to integers, so the low end of
        # this scenario collapses to 0.
        summary = self.s.summarize_loss(self.s.simulate_years(1000))
        for key in ('minimum', 'tenth_percentile', 'mode'):
            self.assertEqual(summary[key], 0)
        self.assertGreaterEqual(summary['median'], 1)
        self.assertGreaterEqual(summary['ninetieth_percentile'], 2)
        self.assertGreater(summary['maximum'], 5)