# Cusum Validation Notebook

This is a small notebook to serve as a test for the CUSUM and deviation detection functions.
In particular, it ensures that both `calculate_cusum` and `detect_cusum_deviations` work the same
with both arrays of variables and single variable arrays.

It generates fake radius/temperature error data by adding an offset of `+DEVIATION_DELTA` or `-DEVIATION_DELTA`
(sign chosen at random) over `DEVIATION_LENGTH` consecutive time steps at `DEVIATION_COUNT` random locations per variable.
It then uses the provided `CUSUM_TOLERANCE` and `CUSUM_THRESHOLD` to calculate CUSUM and identify deviations for both multiple variable and single variable input arrays.
It then graphs both results so that the user can ensure that both single variable and multi-variable versions
output the same graph.

This notebook also serves as a brief implementation guide for `calculate_cusum` and `detect_cusum_deviations`.

In [None]:
from droplet_approximation import *
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Time axis for the synthetic error series.
TIME_START, TIME_END = 0, 10
NUMBER_TIME_STEPS    = 1000

# Synthetic deviations: each one is a constant offset of magnitude
# DEVIATION_DELTA applied to at most DEVIATION_LENGTH consecutive samples;
# DEVIATION_COUNT of them are injected into every variable.
DEVIATION_COUNT  = 3
DEVIATION_LENGTH = 20
DEVIATION_DELTA  = 0.3

# One entry per variable: index 0 is used for radius, index 1 for temperature.
# These must be NumPy arrays, not plain Python lists.
CUSUM_THRESHOLD = np.array( [0.8, 3.0] )
CUSUM_TOLERANCE = np.array( [0.2, 0.28] )

In [None]:
# One all-zero error series per variable (row 0 is plotted as radius, row 1 as
# temperature), sampled on a uniform time grid.
rt_difference_data = np.zeros( shape=( 2, NUMBER_TIME_STEPS ) )
times = np.linspace( start=TIME_START, stop=TIME_END, num=NUMBER_TIME_STEPS )

In [None]:
# Add DEVIATION_COUNT constant offsets to each variable's error series.  Each
# row yielded by the iteration is modified in place, so the offsets land
# directly in rt_difference_data.
for variable_errors in rt_difference_data:
    for deviation_number in range( DEVIATION_COUNT ):
        # The start index is drawn before the sign; keep this order so that a
        # seeded random generator produces the same data.
        window_start = np.random.randint( 0, NUMBER_TIME_STEPS )

        # Windows that would run past the end of the series are truncated.
        window_stop = min( window_start + DEVIATION_LENGTH, NUMBER_TIME_STEPS )

        offset_sign = np.random.choice( [-1.0, 1.0] )
        variable_errors[window_start:window_stop] += DEVIATION_DELTA * offset_sign

In [None]:
# Multi-variable path: both error series go through a single call.

cusum_data = calculate_cusum( rt_difference_data, CUSUM_TOLERANCE )

# Collapse the detection output to four rows of masks.  The label vectors
# below assume the row order radius+, radius-, temperature+, temperature-.
# NOTE(review): that ordering depends on detect_cusum_deviations' output layout - confirm.
deviation_masks = detect_cusum_deviations( cusum_data, CUSUM_THRESHOLD )
deviation_masks = deviation_masks.reshape( (4, -1) )

# Row labels for the four mask rows.
deviation_direction_vector = np.array( [DeviationDirection.POSITIVE,
                                        DeviationDirection.NEGATIVE] * 2 )
deviation_parameter_vector = np.array( [DeviationParameter.RADIUS] * 2 +
                                       [DeviationParameter.TEMPERATURE] * 2 )

# Number of flagged time steps in each mask row.
deviation_counts = deviation_masks.sum( axis=1 )

# Expand the per-row labels so that every flagged time step carries its own
# direction and parameter, aligned element-for-element with deviation_times.
direction_runs = []
parameter_runs = []
for row_count, row_direction, row_parameter in zip( deviation_counts,
                                                    deviation_direction_vector,
                                                    deviation_parameter_vector ):
    direction_runs.append( np.full( row_count, row_direction ) )
    parameter_runs.append( np.full( row_count, row_parameter ) )

deviation_directions = np.hstack( direction_runs )
deviation_parameters = np.hstack( parameter_runs )
deviation_times      = np.hstack( [times[row_mask] for row_mask in deviation_masks] )

In [None]:
fig_h, ax_h = plt.subplots( nrows=2, ncols=2, figsize=(12, 8) )
fig_h.suptitle( "CUSUM Test Run passing full variable arrays" )

# Top row: raw error series.  Bottom row: the corresponding CUSUM traces.
(radius_error_ax, temperature_error_ax), (radius_cusum_ax, temperature_cusum_ax) = ax_h

# Styling shared by every dashed tolerance/threshold guide line.
guide_style = dict( linewidth=1, linestyle="--", color="blue" )

# Raw radius error with its +/- tolerance lines.
radius_error_ax.set_title( "Absolute Radius Error" )
radius_error_ax.set_xlabel( "time" )
radius_error_ax.set_ylabel( "radius error (m)" )
radius_error_ax.plot( times, rt_difference_data[0], label="radius error" )
for sign in ( 1, -1 ):
    radius_error_ax.axhline( y=sign * CUSUM_TOLERANCE[0],
                             label="CUSUM Radius Tolerance", **guide_style )

# Raw temperature error with its +/- tolerance lines.
# NOTE(review): the "(m)" unit in this axis label looks copied from the radius plot - confirm.
temperature_error_ax.set_title( "Absolute Temperature Error" )
temperature_error_ax.set_xlabel( "time" )
temperature_error_ax.set_ylabel( "temperature error (m)" )
temperature_error_ax.plot( times, rt_difference_data[1], label="temperature error" )
for sign in ( 1, -1 ):
    temperature_error_ax.axhline( y=sign * CUSUM_TOLERANCE[1],
                                  label="CUSUM Temperature Tolerance", **guide_style )

# Radius CUSUM traces with the +/- threshold lines.
radius_cusum_ax.set_title( "Radius CUSUM" )
radius_cusum_ax.set_xlabel( "time" )
radius_cusum_ax.set_ylabel( "cumulative radius error" )
radius_cusum_ax.plot( times, cusum_data[0].T,
                      label=["positive radius CUSUM error", "negative radius CUSUM error"] )
for sign in ( 1, -1 ):
    radius_cusum_ax.axhline( y=sign * CUSUM_THRESHOLD[0],
                             label="CUSUM Radius Threshold", **guide_style )

# Temperature CUSUM traces with the +/- threshold lines.
temperature_cusum_ax.set_title( "Temperature CUSUM" )
temperature_cusum_ax.set_xlabel( "time" )
temperature_cusum_ax.set_ylabel( "cumulative temperature error" )
temperature_cusum_ax.plot( times, cusum_data[1].T,
                           label=["positive temperature CUSUM error", "negative temperature CUSUM error"] )
for sign in ( 1, -1 ):
    temperature_cusum_ax.axhline( y=sign * CUSUM_THRESHOLD[1],
                                  label="CUSUM Temperature Threshold", **guide_style )

# Mark every flagged time step with a red vertical line on the CUSUM panel of
# the variable it belongs to.
for direction, parameter, time in zip( deviation_directions, deviation_parameters, deviation_times ):
    line_label = f"{parameter} CUSUM deviation in {direction} direction"
    target_ax  = radius_cusum_ax if parameter == DeviationParameter.RADIUS else temperature_cusum_ax
    target_ax.axvline( x=time, linewidth=1, linestyle="--", label=line_label, color="red" )

fig_h.tight_layout()

In [None]:
# Test with 1D arrays - output should be identical

# Run each variable through the pipeline on its own: a 1D error series with
# that variable's single tolerance/threshold entry.
radius_cusum_data      = calculate_cusum( rt_difference_data[0], CUSUM_TOLERANCE[0] )
temperature_cusum_data = calculate_cusum( rt_difference_data[1], CUSUM_TOLERANCE[1] )

# Detect deviations on the 1D CUSUM results computed just above.  Indexing
# `cusum_data` here instead would feed in whatever an earlier cell left in that
# variable (the multi-variable result), so the 1D calculate_cusum output would
# never reach the detection step.
radius_deviation_masks      = detect_cusum_deviations( radius_cusum_data, CUSUM_THRESHOLD[0] )
temperature_deviation_masks = detect_cusum_deviations( temperature_cusum_data, CUSUM_THRESHOLD[1] )

# Reassemble the per-variable results into the layout the plotting code
# indexes: cusum_data[variable] and one mask row per (variable, direction) pair.
cusum_data = np.array( [radius_cusum_data, temperature_cusum_data] )
deviation_masks = np.vstack( [radius_deviation_masks, temperature_deviation_masks] )

# Labels for the four mask rows: radius+, radius-, temperature+, temperature-.
deviation_direction_vector = np.array( [DeviationDirection.POSITIVE,
                                        DeviationDirection.NEGATIVE,
                                        DeviationDirection.POSITIVE,
                                        DeviationDirection.NEGATIVE] )
deviation_parameter_vector = np.array( [DeviationParameter.RADIUS,
                                        DeviationParameter.RADIUS,
                                        DeviationParameter.TEMPERATURE,
                                        DeviationParameter.TEMPERATURE] )

# Number of flagged time steps in each mask row.
deviation_counts = deviation_masks.sum( axis=1 )

# Repeat each row's label once per flagged time step so the three arrays
# below line up element-for-element.
deviation_directions   = np.hstack( [np.full( count, direction ) for count, direction
                                     in zip( deviation_counts, deviation_direction_vector )] )
deviation_parameters   = np.hstack( [np.full( count, parameter ) for count, parameter
                                     in zip( deviation_counts, deviation_parameter_vector )] )
deviation_times        = np.hstack( [times[mask] for mask in deviation_masks] )

In [None]:

fig_h, ax_h = plt.subplots( nrows=2, ncols=2, figsize=(12, 8) )
fig_h.suptitle( "CUSUM Test Run passing individual variable arrays" )

# Top row: raw error series.  Bottom row: the corresponding CUSUM traces.
(radius_error_ax, temperature_error_ax), (radius_cusum_ax, temperature_cusum_ax) = ax_h

# Styling shared by every dashed tolerance/threshold guide line.
guide_style = dict( linewidth=1, linestyle="--", color="blue" )

# Raw radius error with its +/- tolerance lines.
radius_error_ax.set_title( "Absolute Radius Error" )
radius_error_ax.set_xlabel( "time" )
radius_error_ax.set_ylabel( "radius error (m)" )
radius_error_ax.plot( times, rt_difference_data[0], label="radius error" )
for sign in ( 1, -1 ):
    radius_error_ax.axhline( y=sign * CUSUM_TOLERANCE[0],
                             label="CUSUM Radius Tolerance", **guide_style )

# Raw temperature error with its +/- tolerance lines.
# NOTE(review): the "(m)" unit in this axis label looks copied from the radius plot - confirm.
temperature_error_ax.set_title( "Absolute Temperature Error" )
temperature_error_ax.set_xlabel( "time" )
temperature_error_ax.set_ylabel( "temperature error (m)" )
temperature_error_ax.plot( times, rt_difference_data[1], label="temperature error" )
for sign in ( 1, -1 ):
    temperature_error_ax.axhline( y=sign * CUSUM_TOLERANCE[1],
                                  label="CUSUM Temperature Tolerance", **guide_style )

# Radius CUSUM traces with the +/- threshold lines.
radius_cusum_ax.set_title( "Radius CUSUM" )
radius_cusum_ax.set_xlabel( "time" )
radius_cusum_ax.set_ylabel( "cumulative radius error" )
radius_cusum_ax.plot( times, cusum_data[0].T,
                      label=["positive radius CUSUM error", "negative radius CUSUM error"] )
for sign in ( 1, -1 ):
    radius_cusum_ax.axhline( y=sign * CUSUM_THRESHOLD[0],
                             label="CUSUM Radius Threshold", **guide_style )

# Temperature CUSUM traces with the +/- threshold lines.
temperature_cusum_ax.set_title( "Temperature CUSUM" )
temperature_cusum_ax.set_xlabel( "time" )
temperature_cusum_ax.set_ylabel( "cumulative temperature error" )
temperature_cusum_ax.plot( times, cusum_data[1].T,
                           label=["positive temperature CUSUM error", "negative temperature CUSUM error"] )
for sign in ( 1, -1 ):
    temperature_cusum_ax.axhline( y=sign * CUSUM_THRESHOLD[1],
                                  label="CUSUM Temperature Threshold", **guide_style )

# Mark every flagged time step with a red vertical line on the CUSUM panel of
# the variable it belongs to.
for direction, parameter, time in zip( deviation_directions, deviation_parameters, deviation_times ):
    line_label = f"{parameter} CUSUM deviation in {direction} direction"
    target_ax  = radius_cusum_ax if parameter == DeviationParameter.RADIUS else temperature_cusum_ax
    target_ax.axvline( x=time, linewidth=1, linestyle="--", label=line_label, color="red" )

fig_h.tight_layout()