# Prototype V5
New Features:
- Offline Training - find a rough threshold

Note: 
- From this version on, the programming philosophy is completely different from that of the previous versions.

# Overview

## Phase I - Offline Training

- Data Generation
- Approximate Threshold Finding Using Grid Search
  - determine the range of threshold
  - determine the step size
  - keep track of metrics
  - find the best threshold

## Phase II - Online Fine Tuning
Reinforcement Learning - maybe Q* Learning

In [75]:
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns
import os
import csv
import random


In [76]:
# Simulation Configuration

# Reproducibility: seed Python's random generator once, before any event is drawn
random_seed = 2023
random.seed(random_seed)

# Population: total number of events and the share that is "of interest"
Num_Events = 10000
ratio_interest = 0.05
Num_Events_Interest = int(Num_Events * ratio_interest)
Num_Events_NoInterest = Num_Events - Num_Events_Interest

# Scratch variables that the generation loop overwrites on every iteration
random_number = event_value = event_class = 0

# Gaussian parameters (mean, standard deviation) of the event values
mean_0, std_0 = 10, 2  # events of no interest
mean_1, std_1 = 15, 2  # events of interest

In [77]:
# Synthetic Events

# One row per event -> col 0: event number, col 1: event value, col 2: event class
# (class 1 = event of interest, class 0 = event of no interest)
Events = np.zeros((Num_Events, 3))

for i in range(Num_Events):
    # A uniform draw decides the class; the value is then sampled from that class's Gaussian
    random_number = random.random()
    event_class = 1 if random_number < ratio_interest else 0
    if event_class == 1:
        event_value = random.gauss(mean_1, std_1)
    else:
        event_value = random.gauss(mean_0, std_0)
    # Fill the whole row in one assignment
    Events[i, :] = (i, event_value, event_class)



In [78]:
# Threshold search range: sweep from the smallest to the largest observed event value
observed_values = Events[:, 1]
T_min = observed_values.min()
T_max = observed_values.max()

# Divide the range into 200 equal increments
T_step = (T_max - T_min) / 200

In [79]:
# Helper Functions for Metrics Calculation

# Weighting used by the F-beta score: a larger beta puts more weight on recall,
# a smaller beta puts more weight on precision, and beta = 1 weighs both equally.
BETA = 1

def Precision(TP, FP):
    """Return the precision TP / (TP + FP).

    TP is the number of true positives and FP the number of false positives.
    When there are no positive predictions at all (TP + FP == 0) the ratio is
    undefined, and 0 is returned instead.
    """
    predicted_positives = TP + FP
    if predicted_positives == 0:
        return 0
    return TP / predicted_positives
def Recall(TP, FN):
    """Return the recall TP / (TP + FN).

    TP is the number of true positives and FN the number of false negatives.
    When there are no actual positives (TP + FN == 0) the ratio is undefined,
    and 0 is returned instead.
    """
    actual_positives = TP + FN
    return TP / actual_positives if actual_positives != 0 else 0
def Fbeta_Score(TP, FP, FN, beta=None):
    """Return the F-beta score for the given confusion counts.

    The score is (1 + beta^2) * P * R / (beta^2 * P + R), where P is
    Precision(TP, FP) and R is Recall(TP, FN).

    Parameters:
        TP: number of true positives.
        FP: number of false positives.
        FN: number of false negatives.
        beta: recall-vs-precision weighting; when omitted, the module-level
            BETA is used, so existing three-argument calls behave as before.

    Returns:
        The F-beta score, or 0 when precision or recall is 0 (the formula
        would otherwise divide by zero or be meaningless).
    """
    if beta is None:
        beta = BETA

    # Compute each metric once instead of re-evaluating it for every use below
    precision = Precision(TP, FP)
    recall = Recall(TP, FN)
    if precision == 0 or recall == 0:
        return 0

    beta_squared = beta**2
    return (1 + beta_squared) * precision * recall / (beta_squared * precision + recall)

# Metrics table: one row per candidate threshold of the grid search
# col 0: threshold number, col 1: threshold value, col 2: TP, col 3: FP, col 4: TN, col 5: FN,
# col 6: Precision, col 7: Recall, col 8: Fbeta score
num_thresholds = round((T_max - T_min) / T_step) + 1
Metrics = np.zeros((num_thresholds, 9))


In [80]:
# Loop for Threshold Finding
# For every candidate threshold, classify all events at once (value >= T means
# "predicted of interest") and fill the confusion counts and derived metrics.
# The counts are assigned rather than incremented, so re-running this cell
# does not accumulate on top of a previous run.

# Loop-invariant views of the events, computed once
event_scores = Events[:, 1]
labelled_interest = Events[:, 2] == 1
labelled_no_interest = Events[:, 2] == 0

for i in range(round((T_max - T_min)/T_step) + 1):
    # Threshold
    T = T_min + i * T_step
    Metrics[i, 0] = i
    Metrics[i, 1] = T

    # Events at or above the threshold are predicted to be of interest
    flagged = event_scores >= T
    Metrics[i, 2] = np.count_nonzero(flagged & labelled_interest)         # True Positive
    Metrics[i, 3] = np.count_nonzero(flagged & ~labelled_interest)        # False Positive
    Metrics[i, 4] = np.count_nonzero(~flagged & labelled_no_interest)     # True Negative
    Metrics[i, 5] = np.count_nonzero(~flagged & ~labelled_no_interest)    # False Negative

    # Metrics Calculation
    Metrics[i, 6] = Precision(Metrics[i, 2], Metrics[i, 3])
    Metrics[i, 7] = Recall(Metrics[i, 2], Metrics[i, 5])
    Metrics[i, 8] = Fbeta_Score(Metrics[i, 2], Metrics[i, 3], Metrics[i, 5])

In [81]:
# Scatter plot of every event with the best threshold drawn on top.
# Both classes use circle markers of size 7 and differ only in colour and opacity:
#   'No Interest' -> rgba(255, 222, 0, 0.3)
#   'Interest'    -> rgba(0, 90, 171, 1)
# The trace names double as the legend entries.

# Threshold of the row with the highest Fbeta score (first such row on ties)
best_row = np.argmax(Metrics[:, 8])
T_best = Metrics[best_row, 1]

# Split the events by class (col 2)
Events_NoInterest = Events[Events[:, 2] == 0]
Events_Interest = Events[Events[:, 2] == 1]

# Build the three traces
marker_no_interest = dict(color = 'rgba(255, 222, 0, 0.3)', symbol = 'circle', size = 7)
marker_interest = dict(color = 'rgba(0, 90, 171, 1)', symbol = 'circle', size = 7)

trace_no_interest = go.Scatter(
    x = Events_NoInterest[:, 0],
    y = Events_NoInterest[:, 1],
    mode = 'markers',
    marker = marker_no_interest,
    name = 'No Interest',
)
trace_interest = go.Scatter(
    x = Events_Interest[:, 0],
    y = Events_Interest[:, 1],
    mode = 'markers',
    marker = marker_interest,
    name = 'Interest',
)
# Horizontal line at T_best spanning the whole event-number axis
trace_threshold = go.Scatter(
    x = [0, Num_Events],
    y = [T_best, T_best],
    mode = 'lines',
    marker = dict(color = 'rgba(0, 255, 0, 1)'),
    name = 'Optimal Threshold',
)

# Assemble and display the figure
fig_events = go.Figure()
for event_trace in (trace_no_interest, trace_interest, trace_threshold):
    fig_events.add_trace(event_trace)

fig_events.update_layout(
    title = 'Events',
    xaxis_title = 'Event Number',
    yaxis_title = 'Event Value',
)
fig_events.show()

In [82]:
# Use Plotly to plot precision, recall and Fbeta score as functions of the threshold.
# The three curves are drawn as separate traces on one shared figure.

# Prepare the data for plotting
# NOTE: the arrays carry a "_curve" suffix on purpose. Binding them to the names
# Precision / Recall / Fbeta_Score would overwrite the helper functions of the
# same names and make any later re-run of the threshold-finding loop fail.
# Precision
Precision_curve = Metrics[:, 6]
# Recall
Recall_curve = Metrics[:, 7]
# Fbeta Score
Fbeta_Score_curve = Metrics[:, 8]
# Threshold
Threshold = Metrics[:, 1]

# Plot the metrics
fig_metrics = go.Figure()
fig_metrics.add_trace(go.Scatter(x = Threshold, y = Precision_curve, mode = 'lines', name = 'Precision'))
fig_metrics.add_trace(go.Scatter(x = Threshold, y = Recall_curve, mode = 'lines', name = 'Recall'))
fig_metrics.add_trace(go.Scatter(x = Threshold, y = Fbeta_Score_curve, mode = 'lines', name = 'Fbeta Score'))
fig_metrics.update_layout(title = 'Metrics', xaxis_title = 'Threshold', yaxis_title = 'Value')
fig_metrics.show()