In [93]:
import numpy as np
from utils import read_dataset_data, split_data, reshape_data, split_data_with_ind
from aeon.classification.interval_based import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from joblib import dump, load
from tqdm import tqdm
from typing import List

In [86]:
# CSV file holding the recorded games; the path is relative to the notebook's working directory.
dataset_path: str = "bounce_dataset.csv"
# Parsed with the project helper from utils. Later cells index each row positionally:
# column 0 is compared against the game id and column 5 against the 0/1 bounce flag.
dataset_init = read_dataset_data(dataset_path)

# Bounce analysis


In [87]:
# Per-game bounce timing, collected in a single forward pass over dataset_init.
#   first_bounce        - row offset (within its game) of the first row flagged 1
#   between_bounce_time - row distance between consecutive flagged rows of a game
# Rows of one game are assumed to be contiguous and games to appear in id order
# (0..50) - TODO confirm against the CSV.
first_bounce: List[int] = []
between_bounce_time: List[int] = []
n: int = len(dataset_init)
ind: int = 0

for game_id in range(51):
    curr_time: int = 0
    prev_bounce_time: int = 0
    # True while we are still in the leading run of rows whose flag is 0.
    in_lead: bool = True

    while ind < n and dataset_init[ind][0] == game_id:
        label = dataset_init[ind][5]
        if in_lead:
            if label != 0:
                # The leading run ends at the first non-zero flag; only a flag of
                # exactly 1 counts as the game's first bounce.
                in_lead = False
                if label == 1:
                    first_bounce.append(curr_time)
                    prev_bounce_time = curr_time
        elif label == 1:
            # Every later flagged row contributes one interval.
            between_bounce_time.append(curr_time - prev_bounce_time)
            prev_bounce_time = curr_time
        ind += 1
        curr_time += 1

In [17]:
# Summary statistics (in rows) for the time to the first bounce of each game.
print(f"First bounce min time: {np.min(first_bounce)}")
print(f"First bounce max time: {np.max(first_bounce)}")
print(f"First bounce average time: {np.mean(first_bounce)}")
print(f"First bounce median time: {np.median(first_bounce)}")
# This line prints the standard deviation; it was previously mislabelled as a second "median".
print(f"First bounce std time: {np.std(first_bounce)}")
print()
# Summary statistics (in rows) for the gaps between consecutive bounces.
# NOTE(review): the "min" below is the third-smallest interval, not the true minimum -
# presumably to skip a couple of outliers; confirm the intent. It raises IndexError
# when fewer than three intervals were collected.
print(f"Between bounce min time: {sorted(between_bounce_time)[2]}")
print(f"Between bounce max time: {np.max(between_bounce_time)}")
print(f"Between bounce average time: {np.mean(between_bounce_time)}")
print(f"Between bounce median time: {np.median(between_bounce_time)}")
# Standard deviation, previously mislabelled as "median" as well.
print(f"Between bounce std time: {np.std(between_bounce_time)}")

First bounce min time: 2
First bounce max time: 45
First bounce average time: 25.098039215686274
First bounce median time: 28.0
First bounce median time: 12.143351125100589

Between bounce min time: 16
Between bounce max time: 103
Between bounce average time: 37.84649122807018
Between bounce median time: 36.0
Between bounce median time: 13.042714440223758


# Detect bounce in window

In [88]:
# Window length handed to the project helpers below.
window_size: int = 10
# split_data (utils) returns three values; only the first (window inputs) and the
# third (per-window labels) are kept, the second is discarded.
x_dataset, _, y_window_dataset  = split_data(dataset_init, total_games=51, window_size=window_size)
# Rearranged by reshape_data (utils), presumably into the layout the classifier
# expects - see the helper for the exact shape.
x_dataset = reshape_data(x_dataset, is_window=True, window_size=window_size)

In [89]:
# Sanity check: row count of the raw dataset versus the number of windows and labels
# produced from it (the last two must match for training).
print(f"Initial size: {len(dataset_init)}")
print(f"Input size: {len(x_dataset)}\nLabels size: {len(y_window_dataset)}")

Initial size: 10771
Input size: 9799
Labels size: 9799


In [90]:
# Convert inputs and labels to NumPy arrays with compact dtypes before splitting/training.
x_dataset = np.array(x_dataset, dtype=np.float32)
y_window_dataset = np.array(y_window_dataset, dtype=np.int8)

In [91]:
# Hold out 20% of the windows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_window_train, y_window_test = train_test_split(
    x_dataset,
    y_window_dataset,
    test_size=0.2,
    random_state=42,
)

In [94]:
# Time-series forest with 100 estimators and a fixed seed, trained on the training windows only.
tsfc = TimeSeriesForestClassifier(n_estimators=100, random_state=42)
tsfc.fit(np.array(X_train), np.array(y_window_train))

In [None]:
# Predictions for the held-out windows; y_window_pred is reused by the report cell further down.
y_window_pred = tsfc.predict(X_test)

accuracy = accuracy_score(y_window_test, y_window_pred)
print(f"Accuracy of test data: {accuracy:.4f}")

Accuracy of test data: 0.9464


In [None]:
# Predictions over every window (training rows included, so this figure is optimistic).
# They are stored under their own name so the test-set predictions in y_window_pred are
# not overwritten: the classification-report cell pairs y_window_pred with y_window_test
# and would otherwise receive arrays of different lengths.
y_window_pred_full = tsfc.predict(x_dataset)

accuracy = accuracy_score(y_window_dataset, y_window_pred_full)
print(f"Accuracy of full data: {accuracy:.4f}")

Accuracy of test data: 0.9879


In [None]:
# Per-class precision/recall on the held-out windows. The predictions are computed here
# rather than read from a shared variable, so the report always compares y_window_test
# with predictions for X_test regardless of which cell ran last.
y_window_pred_test = tsfc.predict(X_test)
report = classification_report(y_window_test, y_window_pred_test)
print(report)

              precision    recall  f1-score   support

           0       0.94      0.87      0.90       330
           1       0.97      0.99      0.98      1374

    accuracy                           0.96      1704
   macro avg       0.95      0.93      0.94      1704
weighted avg       0.96      0.96      0.96      1704



In [17]:
# Window-size sweep: for each candidate size rebuild the windowed dataset, train a fresh
# forest, print test/full accuracy plus the per-class test report, and save the fitted
# model. Note that this loop rebinds the notebook globals (window_size, x_dataset,
# tsfc, ...) to the values of the last iteration.
for window_size in [3, 5, 10, 15, 20, 25, 30]:
    print(f"---------------Window size = {window_size}---------------")

    # Inputs and labels for this window size, converted to compact NumPy dtypes.
    x_dataset, _, y_window_dataset = split_data(
        dataset_init, total_games=51, window_size=window_size
    )
    x_dataset = np.array(
        reshape_data(x_dataset, is_window=True, window_size=window_size),
        dtype=np.float32,
    )
    y_window_dataset = np.array(y_window_dataset, dtype=np.int8)

    # Same 80/20 split ratio and the same seeds for every window size.
    X_train, X_test, y_window_train, y_window_test = train_test_split(
        x_dataset, y_window_dataset, test_size=0.2, random_state=42
    )
    tsfc = TimeSeriesForestClassifier(n_estimators=100, random_state=42)
    tsfc.fit(np.array(X_train), np.array(y_window_train))

    # Held-out windows only.
    y_window_pred_test = tsfc.predict(X_test)
    accuracy = accuracy_score(y_window_test, y_window_pred_test)
    print(f"Accuracy of test data: {accuracy:.4f}")

    # Every window, including those the model was trained on.
    y_window_pred = tsfc.predict(x_dataset)
    accuracy = accuracy_score(y_window_dataset, y_window_pred)
    print(f"Accuracy of full data: {accuracy:.4f}")

    report = classification_report(y_window_test, y_window_pred_test)
    print(report)
    print()

    # One model file per window size; the detector cells load sizes 5 and 10 from disk.
    dump(tsfc, f'tsfc_size{window_size}.joblib')

---------------Window size = 3---------------
Accuracy of test data: 0.9385
Accuracy of full data: 0.9877
              precision    recall  f1-score   support

           0       0.94      0.99      0.97      1869
           1       0.83      0.38      0.52       181

    accuracy                           0.94      2050
   macro avg       0.89      0.69      0.74      2050
weighted avg       0.93      0.94      0.93      2050


---------------Window size = 5---------------
Accuracy of test data: 0.9397
Accuracy of full data: 0.9879
              precision    recall  f1-score   support

           0       0.94      1.00      0.97      1749
           1       0.96      0.58      0.72       275

    accuracy                           0.94      2024
   macro avg       0.95      0.79      0.84      2024
weighted avg       0.94      0.94      0.93      2024


---------------Window size = 10---------------
Accuracy of test data: 0.9464
Accuracy of full data: 0.9893
              precision  

# Main algorithm

In [82]:
# Reload the classifiers saved by the window-size sweep (window sizes 5 and 10).
# joblib.load unpickles the file, so only load model files produced by this notebook
# or another trusted source.
tsfc5: TimeSeriesForestClassifier = load('tsfc_size5.joblib')
tsfc10: TimeSeriesForestClassifier = load('tsfc_size10.joblib')

In [83]:
def _mark_bounce_transitions(is_bounce: List[bool], indices, predicts, first: int, last: int) -> None:
    """Flag positions where the window prediction drops from 1 to 0.

    For every ``i`` in ``[first, last)``, ``is_bounce[i]`` is set to True when
    windows ``i`` and ``i + 1`` have consecutive indices, window ``i`` is
    predicted 1 and window ``i + 1`` is predicted 0. ``is_bounce`` is modified
    in place.
    """
    for i in range(first, last):
        if indices[i + 1] - indices[i] == 1 and predicts[i + 1] == 0 and predicts[i] == 1:
            is_bounce[i] = True


def bounce_detector(game_id: int, tsfc5: TimeSeriesForestClassifier, tsfc10: TimeSeriesForestClassifier):
    """Print ground-truth and predicted bounce offsets for one game.

    The rows of ``game_id`` are located in the notebook-global ``dataset_init``
    (column 0 holds the game id, column 5 the 0/1 bounce flag). The size-10
    classifier is run over all windows of the game, then the size-5 classifier
    is applied to the last few windows only. A bounce is reported wherever the
    prediction switches from 1 to 0 between consecutive windows.

    Args:
        game_id: Id of the game to analyse; its rows are assumed to be contiguous.
        tsfc5: Classifier trained on windows of size 5.
        tsfc10: Classifier trained on windows of size 10.

    Returns:
        None. Both lists of offsets (relative to the first row of the game) are printed.
    """
    total_rows = len(dataset_init)
    start = 0
    # Exclusive end of the slice. It must default to the full length: the last game has
    # no following game to set it, and ``total_rows - 1`` would drop its final row.
    end = total_rows
    ind = 1
    while ind < total_rows - 1 and dataset_init[ind][0] != game_id + 1:
        if dataset_init[ind - 1][0] == game_id - 1 and dataset_init[ind][0] == game_id:
            start = ind
        if dataset_init[ind][0] == game_id and dataset_init[ind + 1][0] == game_id + 1:
            end = ind + 1
        ind += 1

    game_rows = dataset_init[start:end]
    is_bounce: List[bool] = [False] * (end - start)

    # Pass 1: size-10 windows over the whole game.
    # NOTE(review): flags are written at the window position i, not at indices[i]; this
    # is only equivalent if split_data_with_ind returns indices equal to positions - confirm.
    windows, indices = split_data_with_ind(game_rows, 10)
    windows = reshape_data(windows, window_size=10)
    predicts = tsfc10.predict(np.array(windows, dtype=np.float32))
    _mark_bounce_transitions(is_bounce, indices, predicts, 0, len(windows) - 1)

    # Offsets (relative to the start of the game) of the rows flagged as bounces.
    gt = [i - start for i in range(start, end) if dataset_init[i][5] == 1]

    # Pass 2: size-5 windows, restricted to the last windows of the game (presumably the
    # tail that the size-10 pass cannot resolve - confirm). The lower bound is clamped
    # at 0 so games with fewer than 10 size-5 windows do not index from the end of the lists.
    windows, indices = split_data_with_ind(game_rows, 5)
    windows = reshape_data(windows, window_size=5)
    predicts = tsfc5.predict(np.array(windows, dtype=np.float32))
    _mark_bounce_transitions(is_bounce, indices, predicts, max(0, len(windows) - 10), len(windows) - 1)

    pred = [i for i, flagged in enumerate(is_bounce) if flagged]

    print(f"Ground truth: {gt}")
    print(f"Predicted: {pred}")

In [84]:
# Example run on game 14: prints the labelled bounce offsets next to the detected ones.
bounce_detector(14, tsfc5, tsfc10)

Ground truth: [10, 48, 90, 132, 159]
Predicted: [10, 48, 90, 132, 159]
