In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
from sklearn.preprocessing import StandardScaler

In [23]:
sys.path.append('../../datasets/Turing_Change_Point_Dataset/examples/python')
from load_dataset import *
sys.path.append('../utils')
from evaluation import *
sys.path.append('../')
from ClassificationParScoreProfile import *
from DynamicProgramming import *
from FLOSS import *
from Pelt import *
from Window import *
from BayesianOnlineChangePointDetection import *
from Fluss import *

In [24]:

ts = TimeSeries.from_json('../../datasets/Turing_Change_Point_Dataset/datasets/gdp_iran/gdp_iran.json')
ts.df.head()

Unnamed: 0,t,GDP (constant LCU)
0,0,835372800000000.0
1,1,931253600000000.0
2,2,1011252000000000.0
3,3,1084815000000000.0
4,4,1184553000000000.0


In [27]:
import pandas as pd
import json

# Load JSON data
with open('../../datasets/Turing_Change_Point_Dataset/annotations.json') as f:
    data = json.load(f)

rows = []

def handle_nan(ts):
    nan_indices = np.isnan(ts)
    mean_val = np.nanmean(ts)
    ts[nan_indices] = mean_val
    return ts
# Iterate through each key-value pair in the JSON data
for ts_name, ts_data in data.items():
    path_file='../../datasets/Turing_Change_Point_Dataset/datasets/'+str(ts_name)+'/'+str(ts_name)+'.json'
    if  os.path.exists(path_file):
        ts = TimeSeries.from_json(path_file)
        ts=ts.df.drop('t',axis="columns").values.reshape(-1)
        change_points = []
        for cp, values in ts_data.items():
            change_points.extend(values)
        # Append a tuple with the required data to the list
        rows.append((ts_name,ts, change_points))

# Create a DataFrame from the list of tuples
tcpd = pd.DataFrame(rows, columns=['ts_name','ts', 'cp'])
tcpd['ts_length']=tcpd.ts.apply(len)

tcpd['cp']=tcpd.cp.apply(lambda x :list(set(x)))
tcpd=tcpd[tcpd.ts_length>=100]
tcpd=tcpd.drop('ts_length',axis="columns")
tcpd.head()


Unnamed: 0,ts_name,ts,cp
0,bank,"[7.360385187934427, 7.360385187934427, 7.36038...",[]
1,brent_spot,"[23.95, 26.31, 27.35, 28.12, 29.01, 28.83, 24....","[389, 271, 279, 409, 286, 287, 288, 169, 170, ..."
2,businv,"[802948.0, 809329.0, 813301.0, 819247.0, 81568...","[202, 203, 119, 213, 212, 215, 120]"
4,children_per_woman,"[5.77, 5.77, 5.78, 5.78, 5.78, 5.77, 5.77, 5.7...","[197, 168, 170, 177, 146, 145, 180]"
5,co2_canada,"[0.00568, 0.00561, 0.00555, 0.00548, 0.00542, ...","[67, 164, 133, 134, 163, 107, 173, 110, 80, 14..."


In [26]:
results={}
def evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,algorithm_name):
    # Initialize lists to store metric values for each time series
    covering_scores = []
    f_measure_scores = []
    nab_scores = []
    i=0
    # Iterate over each time series
    for cps_true,cps_found,ts_len in zip(all_cps_true,all_cps_found,all_ts_len):
        i=i+1
        score_covering = covering({0: cps_true}, cps_found, ts_len)
        covering_scores.append(score_covering)
        
        # Calculate F-measure
        score_F = f_measure({0: cps_true}, cps_found, ts_len) 
        f_measure_scores.append(score_F)
        
        # Format true and predicted change points as Series
        true_cp, predicted_cp = formate_data(cps_true, cps_found,ts_len)
    
        # Evaluate change points using the evaluating_change_point function
        score_NAB = evaluating_change_point(true_cp, predicted_cp, metric='nab')
        nab_scores.append(score_NAB)
        # print(score_NAB)

    # Calculate mean scores across all time series
    mean_covering = sum(covering_scores) / len(covering_scores)
    mean_f_measure = sum(f_measure_scores) / len(f_measure_scores)

    # Calculate mean NAB scores
    nab_scores_filtered = [score for score in nab_scores if not np.isnan(score['Standart']) and not np.isinf(score['Standart'])]
    mean_nab_standard = sum(score['Standart'] for score in nab_scores_filtered) / len(nab_scores_filtered)
    mean_nab_lowFP = sum(score['LowFP'] for score in nab_scores_filtered) / len(nab_scores_filtered)
    mean_nab_lowFN = sum(score['LowFN'] for score in nab_scores_filtered) / len(nab_scores_filtered)


    # Print mean scores
    print(f"Mean Covering: {mean_covering}")
    print(f"Mean F-measure: {mean_f_measure}")
    print(f"Mean NAB (Standard): {mean_nab_standard}")
    print(f"Mean NAB (LowFP): {mean_nab_lowFP}")
    print(f"Mean NAB (LowFN): {mean_nab_lowFN}")

    results[algorithm_name]={"Covering":mean_covering,
                                        "F-measure":mean_f_measure,
                                        "NAB (Standard)":mean_nab_standard,
                                        "NAB (LowFP)":mean_nab_lowFP,
                                        "NAB (LowFN)":mean_nab_lowFN
    }


<h3>BinaryClaSPSegmentation</h3>

In [6]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
  ts = ts[~np.isnan(ts)]
  cps_found = BinaryClaSPSegmentation().fit_predict(ts)
  all_cps_true.append(cps_true)
  all_cps_found.append(cps_found)
  all_ts_len.append(ts.shape[0])

evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"BinaryClaSPSegmentation")


24it [01:50,  4.60s/it]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.39629038281656687
Mean F-measure: 0.353410258042611
Mean NAB (Standard): 1.7299999999999998
Mean NAB (LowFP): 1.6536363636363633
Mean NAB (LowFN): 1.7595454545454545


  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


<h3>DynamicProgramming</h3>

In [7]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
  cps_found = DynamicProgramming().fit_predict(np.array(ts),len(cps_true))[:-1]
  all_cps_true.append(cps_true)
  all_cps_found.append(cps_found)
  all_ts_len.append(ts.shape[0])

evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"DynamicProgramming")


24it [00:18,  1.27it/s]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.64817002067074
Mean F-measure: 1.0
Mean NAB (Standard): 79.25363636363635
Mean NAB (LowFP): 74.25954545454546
Mean NAB (LowFN): 81.0059090909091


<h3>FLOSS</h3>

In [8]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
    # Instantiate FLOSS
    floss_detector = FLOSS(n_timepoints=len(ts), window_size=20, n_prerun=500, threshold=0.7, excl_factor=5, verbose=0)
    # Update FLOSS with each time point
    for timepoint in ts:
        floss_detector.update(timepoint)

    cps_found = floss_detector.change_points
    all_cps_true.append(cps_true)
    all_cps_found.append(cps_found)
    all_ts_len.append(ts.shape[0])
    
evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"FLOSS")



0it [00:00, ?it/s]

24it [01:51,  4.64s/it]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.38049812452739634
Mean F-measure: 0.3617947208020737
Mean NAB (Standard): 5.997272727272727
Mean NAB (LowFP): 5.459090909090909
Mean NAB (LowFN): 6.185454545454545


<h3>Pelt</h3>

In [9]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
  cps_found = Pelt().fit_predict(np.array(ts),pen=15)[:-1]
  all_cps_true.append(cps_true)
  all_cps_found.append(cps_found)
  all_ts_len.append(ts.shape[0])

evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"Pelt")


24it [00:02,  9.47it/s]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.41052472975224125
Mean F-measure: 0.48547272196963154
Mean NAB (Standard): 39.99454545454545
Mean NAB (LowFP): -7.898636363636363
Mean NAB (LowFN): 56.0190909090909


<h3>BOCD</h3>

In [28]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
# Iterate over each time series
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
    ts=handle_nan(ts)
    cps_found = bocd(ts,len(cps_true))
    all_cps_true.append(cps_true)
    all_cps_found.append(cps_found)
    all_ts_len.append(ts.shape[0])

evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"BOCD")

24it [00:13,  1.84it/s]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.4732270149005752
Mean F-measure: 0.8010912698412698
Mean NAB (Standard): 46.473636363636366
Mean NAB (LowFP): 43.88090909090909
Mean NAB (LowFN): 47.58545454545455


<h3>Fluss</h3>

In [11]:
all_cps_true=[]
all_cps_found=[]
all_ts_len=[]
for _,(ts_name,ts,cps_true) in tqdm(tcpd.iterrows()):
        # Parameters
    window_size = 50
    n_cps = len(cps_true)
    # Using fluss function
    cps_found = fluss(ts, window_size, n_cps)
    all_cps_true.append(cps_true)
    all_cps_found.append(cps_found)
    all_ts_len.append(ts.shape[0])
    
evaluate_and_save(all_cps_true,all_cps_found,all_ts_len,"Fluss")


0it [00:00, ?it/s]

24it [02:02,  5.09s/it]
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)
  results[profile_name] = round(100*(matrix[0,t]-matrix[1,t])/(matrix[2,t]-matrix[1,t]), 2)


Mean Covering: 0.39346570048642504
Mean F-measure: 0.3381569002892532
Mean NAB (Standard): -0.2531818181818182
Mean NAB (LowFP): -1.5118181818181817
Mean NAB (LowFN): 0.2013636363636363


In [41]:
# Convert dictionary to DataFrame
results_df = pd.DataFrame(results).T.reset_index()

# Rename the 'index' column to 'name_algo'
results_df = results_df.rename(columns={'index': 'name_algo'})
# save the file
results_df.to_csv('results/TCPD.csv', index=False)

print("DataFrame saved as 'TCPD.csv'")
# Display the DataFrame
results_df

DataFrame saved as 'TCPD.csv'


Unnamed: 0,name_algo,Covering,F-measure,NAB (Standard),NAB (LowFP),NAB (LowFN)
0,BinaryClaSPSegmentation,0.39629,0.35341,1.73,1.653636,1.759545
1,DynamicProgramming,0.64817,1.0,79.253636,74.259545,81.005909
2,FLOSS,0.380498,0.361795,5.997273,5.459091,6.185455
3,Pelt,0.410525,0.485473,39.994545,-7.898636,56.019091
4,BOCD,0.473227,0.801091,46.473636,43.880909,47.585455
5,Fluss,0.393466,0.338157,-0.253182,-1.511818,0.201364
