In [None]:
import ruptures as rpt
from src.classes.MobileData import MobileData
from src.classes.Utility import Utility
from ressources.enums.DrillingProcess import DrillingProcess
from ressources.enums.SmoothingProcess import SmoothingProcess
from src.classes.MetricSummary import MetricSummary

## Drilling Data

**Load the data, set the index and scale the data to prepare for changepoint analysis.**
**For efficiency, the data is split into chunks and only one chunk is analysed.**

In [ ]:
# Load the recording of drilling process 23 and scale it.
drilling_data = Utility.scale_data(MobileData(DrillingProcess.PROCESS_23).df)
print(len(drilling_data))

# Keep only chunk 14 of the '20s' time-chunking and plot it for an overview.
drilling_data = Utility.chunk_df_by_time(drilling_data, '20s')[14]
Utility.plot_data(drilling_data, None, '', '')

# Ground-truth changepoints; the final entry is a dummy index equal to the
# length of the selected chunk.
drill_true_cps = [
    2600, 4002,
    10850, 12209,
    19000, 20415,
    27300, 28660,
    35550, 36920,
    43830, 45205,
    len(drilling_data),
]

# Labels reused as metadata in the evaluation cells.
type1 = 'Drilling'
type2 = 'Smoothing'
proc1 = str(DrillingProcess.PROCESS_23.name)
proc2 = str(SmoothingProcess.PROCESS_26.name)

### Parameters for changepoint detection 

**model = cost function for the segments / model that describes the data**
**penalty = penalty term that controls the sensitivity of the detection. The following options are available:**
    **- Bayesian information criterion (BIC)**
    **- Hannan-Quinn information criterion (Hannan Quinn)**
    **- Akaike information criterion (AIC)**
**search method = algorithm that minimizes the cost**

In [ ]:
# Penalty based on the Bayesian information criterion (BIC).
model = 'l2'  # cost on the segment mean
estimated_cps = 6
num_samples = len(drilling_data)
# The last argument (2) reflects the assumed, more complex second-order
# structure of the bending moment.
penalty = Utility.get_penalty(num_samples, 'BIC', estimated_cps, 2)
print(penalty)

## PELT Algorithm

In [ ]:
alg = 'PELT'
# ruptures documents the signal as a NumPy array of shape
# (n_samples, n_features), so hand over the underlying array instead of the
# DataFrame, exactly as the smoothing cells do with `.values`.
pelt = rpt.Pelt(model='l2', min_size=500, jump=50).fit(drilling_data.values)
drill_changepoints = pelt.predict(pen=penalty)
print(drill_changepoints)

In [ ]:
# Show the drilling signal with the raw PELT changepoints (no ground truth passed).
rpt.display(drilling_data, [], computed_chg_pts=drill_changepoints)

**The min_size parameter is not taken into account across multiple dimensions by the algorithm, so the result has to be corrected afterwards.**

In [ ]:
# Post-correct the PELT result using a minimum distance between changepoints.
treshold = 1400
corrected_cps_drill = Utility.adaptive_mean_filter(
    drilling_data, drill_changepoints, treshold
)
# Note: the last element is always the series length; removing it via
# np.delete(corrected_cps_drill, -1) is intentionally left disabled.
print(corrected_cps_drill)

In [ ]:
# Show the drilling signal with the post-corrected changepoints.
rpt.display(drilling_data, [], computed_chg_pts=corrected_cps_drill)

## Smoothing data

In [ ]:
# Load the recording of smoothing process 26 and scale it.
smoothing_data = Utility.scale_data(MobileData(SmoothingProcess.PROCESS_26).df)

# Keep only chunk 14 of the '10s' time-chunking and plot it for an overview.
smoothing_data = Utility.chunk_df_by_time(smoothing_data, '10s')[14]
Utility.plot_data(smoothing_data, None, '', '')

# Ground-truth changepoints; the final entry is the length of the chunk.
smooth_true_cps = [
    1550, 2560,
    4000, 5070,
    6450, 7550,
    8900, 10075,
    11500, 12550,
    13950, 15040,
    16450, 17580,
    18960, 20125,
    21500, 22625,
    len(smoothing_data),
]

**The smoothing data differs a lot from the friction data, so the parameters have to be adjusted.**

In [ ]:
# Penalty based on the Hannan-Quinn information criterion for the smoothing data.
estimated_cps = 9
# Take the sample count from the chunk that is actually analysed rather than
# from a hard-coded number, matching how the drilling penalty and the
# evaluation cells determine it.
num_samples = len(smoothing_data)
# The last argument (2) reflects the assumed, more complex second-order
# structure of the bending moment.
penalty = Utility.get_penalty(num_samples, 'hannan quinn', estimated_cps, 2)
print(penalty)

In [ ]:
# Kernel changepoint detection (RBF kernel) on the smoothing signal.
algo = rpt.KernelCPD(kernel='rbf', min_size=1000, jump=500).fit(
    smoothing_data.values
)
cps = algo.predict(pen=penalty)
print(cps)

In [ ]:
# Show the smoothing signal with the KernelCPD changepoints.
rpt.display(smoothing_data, [], computed_chg_pts=cps)

In [ ]:
# Build the PELT detector explicitly (same settings as for the drilling data)
# instead of re-fitting the instance left over from the drilling cell, so this
# cell does not depend on that hidden notebook state.
pelt = rpt.Pelt(model='l2', min_size=500, jump=50).fit(smoothing_data.values)
smoothing_changepoints = pelt.predict(pen=penalty)
print(smoothing_changepoints)

In [ ]:
# Show the smoothing signal with the raw PELT changepoints.
rpt.display(smoothing_data, [], computed_chg_pts=smoothing_changepoints)

In [ ]:
# Post-correct the PELT result using a minimum distance between changepoints.
treshold = 851
corrected_cps_smooth = Utility.adaptive_mean_filter(
    smoothing_data, smoothing_changepoints, treshold
)
# Note: the last element is always the series length; removing it via
# np.delete(corrected_cps_smooth, -1) is intentionally left disabled.
print(corrected_cps_smooth)

In [ ]:
# Show the smoothing signal with the post-corrected changepoints.
rpt.display(smoothing_data, [], computed_chg_pts=corrected_cps_smooth)

## Evaluation

In [ ]:
# One result table is appended here per evaluated configuration.
statistic_dfs = list()

# Penalty criteria compared in every evaluation run.
penalties = [
    'BIC',
    'AIC',
    'Hannan Quinn',
]

# Cost models / kernels used by the two configurations.
model1 = 'l2'
model2 = 'rbf'

In [ ]:
# Evaluation run: drilling data, l2 configuration.
estimated_cps = 12
num_samples = len(drilling_data)

# Detectors under comparison; KernelCPD is configured with the linear kernel.
algorithms = [
    rpt.Pelt(model=model1, min_size=500, jump=50),
    rpt.Binseg(model=model1),
    rpt.KernelCPD(kernel='linear', min_size=1000, jump=500),
]

# Descriptive information passed to the summary together with the data.
metadata = {
    'type': type1,
    'process': proc1,
    'model': model1,
    'num_samples': num_samples,
    'estimated cps': estimated_cps,
    'model params': 2,
}

m = MetricSummary()
m.compare_cpd_algorithms(
    drilling_data, drill_true_cps, algorithms, penalties, None, metadata
)
statistic_dfs.append(m.df)

In [ ]:
# Evaluation run: smoothing data, l2 configuration.
estimated_cps = 9
num_samples = len(smoothing_data)

# Detectors under comparison; KernelCPD is configured with the linear kernel.
algorithms = [
    rpt.Pelt(model=model1, min_size=500, jump=50),
    rpt.Binseg(model=model1),
    rpt.KernelCPD(kernel='linear', min_size=1000, jump=500),
]

# Descriptive information passed to the summary together with the data.
metadata = {
    'type': type2,
    'process': proc2,
    'model': model1,
    'num_samples': num_samples,
    'estimated cps': estimated_cps,
    'model params': 2,
}

m = MetricSummary()
m.compare_cpd_algorithms(
    smoothing_data, smooth_true_cps, algorithms, penalties, None, metadata
)
statistic_dfs.append(m.df)

In [ ]:
# Evaluation run: drilling data, rbf configuration.
estimated_cps = 12
num_samples = len(drilling_data)

# Detectors under comparison, all using the rbf model / kernel.
algorithms = [
    rpt.Pelt(model=model2, min_size=500, jump=50),
    rpt.Binseg(model=model2),
    rpt.KernelCPD(kernel=model2, min_size=1000, jump=500),
]

# Descriptive information passed to the summary together with the data.
metadata = {
    'type': type1,
    'process': proc1,
    'model': model2,
    'num_samples': num_samples,
    'estimated cps': estimated_cps,
    'model params': 2,
}

m = MetricSummary()
m.compare_cpd_algorithms(
    drilling_data, drill_true_cps, algorithms, penalties, None, metadata
)
statistic_dfs.append(m.df)

In [ ]:
# Evaluation run: smoothing data, rbf configuration.
estimated_cps = 9
num_samples = len(smoothing_data)

# Detectors under comparison, all using the rbf model / kernel.
algorithms = [
    rpt.Pelt(model=model2, min_size=500, jump=50),
    rpt.Binseg(model=model2),
    rpt.KernelCPD(kernel=model2, min_size=1000, jump=500),
]

# Descriptive information passed to the summary together with the data.
metadata = {
    'type': type2,
    'process': proc2,
    'model': model2,
    'num_samples': num_samples,
    'estimated cps': estimated_cps,
    'model params': 2,
}

m = MetricSummary()
m.compare_cpd_algorithms(
    smoothing_data, smooth_true_cps, algorithms, penalties, None, metadata
)
statistic_dfs.append(m.df)

In [ ]:
# Show every row of the drilling / l2 results. `DataFrame.size` counts cells
# (rows x columns), so the row count is taken with `len` instead.
statistic_dfs[0].head(len(statistic_dfs[0]))

**For the drilling process data and the "l2" model, the Pelt algorithm overall shows a good balance between precision and recall, especially when using BIC. The Hausdorff distance is lowest with BIC, indicating a good match with the true changepoints. The runtime is relatively short and consistent.**

**The Binseg algorithm shows higher runtime compared to Pelt and KernelCPD, especially when using BIC. Precision is lower and the Hausdorff distance is more variable, indicating less consistent performance.**

**The KernelCPD algorithm shows the lowest runtime and consistent performance, although precision and F1-score are lower than Pelt.**

In [ ]:
# Show every row of the smoothing / l2 results. `DataFrame.size` counts cells
# (rows x columns), so the row count is taken with `len` instead.
statistic_dfs[1].head(len(statistic_dfs[1]))

**For the smoothing process data and the "l2" model, the Pelt algorithm exhibits strong performance with high precision, recall, and F1 scores, particularly with BIC and AIC penalties. The Hausdorff distance remains consistent, indicating accurate detection of changepoints. The runtime is exceptionally low across all penalty types.**

**The Binseg algorithm's performance fluctuates significantly, with lower precision and higher Hausdorff distance, especially with AIC. The runtime is relatively higher for BIC, but very low for other penalties.**

**The KernelCPD algorithm demonstrates excellent performance with the lowest annotation error and Hausdorff distance, and very high precision, recall, and F1 scores. The runtime is consistently low across all penalties.**

In [ ]:
# Show every row of the drilling / rbf results. `DataFrame.size` counts cells
# (rows x columns), so the row count is taken with `len` instead.
statistic_dfs[2].head(len(statistic_dfs[2]))

**The Pelt algorithm with RBF cost function shows strong performance with high precision and recall, particularly with the BIC penalty. The Hausdorff distance is lowest with BIC, indicating accurate changepoint detection. The runtime is consistent across all penalties.**

**The Binseg algorithm's performance fluctuates significantly, with lower precision and higher Hausdorff distance, especially with AIC. The runtime is relatively high for BIC but very low for other penalties.**

**The KernelCPD algorithm demonstrates excellent performance with the lowest annotation error and Hausdorff distance when using BIC and Hannan Quinn penalties. The precision, recall, and F1 scores are consistent across all penalties, making it a reliable choice for changepoint detection with the RBF cost function.**

In [ ]:
# Show every row of the smoothing / rbf results. `DataFrame.size` counts cells
# (rows x columns), so the row count is taken with `len` instead.
statistic_dfs[3].head(len(statistic_dfs[3]))

**The Pelt algorithm shows very high precision and recall with all penalties, particularly with BIC, AIC, and Hannan Quinn. The Hausdorff distance is consistent and relatively low, indicating accurate changepoint detection. The runtime is extremely efficient.**

**The Binseg algorithm's performance varies, with lower precision and higher Hausdorff distance, especially with AIC. The runtime is relatively high for BIC but very low for other penalties.**

**The KernelCPD algorithm demonstrates outstanding performance with the lowest annotation error and Hausdorff distance when using BIC and Hannan Quinn penalties. The precision, recall, and F1 scores are consistent across all penalties, making it a reliable choice for changepoint detection with the RBF cost function.**

## Summary and Recommendations ##
**For drilling data, the Pelt algorithm with the L2 cost function is the preferred choice due to its high precision, recall, and overall accuracy. If the slight oversegmentation can be filtered out, the result should be excellent.**

**For smoothing data, the KernelCPD algorithm with the RBF cost function is the preferred choice due to its consistent high performance, accuracy, and reliability**
