In [2]:
import json
import sys
sys.path.insert(1, "../")
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from parse import parser
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA

In [3]:
# Load the tracking export and order the rows by marker, then by frame,
# so that every marker's trajectory is contiguous and chronological.
dataMario = pd.read_csv("../csv/EVDb_SimCaPlus_Mario0006.csv").sort_values(
    by=["MARKER_NR", "FRAME"]
)
dataMario.head()

Unnamed: 0,FRAME,TIME,PHANTOM,MARKER_NR,TRAJECTORY_TYPE,X,Y,Z,VX,VY,VZ,VELOCITY
0,1,0.0,skull,midpoint,calculated,391.053,333.058,184.309,,,,
12,2,0.005,skull,midpoint,calculated,391.053,333.058,184.309,,,,
24,3,0.01,skull,midpoint,calculated,391.053,333.058,184.309,,,,
36,4,0.015,skull,midpoint,calculated,391.053,333.058,184.309,,,,
48,5,0.02,skull,midpoint,calculated,391.053,333.058,184.309,,,,


In [68]:
def _lttb_pick_rows(group: pd.DataFrame, marker_bins) -> np.ndarray:
    """Return the 0-based row positions within ``group`` that LTTB keeps for one marker.

    ``group`` holds the rows of a single marker; ``marker_bins`` is the
    ``pd.IntervalIndex`` of FRAME buckets for that marker, or ``None`` to keep
    every row unchanged.
    """
    n_rows = len(group)
    if marker_bins is None:
        return np.arange(n_rows)

    xyz_frame = group[["X", "Y", "Z"]].astype(float)
    xyz = xyz_frame.to_numpy()

    # Half-second grid of THIS marker: rows whose TIME falls exactly on a
    # multiple of 0.5 are always kept in addition to the triangle picks.
    times = group["TIME"]
    last_half_second = (times.max() // 0.5) * 0.5
    if np.isfinite(last_half_second):
        half_seconds = np.arange(0, last_half_second + 0.5, 0.5)
        on_grid = times.isin(half_seconds).to_numpy()
    else:
        on_grid = np.zeros(n_rows, dtype=bool)

    # Bucket number of every row (-1 = outside all buckets, e.g. first/last row).
    codes = np.asarray(pd.cut(group["FRAME"].to_numpy(), marker_bins).codes)
    # A stable sort keeps the original row order inside each bucket, so ties in
    # the triangle area resolve to the earliest row.
    order = np.argsort(codes, kind="stable")
    sorted_codes = codes[order]
    bucket_ids = np.arange(len(marker_bins))
    lows = np.searchsorted(sorted_codes, bucket_ids, side="left")
    highs = np.searchsorted(sorted_codes, bucket_ids, side="right")
    # Empty buckets (frame gaps, or more buckets than frames) are skipped.
    buckets = [order[lo:hi] for lo, hi in zip(lows, highs) if hi > lo]

    picked = [0]
    seen = {0}
    anchor = xyz[0]  # previously selected point, vertex A of the triangle
    for rank, rows in enumerate(buckets):
        for pos in rows[on_grid[rows]]:
            if pos not in seen:
                seen.add(pos)
                picked.append(pos)

        # Vertex C: mean of the next non-empty bucket, or the very last row
        # when this is the final non-empty bucket.
        if rank + 1 < len(buckets):
            target = xyz_frame.iloc[buckets[rank + 1]].mean().to_numpy()
        else:
            target = xyz[-1]

        # Triangle area = |AB x AC| / 2 for every candidate B in the bucket.
        areas = np.linalg.norm(np.cross(xyz[rows] - anchor, target - anchor), axis=1) / 2
        if np.isnan(areas).all():
            # No triangle can be evaluated (missing coordinates): keep the anchor.
            continue
        best = rows[np.nanargmax(areas)]
        if best not in seen:
            seen.add(best)
            picked.append(best)
        anchor = xyz[best]

    if n_rows - 1 not in seen:
        picked.append(n_rows - 1)
    return np.array(picked)


def LTTB(data: pd.DataFrame, threshold: int, bins=-1) -> pd.DataFrame:
    """
    Reduce the number of points in a dataset by using the Largest Triangle Three Buckets algorithm.

    The data is processed per ``MARKER_NR``. For every marker the first and
    last rows are kept, the FRAME range in between is split into buckets, and
    from each bucket the row forming the largest 3D triangle (on X, Y, Z) with
    the previously kept point and the mean of the next bucket is kept. Rows
    whose TIME is an exact multiple of 0.5 are kept as well, so a marker can
    end up with more than ``threshold`` rows.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the columns MARKER_NR, FRAME, TIME, X, Y and Z.
    threshold : int
        Target number of triangle-selected points per marker; ``threshold - 2``
        buckets are created when ``bins`` is not supplied.
    bins : -1 or sequence of pd.IntervalIndex
        ``-1`` (default) builds equal-width FRAME buckets per marker. Otherwise
        one ``IntervalIndex`` per marker, in the order ``groupby("MARKER_NR")``
        yields the markers.

    Returns
    -------
    pd.DataFrame
        The kept rows of ``data``, selected by index label and ordered by the
        sorted union of those labels. The number of kept rows is printed.
    """
    groups = [group for _, group in data.groupby("MARKER_NR")]

    if np.isscalar(bins) and bins == -1:
        # Markers with fewer than three rows cannot be bucketed: keep them whole.
        bins = [
            None
            if len(group) < 3
            else pd.interval_range(
                start=group["FRAME"].iloc[0],
                end=group["FRAME"].iloc[-2],
                periods=threshold - 2,
                closed="right",
            )
            for group in groups
        ]

    final = pd.Index([])
    for position, group in enumerate(groups):
        picked = _lttb_pick_rows(group, bins[position])
        final = final.union(pd.Index(group.index.to_numpy()[picked]))
    print(final.shape)

    return data.loc[final, :]

In [21]:
def avgDownSampling(data: pd.DataFrame, threshold: int, bins=-1) -> pd.DataFrame:
    """
    Reduce the number of points in a dataset by averaging within FRAME buckets.

    The data is processed per ``MARKER_NR``. For every marker the output
    contains the first row, one row per non-empty bucket, and the last row.
    A bucket row copies the first row of the bucket and replaces FRAME, TIME,
    X, Y and Z with the bucket mean (FRAME rounded with ``np.round``).

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the columns MARKER_NR, FRAME, TIME, X, Y and Z.
    threshold : int
        ``threshold - 2`` equal-width buckets are created per marker when
        ``bins`` is not supplied.
    bins : -1 or sequence of pd.IntervalIndex
        ``-1`` (default) builds the buckets automatically. Otherwise one
        ``IntervalIndex`` per marker, in the order ``groupby("MARKER_NR")``
        yields the markers.

    Returns
    -------
    pd.DataFrame
        Down-sampled rows with the columns of ``data`` and a fresh 0..n-1 index.
    """
    averaged = ["FRAME", "TIME", "X", "Y", "Z"]
    groups = [group for _, group in data.groupby("MARKER_NR")]

    if np.isscalar(bins) and bins == -1:
        # Markers with fewer than three rows cannot be bucketed: keep them whole.
        bins = [
            None
            if len(group) < 3
            else pd.interval_range(
                start=group["FRAME"].iloc[0],
                end=group["FRAME"].iloc[-2],
                periods=threshold - 2,
                closed="right",
            )
            for group in groups
        ]

    # Collect the per-marker pieces and concatenate once at the end; growing a
    # frame with concat inside the loop is quadratic and degrades dtypes.
    pieces = []
    for position, group in enumerate(groups):
        marker_bins = bins[position]
        if marker_bins is None:
            pieces.append(group)
            continue

        # Bucket number of every row (-1 = outside all buckets).
        codes = np.asarray(pd.cut(group["FRAME"].to_numpy(), marker_bins).codes)
        # Stable sort: rows stay in their original order inside each bucket.
        order = np.argsort(codes, kind="stable")
        order = order[codes[order] >= 0]

        pieces.append(group.iloc[[0]])
        if len(order):
            sorted_codes = codes[order]
            # Position (within ``order``) of the first row of every non-empty bucket.
            starts = np.flatnonzero(np.r_[True, sorted_codes[1:] != sorted_codes[:-1]])
            means = group.iloc[order][averaged].groupby(sorted_codes).mean()
            bucket_rows = group.iloc[order[starts]].copy()
            for column in averaged:
                bucket_rows[column] = means[column].to_numpy()
            # Averaged frame numbers are rounded back onto the FRAME dtype.
            bucket_rows["FRAME"] = np.round(means["FRAME"].to_numpy()).astype(
                group["FRAME"].dtype
            )
            pieces.append(bucket_rows)
        pieces.append(group.iloc[[-1]])

    if not pieces:
        return data.iloc[0:0].reset_index(drop=True)
    return pd.concat(pieces, ignore_index=True)

In [5]:
# Bucket-average every marker's trajectory, passing 500 as the threshold.
avgSubsampled = avgDownSampling(dataMario, threshold=500)

In [69]:
# Run LTTB with a threshold of 100 and display the returned frame.
LTTB(dataMario, threshold=100)

(2629,)


Unnamed: 0,FRAME,TIME,PHANTOM,MARKER_NR,TRAJECTORY_TYPE,X,Y,Z,VX,VY,VZ,VELOCITY
0,1,0.000,skull,midpoint,calculated,391.053,333.058,184.309,,,,
1,1,0.000,skull,phantom1,Measured,420.188,336.076,221.468,,,,
2,1,0.000,skull,phantom2,Measured,392.340,395.010,178.267,,,,
3,1,0.000,skull,phantom3,Measured,452.539,326.562,134.224,,,,
4,1,0.000,skull,phantom4,Measured,390.401,301.714,187.366,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
144235,12020,60.095,catheter3_mid3,tip3,Measured,159.942,333.667,487.839,,,,
144236,12020,60.095,catheter3_mid3,tip4,Measured,117.212,318.223,514.824,,,,
144237,12020,60.095,catheter3_mid3,tip5,Measured,151.675,294.355,516.437,,,,
144238,12020,60.095,catheter3_mid3,tip6,Measured,129.790,292.759,571.621,,,,


In [56]:
# Downsample with LTTB (threshold 500) and preview the first rows.
subsampled = LTTB(dataMario, threshold=500)
subsampled.head()

(6703,)


Unnamed: 0,FRAME,TIME,PHANTOM,MARKER_NR,TRAJECTORY_TYPE,X,Y,Z,VX,VY,VZ,VELOCITY
0,1,0.0,skull,midpoint,calculated,391.053,333.058,184.309,,,,
1,1,0.0,skull,phantom1,Measured,420.188,336.076,221.468,,,,
2,1,0.0,skull,phantom2,Measured,392.34,395.01,178.267,,,,
3,1,0.0,skull,phantom3,Measured,452.539,326.562,134.224,,,,
4,1,0.0,skull,phantom4,Measured,390.401,301.714,187.366,,,,


In [67]:
# Preview the last five rows of the downsampled frame.
subsampled.tail(n=5)

Unnamed: 0,FRAME,TIME,PHANTOM,MARKER_NR,TRAJECTORY_TYPE,X,Y,Z,VX,VY,VZ,VELOCITY
144235,12020,60.095,catheter3_mid3,tip3,Measured,159.942,333.667,487.839,,,,
144236,12020,60.095,catheter3_mid3,tip4,Measured,117.212,318.223,514.824,,,,
144237,12020,60.095,catheter3_mid3,tip5,Measured,151.675,294.355,516.437,,,,
144238,12020,60.095,catheter3_mid3,tip6,Measured,129.79,292.759,571.621,,,,
144239,12020,60.095,catheter3_mid3,tip7,Mixed,234.199,428.096,329.068,,,,


In [59]:
# Overlay the downsampled tip7 trajectory (line_3d) with the full-resolution
# tip7 trajectory (extra Scatter3d trace), then save and display the figure.
tip = subsampled[subsampled["MARKER_NR"].eq("tip7")]
actual = dataMario[dataMario["MARKER_NR"].eq("tip7")]
fig = px.line_3d(tip, x="X", y="Y", z="Z", hover_data=["FRAME"])
fig.add_trace(
    go.Scatter3d(x=actual["X"], y=actual["Y"], z=actual["Z"], mode="lines")
)
fig.write_html("graphs/subsamplingResultsTip7.html")
fig.show()

In [58]:
# Overlay the downsampled tip1 trajectory (line_3d) with the full-resolution
# tip1 trajectory (extra Scatter3d trace), then save and display the figure.
tip = subsampled[subsampled["MARKER_NR"].eq("tip1")]
actual = dataMario[dataMario["MARKER_NR"].eq("tip1")]
fig = px.line_3d(tip, x="X", y="Y", z="Z", hover_data=["FRAME"])
fig.add_trace(
    go.Scatter3d(x=actual["X"], y=actual["Y"], z=actual["Z"], mode="lines")
)
fig.write_html("graphs/subsamplingResultsTip1.html")
fig.show()

In [10]:
# Plot every downsampled marker trajectory in one 3D figure, one colour per marker.
fig = px.line_3d(
    subsampled,
    x="X",
    y="Y",
    z="Z",
    color="MARKER_NR",
    hover_data=["FRAME"],
)
fig.write_html("subsamplingResultsFull.html")
fig.show()

In [11]:
# Re-run LTTB with a threshold of 1000; this rebinds `subsampled`.
subsampled = LTTB(dataMario, threshold=1000)
subsampled.head()

(12000,)


Unnamed: 0,FRAME,TIME,PHANTOM,MARKER_NR,TRAJECTORY_TYPE,X,Y,Z,VX,VY,VZ,VELOCITY
0,1,0.0,skull,midpoint,calculated,391.053,333.058,184.309,,,,
1,1,0.0,skull,phantom1,Measured,420.188,336.076,221.468,,,,
2,1,0.0,skull,phantom2,Measured,392.34,395.01,178.267,,,,
3,1,0.0,skull,phantom3,Measured,452.539,326.562,134.224,,,,
4,1,0.0,skull,phantom4,Measured,390.401,301.714,187.366,,,,


In [12]:
# Overlay the tip7 rows of `subsampled` with the full-resolution tip7
# trajectory, then save and display the figure.
tip = subsampled[subsampled["MARKER_NR"].eq("tip7")]
actual = dataMario[dataMario["MARKER_NR"].eq("tip7")]
fig = px.line_3d(tip, x="X", y="Y", z="Z", hover_data=["FRAME"])
fig.add_trace(
    go.Scatter3d(x=actual["X"], y=actual["Y"], z=actual["Z"], mode="lines")
)
fig.write_html("subsampling1kResultsTip7.html")
fig.show()

In [13]:
# Overlay the tip1 rows of `subsampled` with the full-resolution tip1
# trajectory, then save and display the figure.
tip = subsampled[subsampled["MARKER_NR"].eq("tip1")]
actual = dataMario[dataMario["MARKER_NR"].eq("tip1")]
fig = px.line_3d(tip, x="X", y="Y", z="Z", hover_data=["FRAME"])
fig.add_trace(
    go.Scatter3d(x=actual["X"], y=actual["Y"], z=actual["Z"], mode="lines")
)
fig.write_html("subsampling1kResultsTip1.html")
fig.show()

In [14]:
# Plot every marker trajectory in `subsampled` in one 3D figure, one colour per marker.
fig = px.line_3d(
    subsampled,
    x="X",
    y="Y",
    z="Z",
    color="MARKER_NR",
    hover_data=["FRAME"],
)
fig.write_html("subsampling1kResultsFull.html")
fig.show()

In [15]:
# Compare three versions of the tip7 trajectory in one figure: the rows of
# `subsampled` (line_3d), the full-resolution data, and the rows of
# `avgSubsampled` (two extra Scatter3d traces, added in that order).
tip = subsampled[subsampled["MARKER_NR"].eq("tip7")]
actual = dataMario[dataMario["MARKER_NR"].eq("tip7")]
avg = avgSubsampled[avgSubsampled["MARKER_NR"].eq("tip7")]
fig = px.line_3d(tip, x="X", y="Y", z="Z", hover_data=["FRAME"])
fig.add_trace(
    go.Scatter3d(x=actual["X"], y=actual["Y"], z=actual["Z"], mode="lines")
)
fig.add_trace(
    go.Scatter3d(x=avg["X"], y=avg["Y"], z=avg["Z"], mode="lines")
)
fig.write_html("subsamplingComparisonTip7.html")
fig.show()