# Clean Data

In [None]:
import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import os
import pandas as pd
import numpy as np
from scipy import stats
from scipy.ndimage import uniform_filter

from utils.io import recursive_scan2df

# Clean the per-video circle tracking tables: reject outliers (|z| > 3) in
# radius and center, fill the rejected/missing frames by linear interpolation
# over the frame number, add running averages and export one CSV per input.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_circle_tracking_individual"
data_df = recursive_scan2df(prefix, ".pkl")
# data_df = data_df.sort_values(["folder", "file"]).reset_index(drop=True)

rolling = True  # not read in this cell
window = 50  # length (in frames) of the running-average filter

# One CSV per input pickle is written into this folder.
output_dir = os.path.join(prefix, "df_interpolated")


def _is_missing(value):
    """Return True when ``value`` is a scalar missing marker (NaN / None).

    An identity test against ``np.nan`` is not sufficient here: a NaN that
    was restored from a pickle is a different float object than ``np.nan``.
    """
    return np.ndim(value) == 0 and bool(pd.isna(value))


for _, row in data_df.iterrows():
    df = pd.read_pickle(os.path.join(prefix, row.folder, row.file))

    try:
        # Frame numbers as floats serve as the x-axis of every interpolation.
        frames = df["frame"].to_numpy(dtype=float)

        # radius: drop outliers, then interpolate over the remaining samples
        radius = df["radius"].to_numpy(dtype=float, copy=True)
        z_score = np.abs(stats.zscore(radius, nan_policy='omit'))
        radius[z_score > 3] = np.nan
        valid = ~np.isnan(radius)
        radius = np.interp(frames, frames[valid], radius[valid])

        # center: one row per frame, missing detections become NaN rows
        centers = np.stack([
            np.full(2, np.nan) if _is_missing(c) else np.asarray(c, dtype=float)
            for c in df["center"]
        ])
        # Per-coordinate z-scores are combined into one distance per frame.
        z_score = np.linalg.norm(stats.zscore(centers, nan_policy='omit'), axis=1)
        centers[z_score > 3] = np.nan
        valid = ~np.isnan(centers).any(axis=1)
        centers = np.stack([
            np.interp(frames, frames[valid], centers[valid, 0]),
            np.interp(frames, frames[valid], centers[valid, 1])
        ], axis=-1)

        # set
        df["radius"] = radius.tolist()
        df["center"] = centers.tolist()
        df["frame"] = df["frame"].astype(np.int32)

        df['radius_running_average_{}'.format(window)] = uniform_filter(radius, size=window).tolist()
        # size=(window, 1): smooth along the frame axis only, never across x/y.
        df['center_running_average_{}'.format(window)] = uniform_filter(centers, size=(window, 1)).tolist()

        # Create the target folder on demand so that the export cannot fail
        # merely because it does not exist yet.
        os.makedirs(output_dir, exist_ok=True)
        df.to_csv(os.path.join(output_dir, row.file.split('.')[0] + '.csv'))
    except Exception as e:
        # Best effort: report which file could not be cleaned and move on.
        print("Skipping {}: {}".format(os.path.join(row.folder, row.file), e))
        continue

## Correct Shape

In [None]:
import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import os  # needed for os.path.join below; previously only available if an earlier cell had run
import pandas as pd
import numpy as np

from utils.io import recursive_scan2df

# Convert the list-valued columns of the exported CSV files, which pandas
# reads back as plain strings, into real Python lists / floats.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_circle_tracking_individual/df_interpolated"
data_df = recursive_scan2df(prefix, ".csv")


def _parse_list(text, cast):
    """Parse a stringified flat list such as ``"[1, 2, 3]"`` into a list of ``cast`` values."""
    return [cast(item) for item in text.replace('[', '').replace(']', '').split(',')]


for _, row in data_df.iterrows():
    df = pd.read_csv(os.path.join(prefix, row.folder, row.file), index_col=0)
    # Only the last three entries of the stored shape are kept.
    df['shape'] = df['shape'].apply(lambda x: _parse_list(x, int)[-3:])
    df['radius'] = df['radius'].apply(float)
    df['center'] = df['center'].apply(lambda x: _parse_list(x, float))
    df['radius_running_average_50'] = df['radius_running_average_50'].apply(float)
    df['center_running_average_50'] = df['center_running_average_50'].apply(lambda x: _parse_list(x, float))
    # NOTE: the write-back is disabled, so the converted frame is discarded
    # at the end of each iteration.
    # df.to_pickle(os.path.join(prefix, row.folder, row.file.split('.')[0] + ".pkl"))
    # df.to_csv(os.path.join(prefix, row.folder, row.file))

## Group Dataframes

In [None]:
import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import os  # needed for os.path.join below; previously only available if an earlier cell had run
import pandas as pd
import numpy as np

from utils.io import recursive_scan2df

# Merge the per-video tables into a single log, leaving out the videos
# listed in `skip`.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_circle_tracking_individual/df_interpolated"
data_df = recursive_scan2df(prefix, ".pkl")

skip = [21, 64, 79]

log_pickle = os.path.join(prefix, "log.pkl")

# Collect the tables first and concatenate once: DataFrame.append was removed
# in pandas 2.0 and copied the whole log on every call.
collected = []

for _, row in data_df.iterrows():
    path = os.path.join(prefix, row.folder, row.file)

    # The merged log is written into the scanned folder, so a previous run's
    # output would otherwise be merged into itself.
    # NOTE(review): assumes the scan also returns that file - confirm.
    if os.path.abspath(path) == os.path.abspath(log_pickle):
        continue

    df = pd.read_pickle(path)

    if df.vid.iloc[0] in skip:
        print("Skipping: ", df.vid.iloc[0])
        continue

    collected.append(df)

# pd.concat rejects an empty list; fall back to an empty frame as before.
log_df = pd.concat(collected, ignore_index=True) if collected else pd.DataFrame()

log_df.to_pickle(log_pickle)
log_df.to_csv(os.path.join(prefix, "log.csv"))

# Figures

In [None]:
# ideally:
# predict radius, center on BxCxHxW
# perform outlier rejection on radius/center

# currently:
# predict radius, center on reduced TxCxHxW segmentation

import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import os
from scipy import stats
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.ndimage import uniform_filter
mpl.rcParams['figure.dpi'] = 300

from utils.io import recursive_scan2df

# Plot, for every tracked video, the circle radius over the frame number and
# the circle centers as a scatter plot, optionally after outlier rejection,
# interpolation and running-average smoothing.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_circle_tracking_individual"
data_df = recursive_scan2df(prefix, ".pkl")
# data_df = data_df.sort_values(["folder", "file"]).reset_index(drop=True)

outlier_rejection = True
rolling = True
window = 50

# The output folder name encodes which processing switches are set.
# NOTE(review): with outlier_rejection disabled the data is not smoothed even
# when rolling is set; only the folder name changes.
if outlier_rejection:
    fig_dir = "figs_interpolated_rolling" if rolling else "figs_interpolated"
else:
    fig_dir = "figs_rolling" if rolling else "figs"


def _is_missing(value):
    """Return True when ``value`` is a scalar missing marker (NaN / None).

    An identity test against ``np.nan`` is not sufficient here: a NaN that
    was restored from a pickle is a different float object than ``np.nan``.
    """
    return np.ndim(value) == 0 and bool(pd.isna(value))


def _stack_centers(column):
    """Stack the per-frame centers into one float array, one row per frame.

    Missing entries become rows of two NaNs so that all rows share one shape.
    """
    return np.stack([
        np.full(2, np.nan) if _is_missing(c) else np.asarray(c, dtype=float)
        for c in column
    ])


def _save_and_clear(quantity, idx):
    """Save the current figure for ``quantity`` ('radius' or 'center') and clear it."""
    path = prefix + "/{0}/{1}/{1}_video_{2}.png".format(fig_dir, quantity, idx)
    # savefig does not create missing folders.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    plt.savefig(path, transparent=False)
    plt.clf()


for idx, row in data_df.iterrows():
    print(row.folder, row.file)
    df = pd.read_pickle(os.path.join(prefix, row.folder, row.file))

    valid = None  # mask of non-interpolated centers; stays None without outlier rejection

    if outlier_rejection:
        try:
            # Frame numbers as floats serve as the x-axis of every interpolation.
            frames = df.frame.to_numpy(dtype=float)

            # radius: drop outliers (|z| > 3), interpolate, optionally smooth
            radius = df.radius.to_numpy(dtype=float, copy=True)
            z_score = np.abs(stats.zscore(radius, nan_policy='omit'))
            radius[z_score > 3] = np.nan
            radius_valid = ~np.isnan(radius)
            radius = np.interp(frames, frames[radius_valid], radius[radius_valid])
            if rolling:
                radius = uniform_filter(radius, size=window)

            # center: per-coordinate z-scores combined into one distance per frame
            centers = _stack_centers(df.center)
            z_score = np.linalg.norm(stats.zscore(centers, nan_policy='omit'), axis=1)
            centers[z_score > 3] = np.nan
            valid = ~np.isnan(centers).any(axis=1)
            centers = np.stack([
                np.interp(frames, frames[valid], centers[valid, 0]),
                np.interp(frames, frames[valid], centers[valid, 1])
            ], axis=-1)
            if rolling:
                # size=(window, 1): smooth along the frame axis only.
                centers = uniform_filter(centers, size=(window, 1))
        except Exception as e:
            print('exception thrown')
            print(e)
            continue
    else:
        radius = df.radius
        centers = _stack_centers(df.center)

    # radius over frame number
    plt.plot(df.frame, radius, label="Video {}".format(idx))
    plt.grid()
    plt.legend()
    plt.xlabel("Frame / #")
    plt.ylabel("Radius / pixels")
    _save_and_clear("radius", idx)

    # center scatter; interpolated frames are drawn as a separate series
    if valid is None:
        plt.scatter(centers[:, 0], centers[:, 1], label="Video {}".format(idx))
    else:
        plt.scatter(centers[valid, 0], centers[valid, 1], label="Video {}".format(idx))
        if not valid.all():
            plt.scatter(centers[~valid, 0], centers[~valid, 1], label="Video {}. Interpolated.".format(idx))
    plt.grid()
    plt.legend()
    _save_and_clear("center", idx)

