In [1]:
import numpy as np
import pandas as pd
import pickle
import plotly.graph_objects as go
import plotly.express as px
from scipy import stats

import numpy

# Loading the data

## Specifying gesture

In [2]:
# Gesture and subject identifiers used to build the data file paths.
gesture_name = "click"
subjects = ["Subject" + str(i) for i in range(1, 6)]

In [3]:
# Build, for every gesture, a 3x3 matrix holding the mean point-wise Euclidean
# distance between the mean normalized gestures recorded at 50, 75 and 100 cm.
gesture_name = "click"
subjects = [f"Subject{i}" for i in range(1, 6)]

gesture_names = ["click", "closed", "down", "mouse_tracking", "negative_closed", "negative_mouse_tracking",
                 "negative_side", "negative_up", "side", "up"]

# True reads the pooled "express" files; False pools the per-subject "randomized" files.
express = False


def _load_samples(path: str) -> list:
    """Unpickle and return the content of ``path``.

    NOTE: ``pickle.load`` can execute arbitrary code; only read trusted recordings.
    """
    with open(path, "rb") as file:
        return pickle.load(file)


def normalize(z: float, max: float, min: float) -> float:
    """Min-max scale ``z`` with the given bounds and round to 3 decimals.

    A zero range (``max == min``) yields 0.0 instead of a division by zero.
    """
    span = max - min
    if span == 0:
        return 0.0

    return round((z - min) / span, 3)


def normalize_landmarks(landmarks: list) -> list:
    """Min-max scale every landmark set independently on x and y.

    Each landmark set is read as ``landmark[i][0]`` (x) and ``landmark[i][1]`` (y);
    the result is a list of ``[x_norm, y_norm]`` pairs per landmark set.
    """
    norm_landmarks = list()

    for landmark in landmarks:
        xs = [point[0] for point in landmark]
        ys = [point[1] for point in landmark]

        xmax, xmin = max(xs), min(xs)
        ymax, ymin = max(ys), min(ys)

        norm_landmarks.append([
            [normalize(z=x, max=xmax, min=xmin), normalize(z=y, max=ymax, min=ymin)]
            for x, y in zip(xs, ys)
        ])

    return norm_landmarks


def get_mean_gesture(samples: list, num_points: int = 21) -> list:
    """Average the first ``num_points`` points over all samples.

    Returns one ``[x_mean, y_mean]`` pair per point index.
    """
    mean_gesture = list()

    for i in range(num_points):
        x_mean = np.mean([sample[i][0] for sample in samples])
        y_mean = np.mean([sample[i][1] for sample in samples])

        mean_gesture.append([x_mean, y_mean])

    return mean_gesture


diffs = list()

# The loop variable intentionally stays `gesture_name`, as in the original cell,
# so later cells still see the last processed gesture.
for gesture_name in gesture_names:
    if express:
        data_50 = _load_samples(f"data/express/50cm/{gesture_name}")
        data_75 = _load_samples(f"data/express/75cm/{gesture_name}")
        data_100 = _load_samples(f"data/express/100cm/{gesture_name}")

    else:
        data_50 = list()
        data_75 = list()
        data_100 = list()

        for subject in subjects:
            data_50 += _load_samples(f"data/randomized/{subject}/50cm/{gesture_name}")
            data_75 += _load_samples(f"data/randomized/{subject}/75cm/{gesture_name}")
            data_100 += _load_samples(f"data/randomized/{subject}/100cm/{gesture_name}")

    norm_50 = normalize_landmarks(landmarks=data_50)
    norm_75 = normalize_landmarks(landmarks=data_75)
    norm_100 = normalize_landmarks(landmarks=data_100)

    mean_gesture_50cm = get_mean_gesture(samples=norm_50)
    mean_gesture_75cm = get_mean_gesture(samples=norm_75)
    mean_gesture_100cm = get_mean_gesture(samples=norm_100)

    xs_50 = [point[0] for point in mean_gesture_50cm]
    ys_50 = [point[1] for point in mean_gesture_50cm]

    xs_75 = [point[0] for point in mean_gesture_75cm]
    ys_75 = [point[1] for point in mean_gesture_75cm]

    xs_100 = [point[0] for point in mean_gesture_100cm]
    ys_100 = [point[1] for point in mean_gesture_100cm]

    # Overlay of the three mean gestures; uncomment fig.show() to inspect each gesture.
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=xs_50, y=ys_50, mode="markers", name="50 cm"))
    fig.add_trace(go.Scatter(x=xs_75, y=ys_75, mode="markers", name="75 cm"))
    fig.add_trace(go.Scatter(x=xs_100, y=ys_100, mode="markers", name="100 cm"))

    fig.update_layout(
        xaxis=dict(autorange="reversed"),
        yaxis=dict(autorange="reversed"),
    )

    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)

    # fig.show()

    dist_map = {
        "50 cm": mean_gesture_50cm,
        "75 cm": mean_gesture_75cm,
        "100 cm": mean_gesture_100cm
    }

    dif_matrix = pd.DataFrame(columns=list(dist_map), index=list(dist_map))

    for dist, gest in dist_map.items():
        for comp_dist, comp in dist_map.items():
            # Compare labels, not point lists: the diagonal is "same distance",
            # regardless of whether two mean gestures happen to be equal.
            if dist == comp_dist:
                dif_matrix.loc[comp_dist, dist] = np.nan
                continue

            difs = [
                pow(pow(comp_x - gest_x, 2) + pow(comp_y - gest_y, 2), 0.5)
                for (gest_x, gest_y), (comp_x, comp_y) in zip(gest, comp)
            ]

            # Single .loc call: chained `df[col].loc[row] = ...` may write to a copy.
            dif_matrix.loc[comp_dist, dist] = np.mean(difs)

    diffs.append(dif_matrix)

In [23]:


# Scatter of the mean 50 cm gesture; xs_50 / ys_50 must already be defined by an earlier cell.
fig = go.Figure(data=[go.Scatter(x=xs_50, y=ys_50, mode="markers", name="50 cm")])

# Reverse both axes, then hide them.
fig.update_layout(
    xaxis={"autorange": "reversed"},
    yaxis={"autorange": "reversed"},
)
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)

fig.show()

In [None]:
# Flatten the upper triangle of every per-gesture matrix in the fixed order
# iloc[0, 1], iloc[0, 2], iloc[1, 2], i.e. three values per gesture.
all_diffs = list()

for df in diffs:
    all_diffs += [df.iloc[0, 1], df.iloc[0, 2], df.iloc[1, 2]]

# Derive the gesture count from the data instead of hard-coding it.
n_gestures = len(diffs)

# One series per distance pair (every third value of the flat list).
blue_difs = all_diffs[0::3]
red_difs = all_diffs[1::3]
green_difs = all_diffs[2::3]

fig = px.bar(all_diffs, color=["blue", "red", "green"] * n_gestures)

# NOTE(review): the reference line is the mean of green_difs only, not of all_diffs — confirm intent.
mean_dif = np.mean(green_difs)
fig.add_hline(y=mean_dif, line=dict(color="red", dash="dash"))

# Vertical separators between the groups of three bars belonging to one gesture.
for x in [-0.5 + 3 * i for i in range(n_gestures + 1)]:
    fig.add_vline(x=x, line=dict(color="green", dash="longdashdot"))

fig.update_xaxes(visible=False)
fig.update_layout(yaxis_title="Diferença geométrica", legend_title="Par de distâncias")

print(mean_dif)
fig.show()

In [None]:
# Spread, across gestures, of the first pairwise difference (blue_difs = every third value of all_diffs).
np.std(blue_difs)

In [None]:
# Split the flat list into one series per distance pair. Stride slicing follows the
# actual length of all_diffs instead of assuming exactly 10 gestures.
blue_difs = all_diffs[0::3]
red_difs = all_diffs[1::3]
green_difs = all_diffs[2::3]

In [None]:
# Bar chart of blue_difs with its mean drawn as a dashed reference line.
fig = px.bar(blue_difs)
blue_mean_dif = np.mean(blue_difs)
print(blue_mean_dif)
fig.add_hline(y=blue_mean_dif, line=dict(color="red", dash="dash"))

# One separator on each side of every bar; count follows the data, not a fixed 10.
for x in [-0.5 + i for i in range(len(blue_difs) + 1)]:
    fig.add_vline(x=x, line=dict(color="green", dash="longdashdot"))

fig.update_xaxes(visible=False)
fig.update_layout(yaxis_title="Diferença geométrica")
fig.show()

In [None]:
# Scatter of red_difs with its mean drawn as a dashed reference line.
# The colour/symbol lists are sized from the data rather than a fixed 10.
fig = px.scatter(red_difs, color=["red"] * len(red_difs), symbol=["square"] * len(red_difs))

red_mean_dif = np.mean(red_difs)
fig.add_hline(y=red_mean_dif, line=dict(color="red", dash="dash"))

# One separator on each side of every point.
for x in [-0.5 + i for i in range(len(red_difs) + 1)]:
    fig.add_vline(x=x, line=dict(color="green", dash="longdashdot"))

fig.update_xaxes(visible=False)
fig.update_layout(yaxis_title="Diferença geométrica")
fig.show()

In [None]:
# Scatter of blue_difs with its mean drawn as a dashed reference line.
fig = px.scatter(blue_difs)

blue_mean_dif = np.mean(blue_difs)
fig.add_hline(y=blue_mean_dif, line=dict(color="red", dash="dash"))

# One separator on each side of every point; count follows the data, not a fixed 10.
for x in [-0.5 + i for i in range(len(blue_difs) + 1)]:
    fig.add_vline(x=x, line=dict(color="green", dash="longdashdot"))

fig.update_xaxes(visible=False)
fig.update_layout(yaxis_title="Diferença geométrica")
fig.show()

In [None]:
import plotly.figure_factory as ff

# Distribution plot of all pairwise differences pooled into a single group.
group_labels = ['distplot']  # name of the dataset
hist_data = [all_diffs]

fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels, bin_size=0.03)
fig.show()

## 50 cm data

In [None]:
# Pool the 50 cm recordings of every subject for the current `gesture_name`.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
data_50 = []

for subject in subjects:
    path_50 = f"data/randomized/{subject}/50cm/{gesture_name}"
    with open(path_50, "rb") as file:
        temp = pickle.load(file)
    data_50.extend(temp)

## 75 cm data

In [None]:
# Pool the 75 cm recordings of every subject for the current `gesture_name`.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
data_75 = []

for subject in subjects:
    path_75 = f"data/randomized/{subject}/75cm/{gesture_name}"
    with open(path_75, "rb") as file:
        temp = pickle.load(file)
    data_75.extend(temp)

## 100 cm data

In [None]:
# Pool the 100 cm recordings of every subject for the current `gesture_name`.
# NOTE: pickle.load can execute arbitrary code; only read trusted files.
data_100 = []

for subject in subjects:
    path_100 = f"data/randomized/{subject}/100cm/{gesture_name}"
    with open(path_100, "rb") as file:
        temp = pickle.load(file)
    data_100.extend(temp)

# Normalizing the landmarks using MinMax Scaling

In [None]:
def normalize(z: float, max: float, min: float) -> float:
    """Min-max scale ``z`` and round the result to 3 decimals.

    Args:
        z: Value to scale.
        max: Upper bound of the range (maps to 1.0).
        min: Lower bound of the range (maps to 0.0).

    Returns:
        ``(z - min) / (max - min)`` rounded to 3 decimals, or 0.0 when the
        range is empty (``max == min``) so a constant axis does not divide by zero.
    """
    # `max` / `min` shadow the builtins inside this function only; the names are
    # kept because callers pass them as keywords.
    span = max - min
    if span == 0:
        return 0.0

    return round((z - min) / span, 3)

In [None]:
def normalize_landmarks(landmarks: list) -> list:
    """Min-max scale every landmark set independently on each axis.

    Each landmark set is read as ``point[0]`` (x) and ``point[1]`` (y). The
    bounds are taken from that set alone, and every coordinate goes through
    ``normalize``.

    Returns:
        One list of ``[x_norm, y_norm]`` pairs per input landmark set.
    """
    scaled_sets = []

    for landmark in landmarks:
        xs = [point[0] for point in landmark]
        ys = [point[1] for point in landmark]

        x_hi, y_hi = max(xs), max(ys)
        x_lo, y_lo = min(xs), min(ys)

        scaled_sets.append([
            [normalize(z=x, max=x_hi, min=x_lo), normalize(z=y, max=y_hi, min=y_lo)]
            for x, y in zip(xs, ys)
        ])

    return scaled_sets

In [None]:
# Normalize the pooled samples of each distance (50, 75, 100 cm order).
norm_50, norm_75, norm_100 = (
    normalize_landmarks(landmarks=samples) for samples in (data_50, data_75, data_100)
)

In [None]:
# x / y coordinate lists of the first four normalized 50 cm samples.
xs_1, ys_1 = ([point[axis] for point in norm_50[0]] for axis in (0, 1))
xs_2, ys_2 = ([point[axis] for point in norm_50[1]] for axis in (0, 1))
xs_3, ys_3 = ([point[axis] for point in norm_50[2]] for axis in (0, 1))
xs_4, ys_4 = ([point[axis] for point in norm_50[3]] for axis in (0, 1))

In [None]:
# Overlay of the first three normalized 50 cm samples, each point labelled by its index.
point_labels = [f"Point {i}" for i in range(len(norm_50[0]))]

fig = go.Figure(
    data=[
        # NOTE(review): this trace plots xs_1 against itself (y=xs_1) — confirm it is intentional.
        go.Scatter(x=xs_1, y=xs_1, mode="markers", name="Sample X"),
        go.Scatter(x=xs_1, y=ys_1, mode="markers+text", name="Sample 1", opacity=1,
                   text=point_labels, textposition="bottom center"),
        go.Scatter(x=xs_2, y=ys_2, mode="markers+text", name="Sample 2", opacity=0.5,
                   text=point_labels, textposition="top center"),
        go.Scatter(x=xs_3, y=ys_3, mode="markers+text", name="Sample 3", opacity=0.1,
                   text=point_labels, textposition="bottom right"),
    ]
)

# Reverse both axes, then hide them.
fig.update_layout(
    xaxis={"autorange": "reversed"},
    yaxis={"autorange": "reversed"},
)
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)

fig.show()

# Checking the "mean" gesture for each gesture

In [None]:
def get_mean_gesture(samples: list, num_points: int = 21) -> list:
    """Average each point position over all samples.

    Args:
        samples: Sequence of samples, each indexed as ``sample[i][0]`` (x)
            and ``sample[i][1]`` (y).
        num_points: Number of leading points to average per sample. Defaults
            to 21, the value that was previously hard-coded.

    Returns:
        A list of ``num_points`` pairs ``[x_mean, y_mean]``.
    """
    mean_gesture = list()

    for i in range(num_points):
        x_mean = np.mean([sample[i][0] for sample in samples])
        y_mean = np.mean([sample[i][1] for sample in samples])

        mean_gesture.append([x_mean, y_mean])

    return mean_gesture

In [None]:
# Mean gesture per distance, then an overlay of the three point clouds.
mean_gesture_50cm = get_mean_gesture(samples=norm_50)
mean_gesture_75cm = get_mean_gesture(samples=norm_75)
mean_gesture_100cm = get_mean_gesture(samples=norm_100)

xs_50 = [point[0] for point in mean_gesture_50cm]
ys_50 = [point[1] for point in mean_gesture_50cm]

xs_75 = [point[0] for point in mean_gesture_75cm]
ys_75 = [point[1] for point in mean_gesture_75cm]

xs_100 = [point[0] for point in mean_gesture_100cm]
ys_100 = [point[1] for point in mean_gesture_100cm]

fig = go.Figure(
    data=[
        go.Scatter(x=xs_50, y=ys_50, mode="markers", name="50 cm"),
        go.Scatter(x=xs_75, y=ys_75, mode="markers", name="75 cm"),
        go.Scatter(x=xs_100, y=ys_100, mode="markers", name="100 cm"),
    ]
)

# Reverse both axes, then hide them.
fig.update_layout(
    xaxis={"autorange": "reversed"},
    yaxis={"autorange": "reversed"},
)
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)

fig.show()

# Calculating the distances of each point to the center

In [None]:
def get_distance_of_points_to_center(landmark: list) -> list:
    """Return the Euclidean distance of every point to the landmark's centroid.

    The centroid is the mean of all x (``point[0]``) and y (``point[1]``)
    coordinates; the result keeps the order of ``landmark``.
    """
    center_x = np.mean([point[0] for point in landmark])
    center_y = np.mean([point[1] for point in landmark])

    return [
        ((point[0] - center_x) ** 2 + (point[1] - center_y) ** 2) ** 0.5
        for point in landmark
    ]

In [None]:
def get_landmarks_distances(landmarks: list) -> list:
    """Apply ``get_distance_of_points_to_center`` to every landmark set, in order."""
    return [
        get_distance_of_points_to_center(landmark=landmark)
        for landmark in landmarks
    ]

In [None]:
def get_mean_distances_for_subset(landmarks: list) -> list:
    """Return one value per landmark set: the mean point-to-centroid distance.

    The return annotation is ``list`` because one mean is produced for every
    landmark set, not a single float.
    """
    distances = get_landmarks_distances(landmarks=landmarks)

    return [np.mean(dist) for dist in distances]

def get_mean_var_for_subset(landmarks: list) -> list:
    """Return one value per landmark set: the variance of its point-to-centroid distances.

    The return annotation is ``list`` because one variance is produced for
    every landmark set, not a single float.
    """
    distances = get_landmarks_distances(landmarks=landmarks)

    return [np.var(dist) for dist in distances]

In [None]:
# Per-sample mean and variance of the point-to-centroid distances, for each distance.
means_50, vars_50 = (
    get_mean_distances_for_subset(landmarks=norm_50),
    get_mean_var_for_subset(landmarks=norm_50),
)

means_75, vars_75 = (
    get_mean_distances_for_subset(landmarks=norm_75),
    get_mean_var_for_subset(landmarks=norm_75),
)

means_100, vars_100 = (
    get_mean_distances_for_subset(landmarks=norm_100),
    get_mean_var_for_subset(landmarks=norm_100),
)

In [None]:
# One column per distance (50, 75, 100 cm order), one row per sample.
means_df = pd.DataFrame(data=[means_50, means_75, means_100]).transpose()
vars_df = pd.DataFrame(data=[vars_50, vars_75, vars_100]).transpose()

In [None]:
# Scatter of the per-sample mean distances; means_df has one column per distance (50, 75, 100 cm).
px.scatter(means_df)

# Hypothesis testing. First we check whether the three groups of variances follow a normal distribution. If all of them do, we perform a one-way ANOVA test; if at least one does not, we perform a Kruskal–Wallis test. Both tests have the null hypothesis that the three groups of variances are similar. If the p-value is less than 0.05, we can reject this hypothesis. We test the normality of the variances using the Shapiro–Wilk test, whose null hypothesis is that the data are drawn from a normal distribution.

In [None]:
# Normality check (Shapiro) on the per-sample variances of each distance.
shapiro_results = {
    "50cm": stats.shapiro(vars_50),
    "75cm": stats.shapiro(vars_75),
    "100cm": stats.shapiro(vars_100)
}

p_values = [results.pvalue for results in shapiro_results.values()]

# p > 0.05 means normality is NOT rejected for that group.
normality_not_rejected = [p_value > 0.05 for p_value in p_values]

is_normal = all(normality_not_rejected)

if is_normal:
    print("All variances are drown from a normal distribution! Their p-values ares:")
    print(f"    50cm: {p_values[0]}    75cm: {p_values[1]}    100cm: {p_values[2]}\n")

    print("Using the ANOVA One Way test...\n")

    # Run ANOVA on the same quantity whose normality was just checked (the
    # variances), matching the Kruskal branch below and main().
    result = stats.f_oneway(vars_50, vars_75, vars_100)

else:
    print("One or more of the variances is NOT drown from a normal distribution. Their p-values are:")
    print(f"    50cm: {p_values[0]}    75cm: {p_values[1]}    100cm: {p_values[2]}\n")

    print("Using the Kruskal test...\n")

    result = stats.kruskal(vars_50, vars_75, vars_100)

print(f"The test p-value is: {result.pvalue}\n")

# p > 0.05: the null hypothesis (no difference between groups) is not rejected.
if result.pvalue > 0.05:
    print("They are the same!")

else:
    print("They are NOT the same!")

In [None]:
# Variances keyed by distance label.
# NOTE: this rebinds the built-in name `vars` at notebook scope.
vars = dict(zip(("50cm", "75cm", "100cm"), (vars_50, vars_75, vars_100)))

In [None]:
def u_test_dataframe(vars: dict) -> pd.DataFrame:
    """Build a square table of pairwise Mann-Whitney U p-values.

    Args:
        vars: Mapping of group label to its sample values. The parameter name
            shadows the built-in ``vars`` inside this function; it is kept
            because callers pass it as a keyword.

    Returns:
        A DataFrame indexed and labelled by the keys of ``vars``. The diagonal
        is 1; cell ``[pair, var]`` holds the p-value of
        ``stats.mannwhitneyu(vars[var], vars[pair])``.
    """
    labels = list(vars.keys())
    df = pd.DataFrame(columns=labels, index=labels)

    for var in labels:
        for pair in labels:
            if var == pair:
                result = 1

            else:
                result = stats.mannwhitneyu(vars.get(var), vars.get(pair)).pvalue

            # Single .loc call: chained `df[var].loc[pair] = ...` may write to a copy.
            df.loc[pair, var] = result

    return df

In [None]:
import plotly.figure_factory as ff

In [None]:
# Distribution plot of the per-sample variances, one group per distance.
fig = ff.create_distplot(
    hist_data=[vars_50, vars_75, vars_100],
    group_labels=["50cm", "75cm", "100cm"],
)
fig.show()

In [None]:
def main(gesture_name: str, kruskal: bool = True) -> None:
    """Test whether the per-sample distance variances of a gesture differ by distance.

    Loads every subject's recordings at 50, 75 and 100 cm, normalizes them,
    computes the variance of the point-to-centroid distances per sample, and
    prints the outcome of the selected statistical test.

    Args:
        gesture_name: File name of the gesture inside each distance folder.
        kruskal: Only used when at least one group fails the Shapiro normality
            check. True (the default, previously hard-coded) runs the Kruskal
            test; False prints the pairwise Mann-Whitney U p-value table.
    """
    print(f"------------- Gesture: {gesture_name} -------------\n")

    subjects = [f"Subject{i}" for i in range(1, 6)]

    def _load_distance(distance: str) -> list:
        """Concatenate every subject's pickled samples for one distance folder."""
        samples = list()

        for subject in subjects:
            # NOTE: pickle.load can execute arbitrary code; only read trusted recordings.
            with open(f"data/randomized/{subject}/{distance}/{gesture_name}", "rb") as file:
                samples += pickle.load(file)

        return samples

    norm_50 = normalize_landmarks(landmarks=_load_distance("50cm"))
    norm_75 = normalize_landmarks(landmarks=_load_distance("75cm"))
    norm_100 = normalize_landmarks(landmarks=_load_distance("100cm"))

    vars_50 = get_mean_var_for_subset(landmarks=norm_50)
    vars_75 = get_mean_var_for_subset(landmarks=norm_75)
    vars_100 = get_mean_var_for_subset(landmarks=norm_100)

    p_values = [stats.shapiro(group).pvalue for group in (vars_50, vars_75, vars_100)]

    # p > 0.05 for every group means normality is not rejected anywhere.
    is_normal = all(p_value > 0.05 for p_value in p_values)

    p_value_line = f"    50cm: {p_values[0]}    75cm: {p_values[1]}    100cm: {p_values[2]}\n"

    if is_normal:
        print("All variances are drown from a normal distribution! Their p-values ares:")
        print(p_value_line)

        print("Using the ANOVA One Way test...\n")

        result = stats.f_oneway(vars_50, vars_75, vars_100)

    elif kruskal:
        print("One or more of the variances is NOT drown from a normal distribution. Their p-values are:")
        print(p_value_line)

        print("Using the Kruskal test...\n")

        result = stats.kruskal(vars_50, vars_75, vars_100)

    else:
        variances_by_distance = {
            "50cm": vars_50,
            "75cm": vars_75,
            "100cm": vars_100
        }

        print(u_test_dataframe(vars=variances_by_distance))
        return

    print(f"The test p-value is: {result.pvalue}\n")

    # p > 0.05: the null hypothesis (no difference between groups) is not rejected.
    if result.pvalue > 0.05:
        print("They are the same!\n")

    else:
        print("They are NOT the same!\n")

In [None]:
# Run the variance comparison for every recorded gesture.
gesture_names = [
    "click", "closed", "down", "mouse_tracking", "negative_closed",
    "negative_mouse_tracking", "negative_side", "negative_up", "side", "up",
]

for gesture_name in gesture_names:
    main(gesture_name)