In [None]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
import seaborn as sns

In [None]:
def load_data(block, data_folder):
    """Read the CSV file of one block into a DataFrame.

    Parameters
    ----------
    block
        Value interpolated into the file name ``pre_data{block}.csv``.
    data_folder
        Folder that contains the CSV files.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from that file with default options.
    """
    csv_path = f'{data_folder}/pre_data{block}.csv'
    frame = pd.read_csv(csv_path)
    return frame

In [None]:
def scatter_and_save_fig(data, subject, block, scatter_folder):
    """Draw the scatter plot of one block and save it to disk.

    Plots ``y_left`` against ``x_left`` from ``data``, coloured by the
    ``label`` column, and writes the image to
    ``{scatter_folder}/block{block}`` at 400 dpi.

    The plot is drawn on a figure created for this call only and that figure
    is closed afterwards. Drawing on pyplot's implicit current figure instead
    would stack the points of every earlier call on the same axes and keep
    all figures alive when the function is used in a loop.

    Parameters
    ----------
    data
        Table with ``x_left``, ``y_left`` and ``label`` columns.
    subject
        Not used; kept so existing callers keep working.
    block
        Value interpolated into the output file name.
    scatter_folder
        Folder that receives the image. It is not created here.
    """
    fig, ax = plt.subplots()
    try:
        sns.scatterplot(data=data, x='x_left', y='y_left', hue='label', ax=ax)
        fig.savefig(f'{scatter_folder}/block{block}', dpi=400)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

In [None]:
def get_train_test_split(data, test_size=0.2, random_state=None):
    """Split a block into train and test features and labels.

    The first column of ``data`` is used as the label vector and all
    remaining columns as the feature matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose first column holds the labels.
    test_size : float, optional
        Passed to ``train_test_split``; defaults to 0.2, the value that was
        previously hard-coded.
    random_state : optional
        Passed to ``train_test_split``. The default ``None`` keeps the
        previous unseeded behaviour; pass an int for a reproducible split.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` as returned by
        ``train_test_split(x, y, ...)``.
    """
    # Slice before converting: converting the whole frame at once would give
    # a single object-dtype array whenever the label column holds strings,
    # and the features would inherit that dtype.
    y = data.iloc[:, 0].to_numpy()
    x = data.iloc[:, 1:].to_numpy()
    return train_test_split(x, y, test_size=test_size, random_state=random_state)

In [None]:
def classic_svc(X, x_test, y, y_test, scores_dict, block):
    """Fit an SVC on the training split and record its test score.

    An ``SVC(gamma='auto')`` is fitted on ``X`` / ``y``; the value of its
    ``score(x_test, y_test)`` is stored in ``scores_dict`` under the key
    ``'svc_<block>'``. Nothing is returned.
    """
    fitted_model = SVC(gamma='auto').fit(X, y)
    test_score = fitted_model.score(x_test, y_test)
    scores_dict[f'svc_{block}'] = test_score

In [None]:
def manual_count_train_lables(X, y):
    """Group the training samples by their direction label.

    For every position in ``y`` the element ``X[position]`` is appended to
    the list that belongs to its label. Labels other than ``'up'``,
    ``'down'``, ``'left'`` and ``'right'`` are ignored.

    Returns
    -------
    tuple of list
        ``(train_down, train_up, train_left, train_right)`` - note that
        "down" comes first.
    """
    grouped = {'up': [], 'down': [], 'left': [], 'right': []}
    for position, direction in enumerate(y):
        for name, members in grouped.items():
            if direction == name:
                members.append(X[position])
    return grouped['down'], grouped['up'], grouped['left'], grouped['right']

In [None]:
def calculate_avg(train_down, train_up, train_left, train_right):
    """Compute the mean sample of each label group.

    Every group is converted with ``np.array`` and averaged along axis 0,
    i.e. over its samples.

    Returns
    -------
    tuple
        ``(avg_down, avg_up, avg_left, avg_right)``, in the same order as
        the arguments.
    """
    groups = (train_down, train_up, train_left, train_right)
    return tuple(np.array(samples).mean(axis=0) for samples in groups)

In [None]:
def manual_calculate_score(avg_down, avg_up, avg_left, avg_right, x_test, y_test, scores_dict, block):
    """Score a nearest-centroid classifier on the test split.

    Each test sample is assigned the label of the closest class mean, using
    the Euclidean distance, and the fraction of correct assignments is
    stored in ``scores_dict`` under the key ``'manual_<block>'``.

    The distance is ``norm(sample - centroid)``. Taking the norm of the
    *squared* differences, as this function did before, yields
    ``sqrt(sum(d ** 4))``; that is not the Euclidean distance and can select
    a different centroid.

    Parameters
    ----------
    avg_down, avg_up, avg_left, avg_right
        Mean feature vector of each class.
    x_test
        Iterable of test samples with the same length as the class means.
    y_test
        Labels matching ``x_test`` position by position.
    scores_dict : dict
        Updated in place with the accuracy.
    block
        Value interpolated into the dictionary key.

    Notes
    -----
    If several centroids are equally close, the first one in the order
    up, down, left, right wins. For an empty test set NaN is recorded,
    because an accuracy is undefined there.
    """
    labels = ('up', 'down', 'left', 'right')
    centroids = [
        np.asarray(centroid, dtype=float)
        for centroid in (avg_up, avg_down, avg_left, avg_right)
    ]

    correct = 0
    for sample, truth in zip(x_test, y_test):
        point = np.asarray(sample, dtype=float)
        distances = [np.linalg.norm(point - centroid) for centroid in centroids]
        if truth == labels[int(np.argmin(distances))]:
            correct += 1

    total = len(y_test)
    scores_dict[f'manual_{block}'] = correct / total if total else float('nan')

In [None]:
# Number of data partitions that the evaluation loop iterates over.
num_partitions = 10
# Accuracy per classifier and partition, keyed '<classifier>_<partition>'.
scores_dict = dict()

In [None]:
# train_test_split draws from NumPy's global random state when it is given no
# random_state, so seed it once to make the scores reproducible across runs.
np.random.seed(0)

for part in range(num_partitions):
    data = load_data(part, 'data')
    # Actually call the plotting helper (a bare function name does nothing).
    # Its `subject` argument is unused, hence None; the image is written to
    # the working directory.
    scatter_and_save_fig(data, None, part, '.')
    # Make sure no figure stays open, so the next partition never draws on
    # top of this one.
    plt.close('all')
    X, x_test, y, y_test = get_train_test_split(data)
    classic_svc(X, x_test, y, y_test, scores_dict, part)
    train_down, train_up, train_left, train_right = manual_count_train_lables(X, y)
    avg_down, avg_up, avg_left, avg_right = calculate_avg(train_down, train_up, train_left, train_right)
    manual_calculate_score(avg_down, avg_up, avg_left, avg_right, x_test, y_test, scores_dict, part)

Final results

In [None]:
# Collect the recorded scores per classifier, then average the first and the
# second half of the time frames separately.
# Displayed as: (manual first half, svc first half, manual second half, svc second half)

man = [0] * num_partitions
svc = [0] * num_partitions
for name, score in scores_dict.items():
    classifier, idx = name.split('_')
    # Every key that is not 'manual_<n>' is counted as an SVC score.
    target = man if classifier == 'manual' else svc
    target[int(idx)] = score
h = num_partitions // 2
tuple(np.average(chunk) for chunk in (man[:h], svc[:h], man[h:], svc[h:]))

In [None]:
# Plot the accuracy of both classifiers per time frame and save the figure
# as 'coherence' in the working directory.

x = list(range(num_partitions))
# Use a dedicated figure instead of whatever pyplot figure happens to be current.
fig, ax = plt.subplots()
# Label each line directly so the legend does not depend on the call order.
ax.plot(x, man, label='maximum correlation')
ax.plot(x, svc, color='red', label='svm')
ax.set_xlabel('time frame')
ax.set_ylabel('accuracy')
ax.set_title("Accuracy by time frames")
ax.legend()
fig.savefig('coherence', dpi=400)