In [3]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
"""Load data as data frame"""
# Directories scanned by load_data for the train and test workbooks.
# NOTE(review): absolute, machine-specific paths - adjust when running elsewhere.
src_train, src_test = (
    '/home/melandur/tmp/csv/train/7_merged/',
    '/home/melandur/tmp/csv/test/7_merged/',
)

def load_data(path):
    """Load the per-segment AHA Excel workbooks found directly in ``path``.

    A file is loaded when its name contains ``'aha'`` and contains neither
    ``'sample'`` nor ``'global'``. The filter is applied before reading, so
    workbooks that would be discarded anyway are never parsed.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        dict mapping a key built from the file name to a DataFrame with the
        first row and the first column removed. The key is
        ``<field 1>_<field 2>_<last field>`` of the underscore-separated file
        name, with everything from ``'.xlsx'`` onwards stripped.

    Raises:
        OSError: If ``path`` cannot be listed.
    """
    df_store = {}
    for file in os.listdir(path):
        if 'aha' not in file or 'sample' in file or 'global' in file:
            continue
        df = pd.read_excel(os.path.join(path, file))
        parts = file.split('_')
        # e.g. 'x_longit_strain_aha_3.xlsx' -> 'longit_strain_3'
        name = f"{'_'.join(parts[1:3])}_{parts[-1]}".split('.xlsx')[0]
        df_store[name] = df.iloc[1:, 1:]  # drop first column and row
    return df_store

# Load both cohorts; each result is a dict of DataFrames keyed by the names load_data builds.
df_train, df_test = load_data(src_train), load_data(src_test)

In [None]:
def euclider(store, name, min=22, max=23):
    """Combine the three directional frames of each segment into one magnitude.

    For every segment index 1..16 the frames stored under
    ``longit_<name>_<idx>``, ``circumf_<name>_<idx>`` and
    ``radial_<name>_<idx>`` are sliced to columns ``min:max`` (positional) and
    merged element-wise as ``sqrt(|l|**2 + |c|**2 + |r|**2)``. The result keeps
    the index and column labels of the longitudinal frame.

    Args:
        store: Mapping from frame key to DataFrame.
        name: Middle part of the frame keys (e.g. ``'strain'``).
        min: First column position to include. Shadows the builtin; the name
            is kept because callers pass it by keyword.
        max: Column position to stop before. Shadows the builtin for the same
            reason.

    Returns:
        Tuple ``(magnitudes, label)`` where ``magnitudes`` maps the segment
        index as a string to its magnitude DataFrame, and ``label`` is the
        first column label of the last segment's longitudinal slice.
    """
    magnitudes = {}
    label = None
    for segment in range(1, 17):
        longit, circumf, radial = (
            store[f'{axis}_{name}_{segment}'].iloc[:, min:max]
            for axis in ('longit', 'circumf', 'radial')
        )
        label = longit.columns.values[0]  # get case name
        # Only the longitudinal term stays a DataFrame; the other two are added
        # as raw arrays so no index/column alignment takes place.
        squared_sum = longit.abs() ** 2 + (circumf.abs() ** 2).values + (radial.abs() ** 2).values
        magnitudes[f'{segment}'] = squared_sum ** (1 / 2)
    return magnitudes, label

In [None]:
def _flatten_segment(store, i):
    """Return segment ``i`` of ``store`` as one long 'value' Series, last row dropped."""
    frame = store[f'{i}']
    trimmed = frame.drop(frame.tail(1).index)
    # melt() stacks all columns into 'variable'/'value'; only the values are needed.
    return trimmed.melt()['value']


def get_data(i):
    """Assemble the four per-segment measures into train and test frames.

    Reads the module-level stores ``df_e_{train,test}_{s,v,a,sa}``, which must
    have been assigned before the call (the plotting loops do this once per
    case).

    Args:
        i: Segment index; stores are keyed by ``str(i)``.

    Returns:
        Tuple ``(train, test)`` of DataFrames with the columns ``velocity``,
        ``strain_rate``, ``acceleration`` and ``strain_acceleration``.

    Raises:
        NameError: If one of the module-level stores has not been assigned.
        KeyError: If a store has no entry for segment ``i``.
    """
    columns = ['velocity', 'strain_rate', 'acceleration', 'strain_acceleration']
    # Store order matches ``columns``: v -> velocity, s -> strain_rate, a -> acceleration, sa -> strain_acceleration.
    train_stores = (df_e_train_v, df_e_train_s, df_e_train_a, df_e_train_sa)
    test_stores = (df_e_test_v, df_e_test_s, df_e_test_a, df_e_test_sa)

    train = pd.concat([_flatten_segment(store, i) for store in train_stores], axis=1)
    train.columns = columns
    test = pd.concat([_flatten_segment(store, i) for store in test_stores], axis=1)
    test.columns = columns
    return train, test

def get_limits(use_test=False):
    """Find the overall value range of two measures across segments 1..16.

    Calls ``get_data`` for every segment and inspects either the train frame
    (default) or the test frame.

    Args:
        use_test: When truthy, use the test frame instead of the train frame.

    Returns:
        Tuple ``(x_max, x_min, y_max, y_min)`` where x refers to
        ``strain_acceleration`` and y to ``acceleration``. Note the max-first
        ordering.
    """
    sa_highs, sa_lows, acc_highs, acc_lows = [], [], [], []
    for segment in range(1, 17):
        train_frame, test_frame = get_data(segment)
        frame = test_frame if use_test else train_frame
        strain_acc = frame.strain_acceleration
        sa_highs.append(strain_acc.max())
        sa_lows.append(strain_acc.min())
        acc = frame.acceleration
        acc_highs.append(acc.max())
        acc_lows.append(acc.min())
    return max(sa_highs), min(sa_lows), max(acc_highs), min(acc_lows)


# One joint plot (strain-acceleration vs. acceleration) of the test data per case and segment.
for case_number in range(1, 50):
    # Each euclider call selects the single column at position `case_number`.
    # The results are module-level names because get_data/get_limits read them as globals.
    df_e_train_s, col_name = euclider(df_train, 'strain', min=case_number, max=case_number + 1)
    df_e_test_s, _ = euclider(df_test, 'strain', min=case_number, max=case_number + 1)

    df_e_train_v, _ = euclider(df_train, 'velocity', min=case_number, max=case_number + 1)
    df_e_test_v, _ = euclider(df_test, 'velocity', min=case_number, max=case_number + 1)

    df_e_train_a, _ = euclider(df_train, 'acceleration', min=case_number, max=case_number + 1)
    df_e_test_a, _ = euclider(df_test, 'acceleration', min=case_number, max=case_number + 1)

    df_e_train_sa, _ = euclider(df_train, 'strain-acc', min=case_number, max=case_number + 1)
    df_e_test_sa, _ = euclider(df_test, 'strain-acc', min=case_number, max=case_number + 1)

    # The limits depend only on the stores assigned above, not on the segment,
    # so compute them once per case instead of once per plot.
    x_max, x_min, y_max, y_min = get_limits(use_test=True)

    for i in range(1, 17):
        train, test = get_data(i)
        # p = sns.jointplot(x=train.strain_acceleration, y=train.acceleration, xlim=(x_min, x_max), ylim=(y_min, y_max))
        # plt.title(f'Train AHA {i}')
        p = sns.jointplot(x=test.strain_acceleration, y=test.acceleration, xlim=(x_min, x_max), ylim=(y_min, y_max))
        plt.title(f'Control\n{col_name}\nAHA {i}')
        p.ax_joint.set_xlabel('strain-acceleration [1/s^2]')
        p.ax_joint.set_ylabel('acceleration [mm/s^2]')
        p.figure.tight_layout()
        plt.show()
        # plt.savefig(f'/home/melandur/Downloads/Pictures/images/test/case_{case_number}_AHA_{i}')
        # Release the figure explicitly so figures do not pile up across the many iterations.
        plt.close(p.figure)

    # sns.jointplot(test.acceleration, test.velocity, xlim=(0,2), ylim=(0,50))
    # plt.title(f'Cont_Segment {i}')
    # plt.xlim(0,3)
    # plt.ylim(0,50)

    # sns.scatterplot(train.acceleration, train.velocity)
    # sns.scatterplot(test.acceleration, test.strain_rate)
    # plt.title('myocarditis')

    # plt.subplot(122)
    # plt.title('control')
    # sns.scatterplot(test.strain_rate, test.velocity)
    # plt.show()


In [None]:
def _flatten_segment(store, i):
    """Return segment ``i`` of ``store`` as one long 'value' Series, last row dropped."""
    frame = store[f'{i}']
    trimmed = frame.drop(frame.tail(1).index)
    # melt() stacks all columns into 'variable'/'value'; only the values are needed.
    return trimmed.melt()['value']


# NOTE(review): an earlier cell already defines get_data; this definition shadows
# it once the cell runs. Consider keeping a single definition.
def get_data(i):
    """Assemble the four per-segment measures into train and test frames.

    Reads the module-level stores ``df_e_{train,test}_{s,v,a,sa}``, which must
    have been assigned before the call (the plotting loops do this once per
    case).

    Args:
        i: Segment index; stores are keyed by ``str(i)``.

    Returns:
        Tuple ``(train, test)`` of DataFrames with the columns ``velocity``,
        ``strain_rate``, ``acceleration`` and ``strain_acceleration``.

    Raises:
        NameError: If one of the module-level stores has not been assigned.
        KeyError: If a store has no entry for segment ``i``.
    """
    columns = ['velocity', 'strain_rate', 'acceleration', 'strain_acceleration']
    # Store order matches ``columns``: v -> velocity, s -> strain_rate, a -> acceleration, sa -> strain_acceleration.
    train_stores = (df_e_train_v, df_e_train_s, df_e_train_a, df_e_train_sa)
    test_stores = (df_e_test_v, df_e_test_s, df_e_test_a, df_e_test_sa)

    train = pd.concat([_flatten_segment(store, i) for store in train_stores], axis=1)
    train.columns = columns
    test = pd.concat([_flatten_segment(store, i) for store in test_stores], axis=1)
    test.columns = columns
    return train, test


def rename_columns(df, i):
    """Append ``_<i>`` to every column label of ``df``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame whose columns are relabelled.
        i: Suffix value, formatted into each label after an underscore.

    Returns:
        The same ``df`` object with the new column labels.
    """
    df.columns = [f'{label}_{i}' for label in df.columns]
    return df

# One joint plot (strain-acceleration vs. acceleration) of the train data per case and segment.
for case_number in range(1, 50):
    # Each euclider call selects the single column at position `case_number`.
    # The results are module-level names because get_data/get_limits read them as globals.
    df_e_train_s, col_name = euclider(df_train, 'strain', min=case_number, max=case_number + 1)
    df_e_test_s, _ = euclider(df_test, 'strain', min=case_number, max=case_number + 1)

    df_e_train_v, _ = euclider(df_train, 'velocity', min=case_number, max=case_number + 1)
    df_e_test_v, _ = euclider(df_test, 'velocity', min=case_number, max=case_number + 1)

    df_e_train_a, _ = euclider(df_train, 'acceleration', min=case_number, max=case_number + 1)
    df_e_test_a, _ = euclider(df_test, 'acceleration', min=case_number, max=case_number + 1)

    df_e_train_sa, _ = euclider(df_train, 'strain-acc', min=case_number, max=case_number + 1)
    df_e_test_sa, _ = euclider(df_test, 'strain-acc', min=case_number, max=case_number + 1)

    # for i in range(1, 17):
    #     if i == 1:
    #         _, test = get_data(i)
    #         fused_segs = rename_columns(test, i)
    #     else:
    #         train, test = get_data(i)
    #         data = rename_columns(test, i)
    #         fused_segs = fused_segs.join(data)
    #

    # Limits are shared by all segments of the case, so compute them once here.
    x_max, x_min, y_max, y_min = get_limits(use_test=False)

    # `train` and `i` used to be read without being assigned in this cell, so the
    # plot showed whatever an earlier cell left behind. Fetch the data per segment.
    for i in range(1, 17):
        train, _ = get_data(i)
        p = sns.jointplot(x=train.strain_acceleration, y=train.acceleration, xlim=(x_min, x_max), ylim=(y_min, y_max))
        plt.title(f'Myo\n{col_name}\nAHA {i}')
        p.ax_joint.set_xlabel('strain-acceleration [1/s^2]')
        p.ax_joint.set_ylabel('acceleration [mm/s^2]')
        p.figure.tight_layout()
        plt.show()
        # Release the figure explicitly so figures do not pile up across the many iterations.
        plt.close(p.figure)

    # g = sns.PairGrid(fused_segs, diag_sharey=False)
    # g.map_upper(sns.scatterplot)
    # g.map_lower(sns.kdeplot)
    # g.map_diag(sns.kdeplot)
    # plt.tight_layout()