In [155]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
# Apply seaborn's default theme settings to the plots drawn in this session.
sns.set()

In [156]:
# Subject identifier: it is embedded in the CSV file names read below
# and appended to the plot titles.
soggetto = "soggetto_7"

In [157]:
# Load the input table of the selected subject and discard the
# SIGMA and bit_type columns, which are not used afterwards.
df_input = (
    pd.read_csv("Datasets/" + soggetto + '_input.csv', sep=',')
    .drop(columns=['SIGMA', 'bit_type'])
)

In [158]:
# Load the target table of the selected subject.
df_target = pd.read_csv(
    filepath_or_buffer="Datasets/" + soggetto + '_targets.csv',
    sep=',',
)

In [159]:
# Put the input columns and the target columns side by side in one frame
# (input columns first, target columns last).
df = pd.concat([df_input, df_target], axis="columns")
df

Unnamed: 0,bit,TrRi,RiTr,Plu,Ttra,Ttrb,Tr,MeTr,MoTr,RiRi,...,Tr.1,MeTr.1,MoTr.1,RiRi.1,MoFo,Mmof,MoRi,MeRi,Ri,Comp
0,0.568182,1,1,1.0,0.79,0,0.18,1,0.79,0.0,...,0.9096,-0.14,0.1288,1.14,0.1288,1.14,1.14,-0.14,-0.14,0.1928
1,0.431818,1,1,1.0,0.79,0,0.18,1,0.79,0.0,...,0.18,1.0,0.79,0.0,0.79,0.0,0.0,1.0,1.0,0.74
2,0.672646,1,1,0.68,1.0,0,0.01,1,1.0,0.33,...,0.7254,0.27,0.27,0.5782,0.4264,0.73,0.5782,0.27,0.4172,0.4126
3,0.327354,1,1,0.68,1.0,0,0.01,1,1.0,0.33,...,0.01,1.0,1.0,0.33,0.66,0.0,0.33,1.0,0.68,0.69
4,0.75,1,1,0.0,1.0,0,0.04,1,1.0,1.0,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
5,0.25,1,1,0.0,1.0,0,0.04,1,1.0,1.0,...,0.04,1.0,1.0,1.0,0.04,0.0,1.0,1.0,0.0,0.86
6,0.626305,1,1,0.2,0.04,0,1.0,1,0.04,0.82,...,0.105,0.105,0.8634,0.2472,0.3025,0.895,0.2472,0.105,0.737,0.2946
7,0.373695,1,1,0.2,0.04,0,1.0,1,0.04,0.82,...,1.0,1.0,0.04,0.82,0.75,0.0,0.82,1.0,0.2,0.76
8,0.625,1,1,0.0,0.14,0,0.9,1,0.14,1.0,...,0.18,0.1,0.788,0.1,0.18,0.9,0.1,0.1,0.9,0.252
9,0.375,1,1,0.0,0.14,0,0.9,1,0.14,1.0,...,0.9,1.0,0.14,1.0,0.9,0.0,1.0,1.0,0.0,0.81


In [160]:
# First step of the shortest-route (TSP) search through all the points of
# the dataset: every row of df is treated as a point with one coordinate
# per column, and the Euclidean distance is used as the metric.
from scipy.spatial.distance import pdist, squareform

# pdist returns the condensed pairwise distances; squareform expands them
# into the full symmetric matrix (zeros on the diagonal).
distance_matrix = squareform(pdist(df, 'euclidean'))
distance_matrix

array([[0.        , 3.89331419, 1.74398314, 3.73206889, 3.0804659 ,
        3.9229025 , 3.11245247, 3.66431076, 3.38572885, 3.77753194,
        3.08336994, 3.93517623, 2.38667096, 3.65286499, 0.66773701,
        4.05189455, 3.07190523, 3.60228429],
       [3.89331419, 0.        , 2.49801861, 1.06259715, 2.77457018,
        3.06634275, 3.15929838, 2.97519382, 3.30574821, 3.28085786,
        2.77779403, 3.07217478, 2.3142476 , 1.82130631, 3.57976048,
        0.57498904, 3.02947585, 2.96342812],
       [1.74398314, 2.49801861, 0.        , 2.29936453, 1.66005624,
        2.86388136, 2.3419825 , 2.77163606, 2.48711213, 2.93297529,
        1.66417749, 2.87648822, 0.75991883, 2.30530767, 1.41536301,
        2.65540331, 2.00430939, 2.63676104],
       [3.73206889, 1.06259715, 2.29936453, 0.        , 2.19996123,
        2.1165027 , 3.00391788, 2.74895389, 3.13976512, 2.88687896,
        2.20307272, 2.12296577, 1.94590546, 0.83751429, 3.44745351,
        0.99613122, 2.82075201, 2.43462519],
    

In [161]:
import networkx as nx

In [162]:
# Build a graph with weighted edges from the distance matrix: node k
# corresponds to row/column k of distance_matrix and each edge carries the
# pairwise distance in its "weight" attribute.
# nx.from_numpy_matrix was removed in NetworkX 3.0; nx.from_numpy_array is
# its replacement and is also available in NetworkX 2.x.
G = nx.from_numpy_array(distance_matrix)

In [163]:
# Approximate the shortest route that visits every node of G.
# cycle=False asks for an open path (no return to the starting node);
# the edge attribute "weight" holds the distances.
tsp = nx.approximation.traveling_salesman_problem
path = tsp(G, weight="weight", cycle=False)
path

[7, 9, 17, 5, 11, 13, 3, 15, 1, 4, 10, 2, 14, 0, 12, 16, 8, 6]

In [164]:
# Total length of the route: add up the distance between every pair of
# consecutive stops, in visiting order.
path_length = sum(
    distance_matrix[start, stop] for start, stop in zip(path, path[1:])
)
path_length

18.82087329502511

In [165]:
# Per-column change between consecutive stops of the route:
# row i holds df.iloc[path[i+1]] - df.iloc[path[i]] for every column of df.
#
# The rows are reordered along the path and differenced in one vectorized
# step instead of growing an empty frame one .loc row at a time; the
# resulting values, columns and 0..len(path)-2 index are the same.
df_deltas = (
    df.iloc[path]
    .diff()
    .iloc[1:]                # the first stop has no predecessor
    .reset_index(drop=True)  # label the steps 0, 1, 2, ...
)
df_deltas

Unnamed: 0,bit,TrRi,RiTr,Plu,Ttra,Ttrb,Tr,MeTr,MoTr,RiRi,...,Tr.1,MeTr.1,MoTr.1,RiRi.1,MoFo,Mmof,MoRi,MeRi,Ri,Comp
0,0.001305,0.0,0.0,-0.2,0.1,0.0,-0.1,0.0,0.1,0.18,...,-0.1,0.0,0.1,0.18,0.15,0.0,0.18,0.0,-0.2,0.05
1,-0.016026,0.0,0.0,0.09,0.19,0.0,-0.21,0.0,0.19,0.0,...,-0.21,0.0,0.19,0.0,-0.33,0.0,0.0,0.0,0.09,-0.09
2,-0.108974,0.0,0.0,-0.09,0.67,0.0,-0.65,0.0,0.67,0.0,...,-0.65,0.0,0.67,0.0,-0.53,0.0,0.0,0.0,-0.09,0.14
3,0.0,0.0,0.0,0.01,0.0,0.0,-0.03,0.0,0.0,0.0,...,-0.03,0.0,0.0,0.0,-0.03,0.0,0.0,0.0,0.01,0.01
4,0.032297,0.0,0.0,0.41,0.0,0.0,0.03,0.0,0.0,-0.41,...,0.03,0.0,0.0,-0.41,0.37,0.0,-0.41,0.0,0.41,-0.18
5,0.045058,0.0,0.0,0.26,0.0,0.0,-0.03,0.0,0.0,-0.26,...,-0.03,0.0,0.0,-0.26,0.28,0.0,-0.26,0.0,0.26,0.0
6,0.072646,0.0,0.0,0.32,0.0,0.0,0.0,0.0,0.0,-0.29,...,0.0,0.0,0.0,-0.29,0.34,0.0,-0.29,0.0,0.32,0.07
7,0.031818,0.0,0.0,0.0,-0.21,0.0,0.17,0.0,-0.21,-0.04,...,0.17,0.0,-0.21,-0.04,-0.21,0.0,-0.04,0.0,0.0,-0.02
8,0.318182,0.0,0.0,-1.0,0.21,0.0,-0.14,0.0,0.21,1.0,...,0.32,-0.5,-0.29,0.5,-0.29,0.5,0.5,-0.5,-0.5,-0.24
9,0.0,0.0,0.0,0.01,0.0,0.0,-0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [166]:
# Line plot (plotly express) of the last 15 columns of the deltas frame,
# with the step number along the path on the x axis.
fig = px.line(data_frame=df_deltas.iloc[:, -15:], title='Deltas ' + soggetto)
fig.show()

In [167]:
# Multiply each of the last 15 delta columns by the column located 15
# positions before it, then plot the result.
#
# The product is taken on the underlying NumPy values so that columns are
# paired by POSITION. DataFrame.mul(DataFrame) pairs columns by LABEL, which
# produces NaN wherever the two 15-column slices carry different labels.
#
# NOTE: df_deltas is overwritten in place, so re-running this cell multiplies
# again; re-run the cell that builds df_deltas before re-running this one.
df_deltas.iloc[:, -15:] = (
    df_deltas.iloc[:, -15:].to_numpy(dtype=float)
    * df_deltas.iloc[:, -30:-15].to_numpy(dtype=float)
)

fig = px.line(df_deltas.iloc[:, -15:], title='Deltas normalized ' + soggetto)
fig.show()