In [1]:
import json
import numpy as np
from scipy import stats
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
# import glmnet_python
# from glmnet_python import glmnet
# from glmnet_python import glmnetPredict
# from glmnet_python import glmnetPlot
# from glmnet_python import cvglmnet
# from glmnet_python import cvglmnetPredict
# from glmnet_python import cvglmnetPlot
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.neural_network import MLPClassifier
import codecs
import seaborn as sns
import matplotlib.pyplot as plt
import pingouin as pg

In [2]:
# VARIABLES TO ADJUST

# Object family to analyse.
# Valid values: 'Mugs', 'Plates', 'Geometric', 'Cutlery', 'Ball'
family_to_select = "Geometric"

# Name of the target variable (one of the kinematic labels).
target = "Middle Abd"

# Number of bins (presumably per EP; the binning step is not implemented yet -- TODO confirm).
num_bins = 5

# Number of cross-validation folds.
cv = 3

# Candidate values for C_par: [0.2, 0.5, 1, 1.25, 1.5]
# NOTE(review): presumably the classifier's regularisation parameter C -- confirm where it is used.
C_par = 0.2

# NOTE(review): presumably the L1-vs-L2 penalty mix (1 = pure L1?) -- confirm where it is used.
l1vsl2 = 1

In [3]:
# CONSTANTS

# Object name -> object family.
obj_fam = {
    'CeramicMug': 'Mugs',
    'Glass': 'Mugs',
    'MetalMug': 'Mugs',
    'CeramicPlate': 'Plates',
    'MetalPlate': 'Plates',
    'PlasticPlate': 'Plates',
    'Cube': 'Geometric',
    'Cylinder': 'Geometric',
    'Triangle': 'Geometric',
    'Fork': 'Cutlery',
    'Knife': 'Cutlery',
    'Spoon': 'Cutlery',
    'PingPongBall': 'Ball',
    'SquashBall': 'Ball',
    'TennisBall': 'Ball',
}

# Names of the kinematic variables (used as DataFrame column names).
kin_labels = [
    "Thumb Rotate", "Thumb MPJ", "Thumb IJ",
    "Index MPJ", "Index PIJ",
    "Middle MPJ", "Middle PIJ",
    "Ring MPJ", "Ring PIJ",
    "Pinkie MPJ", "Pinkie PIJ",
    "Palm Arch", "Wrist Pitch", "Wrist Yaw",
    "Index Abd", "Pinkie Abd", "Ring Abd", "Middle Abd", "Thumb Abd",
]

# Names of the 64 EMG columns: 'emg0' ... 'emg63'.
emg_labels = ['emg{}'.format(channel) for channel in range(64)]

In [4]:
# READING FILES AND CREATING DATASET
file_kin = "./PyData/filtered_data.json"
file_eps = "./PyData/ep_labels.json"
file_task = "./PyData/task_labels.json"
file_emg = "./PyData/emg_data.json"


def _load_json(path):
    """Open ``path`` in text mode and return its parsed JSON content."""
    with open(path, "r") as handle:
        return json.load(handle)


# Nesting of each structure, as noted by the original author:
kin_data = _load_json(file_kin)  # kin_data[subjects][trials][joints]
eps = _load_json(file_eps)       # eps[subjects][trials]
task = _load_json(file_task)     # task[subjects][trials]
emg_data = _load_json(file_emg)  # emg_data[subjects][trials][sensors]
        

In [5]:
# VECTORIZING DATA
# Every structure is a list of per-subject lists; the subject level is
# flattened away so that all subjects end up in a single sequence.

vectorized_task = [label for subject in task for label in subject]
vectorized_eps = [label for subject in eps for label in subject]

# dtype=float so that 'None' entries become 'NaN'.
vectorized_kin = np.array(
    [sample for subject in kin_data for sample in subject], dtype=float
)
vectorized_emg = np.array(
    [sample for subject in emg_data for sample in subject], dtype=float
)

# Task labels are split on "_": the first part is the given object,
# the second part is the asked object.
task_parts = [label.split("_") for label in vectorized_task]
given_object = [parts[0] for parts in task_parts]
ask_object = [parts[1] for parts in task_parts]

In [6]:
# CREATE PANDAS DATAFRAME AND GENERATE TRIAL AND EP INDEX

# Numeric block: kinematic columns followed by EMG columns, one row per sample.
aux = np.concatenate([vectorized_kin, vectorized_emg], axis=1)
labs = np.concatenate([kin_labels, emg_labels])

new_df = pd.DataFrame(aux, columns=labs)
new_df['EPs'] = vectorized_eps
new_df['Task'] = vectorized_task
new_df['Given'] = given_object
new_df['Asked'] = ask_object
new_df['Family'] = [obj_fam[x] for x in given_object]

# Running indices, both starting at 0:
#   * trial number -- incremented whenever the task label differs from the previous row
#   * EP number    -- reset to 0 on a new trial, incremented whenever the EP label
#                     differs from the previous row inside the same trial
n_rows = len(new_df)
trial = np.zeros((n_rows,), dtype=int)
# Deliberately NOT called `eps`: that name holds the EP labels loaded from JSON,
# and rebinding it here made the vectorizing cell fail when re-run.
ep_number = np.zeros((n_rows,), dtype=int)

tr = 0
ep = 0
trial_list = [tr]                  # one entry per trial
objects_list = [given_object[0]]   # given object of each trial, aligned with trial_list

for i in range(1, n_rows):
    if vectorized_task[i] != vectorized_task[i-1]:
        # New trial: advance the trial counter and restart the EP counter.
        tr += 1
        ep = 0
        trial_list.append(tr)
        objects_list.append(given_object[i])
    elif vectorized_eps[i] != vectorized_eps[i-1]:
        ep += 1
    trial[i] = tr
    ep_number[i] = ep

# Both indices are stored as strings (later cells compare them as strings).
new_df['Trial number'] = pd.Series(trial, index=new_df.index).astype(str)
new_df['EP number'] = pd.Series(ep_number, index=new_df.index).astype(str)


In [20]:
# SELECT DATAPOINTS BY FAMILY

sel_df = new_df.loc[new_df['Family'] == family_to_select]

# Trial identifiers are strings, so np.unique sorts them lexicographically
# ('10', '100', '101', '11', ...).
selected_trials = np.unique(sel_df['Trial number'])

# Given object of every selected trial, in the same order as selected_trials.
# The first row of a trial is enough: a trial is a run of rows sharing one
# task label, and the given object is the first part of that label.
given_per_trial = sel_df.drop_duplicates('Trial number').set_index('Trial number')['Given']
selected_objects = given_per_trial.loc[selected_trials].to_numpy()

# One row per trial; this is the table the cross-validation split works on.
selected = pd.DataFrame(list(zip(selected_trials, selected_objects)), columns=['Trial', 'Object'])

print(selected['Trial'].astype(int))

0      10
1     100
2     101
3      11
4     132
     ... 
76      9
77     96
78     97
79     98
80     99
Name: Trial, Length: 81, dtype: int64


In [51]:
# K-FOLD, Z-SCORE, BIN CREATION, CLASSIFICATION AND STATISTICAL TEST
# (only the fold split and the z-score normalization are implemented so far)


def zscore_with_reference(frame, reference):
    """Z-score the numeric columns of ``frame`` with statistics taken from ``reference``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to normalize.
    reference : pandas.DataFrame
        Rows providing the per-column mean and population standard deviation
        (ddof=0, NaN ignored -- the same convention as
        ``scipy.stats.zscore(..., nan_policy='omit')``).

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` with the same index, columns and column order.
        Numeric columns hold ``(x - mean) / std``; non-numeric columns are
        unchanged. Columns with zero standard deviation in ``reference``
        become NaN.
    """
    numeric_cols = reference.select_dtypes(include=[np.number]).columns
    mean = reference[numeric_cols].mean()
    # A zero std would yield +/-inf for any value off the mean; use NaN instead.
    std = reference[numeric_cols].std(ddof=0).replace(0, np.nan)
    scaled = frame.copy()
    scaled[numeric_cols] = (frame[numeric_cols] - mean) / std
    return scaled


# STRATIFIED K-FOLD CROSS VALIDATION
# The split is done over trials (one row per trial in `selected`), stratified
# by object, so every sample of a trial ends up in the same fold.
skf = StratifiedKFold(n_splits=cv)

contador = 0  # number of folds processed

for train, test in skf.split(selected['Trial'].astype(int), selected['Object'].astype(str)):

    # TRAIN/TEST SPLIT
    # `train` / `test` are positional indices into `selected`; map them to
    # trial identifiers and pick the matching samples from sel_df.
    trials_train = selected.iloc[train]['Trial'].array
    train_df = sel_df.loc[sel_df['Trial number'].isin(trials_train)]

    trials_test = selected.iloc[test]['Trial'].array
    test_df = sel_df.loc[sel_df['Trial number'].isin(trials_test)]

    assert not set(trials_train) & set(trials_test), "a trial appears in both train and test"

    # Z-SCORE NORMALIZATION
    # Both sets are scaled with the TRAINING statistics, so train and test
    # features share one scale and nothing is estimated from held-out data.
    train_z = zscore_with_reference(train_df, reference=train_df)
    test_z = zscore_with_reference(test_df, reference=train_df)

    # TODO (not implemented yet):
    #     loop over trials
    #         loop over eps
    #             split number bins
    #             compute means
    #             append or whatever

    contador += 1

    print("**************************************************************")
    print("**************************************************************\n")
        
    

<bound method NDFrame.head of        Thumb Rotate  Thumb MPJ  Thumb IJ  Index MPJ  Index PIJ  Middle MPJ  \
4293      -1.665066  -0.255249 -0.752055  -0.896467  -0.751191   -0.167714   
4294      -1.665066  -0.254478 -0.752055  -0.897077  -0.751185   -0.167714   
4295      -1.665066  -0.253899 -0.752055  -0.897502  -0.751191   -0.167714   
4296      -1.665066  -0.253617 -0.752055  -0.897491  -0.751191   -0.167714   
4297      -1.665066  -0.253567 -0.752055  -0.896805  -0.751191   -0.167714   
...             ...        ...       ...        ...        ...         ...   
86911      1.871385  -0.557866  2.337077  -1.144341   0.789272   -1.171413   
86912      1.871385  -0.557866  2.337077  -1.144341   0.789272   -1.171413   
86913      1.871385  -0.557866  2.337077  -1.144341   0.789272   -1.171413   
86914      1.871385  -0.557866  2.337077  -1.144341   0.789272   -1.171413   
86915      1.871385  -0.557866  2.337077  -1.144341   0.789272   -1.171413   

       Middle PIJ  Ring MPJ  Ring