In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('test_files/fcsv_files/F.fcsv', skiprows=2) #Nos salteamos los primeros 2 rows para evitar fallos de lectura.

### Leemos el .fcsv

In [3]:
df

Unnamed: 0,# columns = id,x,y,z,ow,ox,oy,oz,vis,sel,lock,label,desc,associatedNodeID
0,vtkMRMLMarkupsFiducialNode_0,4.06744,124.969,-476.645,0,0,0,1,1,1,0,F-1,,vtkMRMLScalarVolumeNode1
1,vtkMRMLMarkupsFiducialNode_1,-40.4434,85.2121,-476.645,0,0,0,1,1,1,0,F-2,,vtkMRMLScalarVolumeNode1
2,vtkMRMLMarkupsFiducialNode_2,48.5783,90.3978,-476.645,0,0,0,1,1,1,0,F-3,,vtkMRMLScalarVolumeNode1
3,vtkMRMLMarkupsFiducialNode_3,-34.8086,137.524,-386.207,0,0,0,1,1,1,0,F-4,,vtkMRMLScalarVolumeNode2
4,vtkMRMLMarkupsFiducialNode_4,59.3958,114.521,-386.207,0,0,0,1,1,1,0,F-5,,vtkMRMLScalarVolumeNode2
5,vtkMRMLMarkupsFiducialNode_5,-35.904,46.6061,-386.207,0,0,0,1,1,1,0,F-6,,vtkMRMLScalarVolumeNode2
6,vtkMRMLMarkupsFiducialNode_6,38.0355,37.8429,-386.207,0,0,0,1,1,1,0,F-7,,vtkMRMLScalarVolumeNode2


### Le damos otro formato (tps ish)

In [4]:
tps = df[df.columns[[1, 2, 3, -3, -1]]]

In [5]:
tps.columns = ["x", "y", "z", "label", "individuo"]

In [6]:
individuos = [i[-1] for i in tps['individuo']]
tps['individuo'] = individuos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [7]:
tps

Unnamed: 0,x,y,z,label,individuo
0,4.06744,124.969,-476.645,F-1,1
1,-40.4434,85.2121,-476.645,F-2,1
2,48.5783,90.3978,-476.645,F-3,1
3,-34.8086,137.524,-386.207,F-4,2
4,59.3958,114.521,-386.207,F-5,2
5,-35.904,46.6061,-386.207,F-6,2
6,38.0355,37.8429,-386.207,F-7,2


In [8]:
tps = tps[tps.columns[[-1, 0, 1, 2, 3]]]

In [9]:
tps

Unnamed: 0,individuo,x,y,z,label
0,1,4.06744,124.969,-476.645,F-1
1,1,-40.4434,85.2121,-476.645,F-2
2,1,48.5783,90.3978,-476.645,F-3
3,2,-34.8086,137.524,-386.207,F-4
4,2,59.3958,114.521,-386.207,F-5
5,2,-35.904,46.6061,-386.207,F-6
6,2,38.0355,37.8429,-386.207,F-7


### Formato MorphoJ (ish)

In [10]:
individuos = tps['individuo'].unique()
landmarks_x_individuo = [0 for _ in range(len(individuos))]

In [11]:
for i in tps['individuo']:
    for u in individuos:
        if int(i) == int(u):
            landmarks_x_individuo[int(u)-1] += 1

In [12]:
landmarks_x_individuo

[3, 4]

### Individuo 1 -> 3 landmarks, individuo 2 -> 4

In [13]:
def crear_columnas(hm=1):
    template = ['x', 'y', 'z']
    columnas = []
    for i in range(hm):
        columnas.append(template[0]+str(i+1)) #new x
        columnas.append(template[1]+str(i+1)) #new y
        columnas.append(template[2]+str(i+1)) #new z
    return columnas

In [14]:
nuevas_columnas = crear_columnas(max(landmarks_x_individuo))

In [15]:
nuevas_columnas

['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x3', 'y3', 'z3', 'x4', 'y4', 'z4']

In [16]:
def concatenar_landmarks(landmarks):
    data = {}
    for r in landmarks.values: #r = row
        ind = r[0]
        new_coords = r[1:-1].tolist()
        if ind in data.keys():
            coords = np.array(data[ind]).tolist()
            coords.extend(new_coords)
            new_coords = coords
        data.update({ind: np.array(new_coords)})
    return [v.tolist() for v in data.values()]

In [17]:
morphoj = pd.DataFrame(data=concatenar_landmarks(tps), columns=nuevas_columnas)

In [18]:
morphoj

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,y4,z4
0,4.06744,124.969,-476.645,-40.4434,85.2121,-476.645,48.5783,90.3978,-476.645,,,
1,-34.8086,137.524,-386.207,59.3958,114.521,-386.207,-35.904,46.6061,-386.207,38.0355,37.8429,-386.207


### Refactorizamos la concatenación

In [19]:
def concatenar_landmarks2(landmarks, individuos_ids):
    coords = []
    for i in individuos_ids:
        coords.append(landmarks[['x','y','z']][landmarks['individuo']==str(i)].values.flatten().tolist())
    return coords

In [20]:
morphoj = pd.DataFrame(data=concatenar_landmarks2(tps, individuos), columns=nuevas_columnas)

In [21]:
morphoj

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,y4,z4
0,4.06744,124.969,-476.645,-40.4434,85.2121,-476.645,48.5783,90.3978,-476.645,,,
1,-34.8086,137.524,-386.207,59.3958,114.521,-386.207,-35.904,46.6061,-386.207,38.0355,37.8429,-386.207


### Repitiendo el proceso para archivos .pts (raw from Landmark)

##### Con un solo archivo

In [22]:
pts325 = pd.read_csv('test_files/pts_files/325.pts', skiprows=1, delim_whitespace=True).reset_index()

In [23]:
pts325.head()

Unnamed: 0,level_0,level_1,level_2,41
0,S000,5.324964,4.245402,35.801582
1,S001,5.151456,-29.399679,52.296288
2,S002,5.359242,-35.592625,48.947098
3,S003,6.382361,-40.913445,37.293526
4,S004,23.53334,-32.111774,34.557816


In [24]:
colnames = pts325.columns
pts325 = pts325.rename(index=str, columns={colnames[0]:"landmark_id", colnames[1]: "x", colnames[2]: "y", colnames[3]: "z"})

In [25]:
col_ind = ["325" for _ in range(pts325.shape[0])] # llena una columna con el id del individuo

In [26]:
pts325 = pts325.assign(individuo=pd.Series(col_ind).values)

In [27]:
new_order = [-1, 0, 1, 2, 3]
pts325 = pts325[pts325.columns[new_order]]

In [28]:
pts325.head()

Unnamed: 0,individuo,landmark_id,x,y,z
0,325,S000,5.324964,4.245402,35.801582
1,325,S001,5.151456,-29.399679,52.296288
2,325,S002,5.359242,-35.592625,48.947098
3,325,S003,6.382361,-40.913445,37.293526
4,325,S004,23.53334,-32.111774,34.557816


In [29]:
nuevas_columnas = crear_columnas(pts325.shape[0])

In [30]:
morphoj2 = pd.DataFrame(data=concatenar_landmarks2(pts325, ['325']), columns=nuevas_columnas)

In [31]:
morphoj2

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,z38,x39,y39,z39,x40,y40,z40,x41,y41,z41
0,5.324964,4.245402,35.801582,5.151456,-29.399679,52.296288,5.359242,-35.592625,48.947098,6.382361,...,37.209084,-18.262831,-63.694931,21.245277,6.577489,-62.049553,33.207909,6.926522,-63.667809,34.941536


In [32]:
def concatenar_landmarks3(landmarks, individuo):
    coords = [individuo]
    coords.extend(landmarks[['x','y','z']][landmarks['individual']==str(individuo)].values.flatten().tolist())
    return [coords]

In [33]:
def process_pts_files(base_path, fnames, out_name):
    data = None
    for fname in fnames:
        if '.pts' in fname:
            #preprocesamiento
            individual = fname.split('.pts')[0].split('/')[-1] #obtenemos el nombre del individuo en base al nombre
            f = pd.read_csv(base_path+fname, skiprows=1, delim_whitespace=True).reset_index() #lee el archivo pts -> es necesario renombrar las columnas de datos
            col_names = f.columns
            f = f.rename(index=str, columns={col_names[0]:"landmark_id", col_names[1]: "x", col_names[2]: "y", col_names[3]: "z"}) #renombramos las columnas
            col_ind = [individual for _ in range(f.shape[0])] # llena una columna con el id del individuo
            f = f.assign(individual=pd.Series(col_ind).values) # agrega la columna con el nombre (al final)
            new_order = [-1, 0, 1, 2, 3]
            f = f[f.columns[new_order]] # reordenamos las columnas para que el nombre al principio

            #preparamos los datos con el formato final
            new_columns = ['individual'] # el nombre del individuo al principio
            new_columns.extend(crear_columnas(f.shape[0])) #creamos tantas columnas como landmarks tenga el individuo (x Cantidad de dimensiones)
            landmarks = concatenar_landmarks3(f, individual) #obtenemos los valores de los landmarks (x,y,z)
            if data is None:
                data = pd.DataFrame(data=landmarks, columns=new_columns) #La primera vez partimos de un DataFrame final inexistente. Lo creamos y cargamos los primeros datos
            else:
                data = data.append(pd.DataFrame(data=landmarks, columns=new_columns)) #si el DataFrame final ya existía, agregamos los datos nuevos.
    
    if data is None:
        print('No hay archivos .pts en {}. Por favor, utiliza un directorio válido.'.format(base_path))
    else:        
        data.to_csv(base_path+out_name, index=None)
        print('Datos guardados en {}'.format(base_path+out_name))
        return data

In [34]:
base_path = 'test_files/pts_files/'
out_name = 'output.csv'
fnames = os.listdir(base_path)
fnames.sort()
process_pts_files(base_path, fnames, out_name)

Datos guardados en test_files/pts_files/output.csv


Unnamed: 0,individual,x1,y1,z1,x2,y2,z2,x3,y3,z3,...,z38,x39,y39,z39,x40,y40,z40,x41,y41,z41
0,325,5.324964,4.245402,35.801582,5.151456,-29.399679,52.296288,5.359242,-35.592625,48.947098,...,37.209084,-18.262831,-63.694931,21.245277,6.577489,-62.049553,33.207909,6.926522,-63.667809,34.941536
0,326,-20.30303,21.24157,60.173042,-17.922991,-19.947447,83.200829,-17.992428,-25.709305,78.914078,...,64.104263,-43.91227,-50.52401,46.190948,-18.300264,-48.979,61.418076,-18.064859,-50.810741,60.635811
0,327,5.011319,51.302269,40.239456,8.393121,3.565066,56.927658,7.979037,-1.642891,51.261314,...,37.44104,-19.627626,-25.597197,20.893188,4.677782,-24.450739,33.27985,4.459326,-26.234285,32.08783
