# Creación de Dataset con experimentos de fisura y perno flojo
Este notebook tiene como objetivo elaborar un conjunto de datos asociado con los experimentos que poseen el estado 3 y 4, es decir de fisura y perno flojo respectivamente, para posteriormente aplicar un preprocesamiento antes de ingresar dichos datos al modelo de las redes siamesas.

## Importaciones

In [1]:
from google.colab import drive
from os import listdir
from os.path import isfile, join
from scipy.io import loadmat

import pandas as pd

import os

## Obtención de Datos
Obtenemos los archivos de todos los experimentos y segmentamos de tal forma que nos quedamos solo con los datos relacionados a fisura y pernoflojo.

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
dataset_datos_dir = 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS'
dataset_images_dir = 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/IMAGES_FOLDER'

In [5]:
dataset_datos_files = [dataset_datos_dir+'/'+filename for filename in listdir(dataset_datos_dir) if isfile(join(dataset_datos_dir, filename))]

In [6]:
datos_fisura = []
datos_pernoflojo = []

for filename in listdir(dataset_datos_dir):
  if isfile(join(dataset_datos_dir, filename)):
    nombre_archivo = filename.split('.')[0]
    if (nombre_archivo.split('_')[0] == '3'):
      datos_fisura.append(dataset_datos_dir + '/' + filename)
    elif (nombre_archivo.split('_')[0] == '4'):
      datos_pernoflojo.append(dataset_datos_dir + '/' + filename)

In [7]:
datos_fisura

['drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_1_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_2_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_1_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_1_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_1_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_2_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_2_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_3_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_4_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_3_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_2_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_3_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_4_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_3_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3_4_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/3

In [8]:
datos_pernoflojo

['drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_1_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_1_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_2_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_4_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_1_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_1_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_3_2A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_2_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_5_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_3_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_4_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_3_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_2_05A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_3_1A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_5_3A.mat',
 'drive/MyDrive/DATOS_EXPERIMENTALES_JACKET/DATOS/4_

## Creación de Dataset de experimentos con fisura
Para el dataset, debemos unir todos los experimentos relacionados con este estado y además agregar dos columnas indicando el número de experimento y la amplitud del experimento. 

In [20]:
numero_experimentos_fisura = []
amplitud_experimentos_fisura = []

for nombre_archivo in datos_fisura:
  nombre_experimento = nombre_archivo.split('/')[4]
  numero_experimento = nombre_experimento.split('_')[1]
  amplitud_experimento = nombre_experimento.split('.')[0].split('_')[2].replace('A', '')
  numero_experimentos_fisura.append(int(numero_experimento))
  if amplitud_experimento == '05':
    amplitud_experimento = 0.5
    amplitud_experimentos_fisura.append(amplitud_experimento)
  else:
    amplitud_experimentos_fisura.append(int(amplitud_experimento))

Verificamos que las listas hayan sido creadas correctamente para luego agregar los valores obtenidos al DataFrame general de fisura.

In [21]:
numero_experimentos_fisura

[1, 2, 1, 1, 1, 2, 2, 3, 4, 3, 2, 3, 4, 3, 4, 5, 5, 4, 5, 5]

In [22]:
amplitud_experimentos_fisura

[1, 0.5, 2, 0.5, 3, 1, 2, 0.5, 2, 1, 3, 2, 1, 3, 0.5, 1, 0.5, 3, 2, 3]

In [23]:
len(numero_experimentos_fisura) == len(amplitud_experimentos_fisura)

True

### Tenemos los siguientes datos relacionados a los experimentos con fisura

- datos_fisura (arreglo con direcciones de experimentos de fisura)
- numero_experimentos_fisura (arreglo con el numero de experimento para cada elemento en datos_fisura)
- amplitud_experimentos_fisura (arreglo con la amplitud de cada elemento en datos_fisura)

In [47]:
df_fisura = pd.DataFrame()

for indice in range(len(datos_fisura)):
  direccion = datos_fisura[indice]
  mat = loadmat(direccion)
  df = pd.DataFrame(mat['data'])
  df['#_exp'] = numero_experimentos_fisura[indice]
  df['amplitud'] = amplitud_experimentos_fisura[indice]
  df_fisura = pd.concat([df_fisura, df], axis = 0)

Verificamos que el dataset de fisura haya sido creado correctamente.

In [48]:
df_fisura.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,#_exp,amplitud
0,0.000132,0.000185,0.000122,0.000105,0.000141,7.5e-05,7e-06,0.000231,0.000299,0.000113,0.000228,0.000225,0.000168,1.8e-05,0.000344,0.000275,-1.7e-05,4.6e-05,0.0002,0.000169,9.7e-05,0.000196,-5.3e-05,1.8e-05,1,1.0
1,9.2e-05,0.000223,0.000166,0.000127,0.000191,0.000137,7.5e-05,0.000192,0.00024,9.5e-05,0.000279,0.000231,0.000145,3.7e-05,0.000339,9.2e-05,8.5e-05,7.4e-05,0.000128,0.000163,7.4e-05,8.5e-05,6.3e-05,-2.7e-05,1,1.0
2,9.1e-05,0.00028,0.000188,0.00013,0.000172,0.000102,0.000161,0.000158,0.000211,0.000123,0.000269,0.000206,0.000198,4.1e-05,0.000312,0.000179,1.1e-05,7.9e-05,0.000117,0.000146,8.5e-05,8.9e-05,-1.4e-05,2e-05,1,1.0
3,0.000179,0.000199,0.000249,0.000102,0.000268,0.00014,0.000126,0.000231,0.000239,0.000102,0.000284,0.000175,0.000164,8.2e-05,0.000311,0.000235,3.1e-05,7.9e-05,0.000257,0.00011,5e-05,0.00012,8e-05,5.7e-05,1,1.0
4,0.000282,0.000277,0.000202,0.000211,0.000251,0.000159,9.6e-05,0.000205,0.000342,0.000113,0.00024,0.000272,0.000214,5.7e-05,0.000304,0.000302,4.7e-05,0.00015,0.000271,0.000168,0.000126,0.000224,0.000105,5.8e-05,1,1.0


In [50]:
len(df_fisura)

1981940

In [51]:
len(df_fisura)/99096

20.00020182449342

In [52]:
df_fisura['#_exp'].unique()

array([1, 2, 3, 4, 5])

In [53]:
df_fisura['amplitud'].unique()

array([1. , 0.5, 2. , 3. ])

In [55]:
df_fisura['#_exp'].value_counts()/99097

5    4.0
4    4.0
3    4.0
2    4.0
1    4.0
Name: #_exp, dtype: float64

In [56]:
df_fisura['amplitud'].value_counts()/99097

1.0    5.0
0.5    5.0
3.0    5.0
2.0    5.0
Name: amplitud, dtype: float64

## Creación de Dataset de experimentos con perno flojo
Para el dataset, debemos unir todos los experimentos relacionados con este estado y además agregar dos columnas indicando el número de experimento y la amplitud del experimento. 

In [57]:
numero_experimentos_pernoflojo = []
amplitud_experimentos_pernoflojo = []

for nombre_archivo in datos_pernoflojo:
  nombre_experimento = nombre_archivo.split('/')[4]
  numero_experimento = nombre_experimento.split('_')[1]
  amplitud_experimento = nombre_experimento.split('.')[0].split('_')[2].replace('A', '')
  numero_experimentos_pernoflojo.append(int(numero_experimento))
  if amplitud_experimento == '05':
    amplitud_experimento = 0.5
    amplitud_experimentos_pernoflojo.append(amplitud_experimento)
  else:
    amplitud_experimentos_pernoflojo.append(int(amplitud_experimento))

Verificamos que las listas hayan sido creadas correctamente para luego agregar los valores obtenidos al DataFrame general de perno flojo.

In [58]:
numero_experimentos_pernoflojo

[1, 1, 2, 4, 1, 1, 3, 2, 5, 3, 4, 3, 2, 3, 5, 4, 4, 5, 5, 2]

In [59]:
amplitud_experimentos_pernoflojo

[0.5, 2, 1, 3, 3, 1, 2, 3, 1, 3, 1, 0.5, 0.5, 1, 3, 0.5, 2, 2, 0.5, 2]

In [60]:
len(numero_experimentos_fisura) == len(amplitud_experimentos_fisura)

True

### Tenemos los siguientes datos relacionados a los experimentos con pernoflojo

- datos_pernoflojo (arreglo con direcciones de experimentos de pernoflojo)
- numero_experimentos_pernoflojo (arreglo con el numero de experimento para cada elemento en datos_pernoflojo)
- amplitud_experimentos_pernoflojo (arreglo con la amplitud de cada elemento en datos_pernoflojo)

In [61]:
df_pernoflojo = pd.DataFrame()

for indice in range(len(datos_pernoflojo)):
  direccion = datos_pernoflojo[indice]
  mat = loadmat(direccion)
  df = pd.DataFrame(mat['data'])
  df['#_exp'] = numero_experimentos_pernoflojo[indice]
  df['amplitud'] = amplitud_experimentos_pernoflojo[indice]
  df_pernoflojo = pd.concat([df_pernoflojo, df], axis = 0)

Verificamos que el dataset de perno flojo haya sido creado correctamente.

In [69]:
df_pernoflojo.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,#_exp,amplitud
0,0.000168,0.000204,0.000213,0.000135,0.000201,9.9e-05,0.000108,0.000189,0.000212,9.3e-05,0.000257,0.000196,0.000173,2.9e-05,0.000316,0.000202,1e-05,8.5e-05,0.000151,0.000136,0.000116,9.9e-05,5.4e-05,0.000111,1,0.5
1,0.000122,0.000277,0.000213,0.00018,0.00023,0.000172,6.8e-05,0.000213,0.000214,9.5e-05,0.000272,0.000172,0.000173,5e-05,0.000292,0.000178,2.1e-05,2.2e-05,0.000203,0.000139,7.6e-05,0.000164,8.9e-05,7.6e-05,1,0.5
2,9.9e-05,0.000214,0.000128,0.000153,0.000217,0.0001,6.4e-05,0.000174,0.000225,0.000116,0.000245,0.000202,0.00014,1.2e-05,0.000309,0.000256,2.3e-05,8.9e-05,0.000225,0.000162,9.8e-05,0.000154,9.1e-05,9.2e-05,1,0.5
3,0.00015,0.000258,0.000209,0.000163,0.000213,0.00012,6.6e-05,0.000157,0.00022,9.6e-05,0.000229,0.000203,0.000157,5.7e-05,0.000292,0.00026,6.2e-05,7.7e-05,0.000212,0.000178,0.000123,0.00015,5.6e-05,0.00013,1,0.5
4,0.000194,0.000244,0.000194,0.000201,0.000202,0.000127,0.000133,0.000197,0.000212,0.00011,0.000266,0.000199,0.000181,4e-05,0.000336,0.000209,9.2e-05,9.7e-05,0.000183,0.00017,0.000114,0.000127,6.4e-05,8.7e-05,1,0.5


In [62]:
len(df_pernoflojo)

1981940

In [63]:
len(df_pernoflojo)/99097

20.0

In [64]:
df_pernoflojo['#_exp'].unique()

array([1, 2, 4, 3, 5])

In [65]:
df_pernoflojo['amplitud'].unique()

array([0.5, 2. , 1. , 3. ])

In [66]:
df_fisura['#_exp'].value_counts()/99097

5    4.0
4    4.0
3    4.0
2    4.0
1    4.0
Name: #_exp, dtype: float64

In [67]:
df_fisura['amplitud'].value_counts()/99097

1.0    5.0
0.5    5.0
3.0    5.0
2.0    5.0
Name: amplitud, dtype: float64