In [16]:
import tensorflow as tf
import keras
from keras import layers, models
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
from sklearn.decomposition import PCA

In [17]:
X = np.load('C:\\Users\\47575909\\Desktop\\X_PCA.npy')
y = np.load('C:\\Users\\47575909\\Desktop\\y_PCA.npy')

In [18]:
X_train, X_testVal, y_train, y_testVal = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_testVal, y_testVal, test_size=0.5, random_state=42)

In [19]:
X_train.shape

(20711, 1000)

In [20]:
model = tf.keras.models.Sequential([
    tf.keras.Input(shape = (1000, )),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

In [21]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [22]:
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

test_loss, test_acc = model.evaluate(X_val, y_val)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
print("Accuracy : ", test_acc)
print("Loss : ", test_loss)

Accuracy :  0.5211806893348694
Loss :  0.6922646760940552


In [26]:
predictions = model.predict(X_test)
probability_of_epilepsy = predictions[0]
confidence_percentage = probability_of_epilepsy * 100
print("Porcentaje de confianza:", confidence_percentage, "%")

Porcentaje de confianza: [51.84041] %


In [4]:
TFrecord_directory = 'C:\\Users\\47575909\\Desktop\\tfrecord_data_ejemplos' 

#lista de archivos TFRecord en la carpeta
filenames = [os.path.join(TFrecord_directory, f) for f in os.listdir(TFrecord_directory) if f.endswith('.tfrecord')]

#Hago un dataset de TensorFlow a partir de los archivos TFRecord
dataset = tf.data.TFRecordDataset(filenames)

#función para parsear los datos de TFRecord
def parse_tfrecord_fn(example):
    feature_description = {
        'grpno': tf.io.FixedLenFeature([], tf.int64),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'path': tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(example, feature_description)
    return example

parsed_dataset = dataset.map(parse_tfrecord_fn)

#batching para argar los datos en lotes
batch_size = 5  #puedo cambiar el tamaño (en la PC del colegio el máximo es 16, pero capáz que le tengo que poner menos)
batched_dataset = parsed_dataset.batch(batch_size)

In [7]:
from tqdm import tqdm

# Define el número máximo de componentes principales que deseas mantener para todos los lotes
max_components = 1000  # Puedes ajustar este valor según tus necesidades

X_list = []  # Lista temporal para X
y_list = []  # Lista temporal para y

# Crea un objeto tqdm para rastrear el progreso
pbar = tqdm(total=len(filenames))

# Itera los lotes
for batch_data in batched_dataset:
    paths = batch_data['path'].numpy()
    labels = batch_data['label'].numpy()
    
    # Cargar los scaleograms desde los archivos .npy
    batch_X = []
    for path in paths:
        spectrogram = np.load(path.decode('utf-8'))
        batch_X.append(spectrogram)
    
    # Convierte batch_X en un arreglo NumPy
    batch_X = np.array(batch_X)
    
    batch_X = batch_X.reshape((batch_X.shape[0], -1))  # Aplanar los datos
    
    # NORMALIZA X en rango [-1, 1] para que el cero quede en el centro
    min_val = np.min(batch_X)
    max_val = np.max(batch_X)

    batch_X = -1 + 2 * (batch_X - min_val) / (max_val - min_val)
    
    variance_to_keep = 0.95  # Me quedo con el 95% de la varianza

    pca = PCA()
    batch_X = pca.fit_transform(batch_X)  # Aplica PCA para los datos escalados

    # Asegúrate de que todos los lotes tengan la misma cantidad de componentes principales
    n_components_to_keep = min(max_components, batch_X.shape[1])
    if batch_X.shape[1] < max_components:
        # Rellena con ceros las columnas faltantes si es necesario
        zeros_to_add = max_components - batch_X.shape[1]
        zeros = np.zeros((batch_X.shape[0], zeros_to_add))
        batch_X = np.hstack((batch_X, zeros))
    else:
        batch_X = batch_X[:, :n_components_to_keep]

    # Agrega los scaleograms y labels a X_list e y_list
    X_list.append(batch_X)
    y_list.extend(labels)

    # Actualiza la barra de progreso
    pbar.update(len(paths))  # Actualiza la barra de progreso según la cantidad de archivos en el lote

# Convierte X_list e y_list en arreglos NumPy
X = np.concatenate(X_list, axis=0)
y = np.array(y_list)

# Cierra la barra de progreso
pbar.close()


  0%|                                                                                            | 0/1 [00:22<?, ?it/s][A

5it [00:00, 34.33it/s]                                                                                                 [A
10it [00:00, 28.19it/s][A
15it [00:00, 27.85it/s][A
20it [00:00, 26.46it/s][A
25it [00:00, 26.55it/s][A
30it [00:01, 25.98it/s][A
35it [00:01, 25.04it/s][A
40it [00:01, 25.75it/s][A
45it [00:01, 26.89it/s][A
50it [00:01, 26.41it/s][A
55it [00:02, 26.64it/s][A
60it [00:02, 27.06it/s][A
65it [00:02, 27.77it/s][A
70it [00:02, 27.30it/s][A
75it [00:02, 27.34it/s][A
80it [00:02, 28.31it/s][A
85it [00:03, 26.81it/s][A
90it [00:03, 26.73it/s][A
95it [00:03, 26.14it/s][A
100it [00:03, 26.85it/s][A
105it [00:03, 27.21it/s][A
110it [00:04, 27.19it/s][A
115it [00:04, 26.57it/s][A
120it [00:04, 26.76it/s][A
125it [00:04, 27.82it/s][A
130it [00:04, 27.24it/s][A
135it [00:04, 27.43it/s][A
140it [00:05, 27.33it/s][A
145it [00:05, 

In [8]:
X_shape = X.shape
shape_0 = X_shape[0]
X_shape

(309, 1000)

In [9]:
X = X.reshape(shape_0, -1)

In [10]:
min_val = np.min(X)
max_val = np.max(X)

X = -1 + 2 * (X - min_val) / (max_val - min_val)

#X = (X - X.min()) / (X.max() - X.min()) [0, 1]

In [11]:
np.save('ejemplo_epilepsia.npy', X)