In [1]:
import os
from PIL import Image

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from skimage.transform import resize
from torch.utils.data import DataLoader, Dataset

In [3]:
torch.cuda.is_available()

True

In [4]:
NUM_CLASSES = 6
NUM_FILES = 100
BATCH_SIZE = 8
IMG_SIZE = (300,400)  # len, width
EPOCHS = 100

In [5]:
def read_data(data_folder, num_files=None):
    """
    Read Spectrograms data from .npy files in the specified data folder.

    Parameters:
    - data_folder (str): Path to the main data folder containing 'train' and 'test' subfolders.
    - num_files (int or None): Number of files to read from each subfolder. If None, all files will be read.

    Returns:
    - train (array[Tuple[np.ndarray, np.ndarray]]): List of tuples containing train EEG data.
    - test (array[Tuple[np.ndarray, np.ndarray]]): List of tuples containing test EEG data.
    - train_labels (pd.DataFrame): DataFrame containing train labels.
    - test_labels (pd.DataFrame): DataFrame containing test labels.
    """
    train_spec_folder = os.path.join(data_folder, 'train_spectrograms')
    test_spec_folder = os.path.join(data_folder, 'test_spectrograms')

    def read_npy_folder(folder_path, n_files=None):
        arrays = []
        files_to_read = os.listdir(folder_path)[:n_files] if n_files else os.listdir(folder_path)
        for file in files_to_read:
            if file.endswith('.npy'):
                file_path = os.path.join(folder_path, file)
                array = np.load(file_path)
                arrays.append(array)
        print(f"Read {len(arrays)} files from {folder_path}.")
        return arrays

    # Read EEG data
    train_spec = read_npy_folder(train_spec_folder, num_files)
    test_spec = read_npy_folder(test_spec_folder)

    train_labels = pd.read_csv(os.path.join(data_folder, 'train.csv'), nrows=num_files)
    test_labels = pd.read_csv(os.path.join(data_folder, 'test.csv'))

    return train_spec, test_spec, train_labels, test_labels

In [6]:
train,_test,train_labels,_test_labels = read_data('data/npy_data/npy_data',num_files=NUM_FILES)
labels = pd.read_csv('train.csv', nrows=NUM_FILES)
X_train, X_val, y_train, y_val = train_test_split(
    train, train_labels,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

Read 100 files from data/npy_data/npy_data/train_spectrograms.
Read 1 files from data/npy_data/npy_data/test_spectrograms.


In [7]:
y_train = np.array(y_train)
y_val = np.array(y_val)

label_encoder = LabelEncoder()
encoded_labels_train = label_encoder.fit_transform(y_train[:, 8])
encoded_labels_val = label_encoder.fit_transform(y_val[:, 8])

y_train = torch.nn.functional.one_hot(torch.tensor(encoded_labels_train), num_classes=NUM_CLASSES).float()
y_val = torch.nn.functional.one_hot(torch.tensor(encoded_labels_val), num_classes=NUM_CLASSES).float()

In [8]:
y_train.shape
X_train[0].shape

(304, 401)

In [9]:
class CNNModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(32 * 75 * 100, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x

class ImageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = Image.fromarray(image)
            if image.mode != 'RGB':
                image = image.convert('RGB')

            image = self.transform(image)
        return image, label

In [10]:
model = CNNModel(num_classes=NUM_CLASSES)

In [11]:
transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),  
    transforms.ToTensor(),       
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) 
])

In [12]:
train_dataset = ImageDataset(X_train, y_train, transform=transform)
val_dataset = ImageDataset(X_val, y_val, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)


In [13]:
model = CNNModel(num_classes=NUM_CLASSES)
criterion = nn.KLDivLoss(reduction='batchmean')
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [14]:
for epoch in range(EPOCHS):
    model.train() 
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        labels_one_hot = torch.nn.functional.one_hot(torch.argmax(labels, dim=1), num_classes=NUM_CLASSES).float()
        loss = criterion(outputs, labels_one_hot)
        loss.backward()
        optimizer.step()  
        running_loss += loss.item()
        if i % 10 == 9:
            print(f'[Epoch {epoch + 1}, Mini-batch {i + 1}] Loss: {running_loss / 10:.3f}')
            running_loss = 0.0
    
    # Validation loop
    model.eval() 
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            labels_one_hot = torch.nn.functional.one_hot(torch.argmax(labels, dim=1), num_classes=NUM_CLASSES).float()
            loss = criterion(outputs, labels_one_hot)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == torch.argmax(labels_one_hot, dim=1)).sum().item()


print(f'Epoch {epoch + 1}/{EPOCHS}, Validation Loss: {val_loss / len(val_loader):.4f}, Accuracy: {(correct / total) * 100:.2f}%')

[Epoch 1, Mini-batch 10] Loss: -126.189
[Epoch 2, Mini-batch 10] Loss: -2007.984
[Epoch 3, Mini-batch 10] Loss: -13623.144
[Epoch 4, Mini-batch 10] Loss: -57942.819
[Epoch 5, Mini-batch 10] Loss: -190910.945
[Epoch 6, Mini-batch 10] Loss: -514265.206
[Epoch 7, Mini-batch 10] Loss: -1222846.156
[Epoch 8, Mini-batch 10] Loss: -2543451.475
[Epoch 9, Mini-batch 10] Loss: -4870851.000
[Epoch 10, Mini-batch 10] Loss: -8693399.000


KeyboardInterrupt: 

In [None]:
# def convert_parquet_to_npy(input_folder, output_folder):
#     npy_output_folder = os.path.join(output_folder, 'npy_data')
    
#     # Ensure the output directory exists
#     os.makedirs(npy_output_folder, exist_ok=True)
    
#     for root, dirs, files in os.walk(input_folder):
#         for file in files:
#             if file.endswith('.parquet'):
#                 parquet_path = os.path.join(root, file)
#                 df = pd.read_parquet(parquet_path)
#                 eeg_data = df.to_numpy()
#                 relative_path = os.path.relpath(parquet_path, input_folder)
                
#                 # Create the corresponding directory structure in the npy_data folder
#                 output_subfolder = os.path.join(npy_output_folder, os.path.dirname(relative_path))
#                 os.makedirs(output_subfolder, exist_ok=True)
#                 np.save(os.path.join(output_subfolder, file.replace('.parquet', '.npy')), eeg_data)