In [1]:
import zipfile
import pandas as pd

# Unzip the file to the current working directory
zip_path = 'BCICIV_2a_all_patients.csv.zip'
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('.')  # Extracts to the current directory

# Load the CSV file
csv_path = './BCICIV_2a_all_patients.csv'  # Adjust path if needed
df = pd.read_csv(csv_path)

# Display the first few rows to verify
df.head()


Unnamed: 0,patient,time,label,epoch,EEG-Fz,EEG-0,EEG-1,EEG-2,EEG-3,EEG-4,...,EEG-8,EEG-9,EEG-10,EEG-11,EEG-12,EEG-13,EEG-14,EEG-Pz,EEG-15,EEG-16
0,1,-0.1,tongue,8,-1.681412,2.245496,-0.15835,1.163765,-1.523659,-0.575267,...,0.758116,3.441785,0.305517,1.137473,-1.275763,-2.898359,0.656704,-2.010063,-1.613804,-1.942455
1,1,-0.096,tongue,8,0.420417,0.587559,1.65051,0.970672,1.505904,0.891796,...,1.541586,-0.07162,0.258909,-1.448198,0.142472,-1.968405,-1.733655,-2.935578,-3.125256,-4.67461
2,1,-0.092,tongue,8,0.551365,1.499758,0.121302,2.859433,2.613414,4.636026,...,2.649097,-2.137938,-1.612096,-1.610218,-0.410173,-0.274957,-4.776535,-5.099551,-2.798995,-5.862021
3,1,-0.088,tongue,8,3.054916,-1.807238,1.843603,2.286812,5.995872,6.651295,...,6.031554,-5.249621,-2.672998,-3.45237,0.189081,1.593829,-6.081577,-5.47686,-2.932163,-6.874095
4,1,-0.084,tongue,8,2.50671,-2.453101,0.221178,0.127278,4.519931,6.249573,...,7.827097,-5.309546,-2.488783,-3.707608,1.447515,4.268278,-4.38369,-4.218426,-1.331932,-5.322692


In [37]:
# Step 1: Unzip and Load the Data (assuming df is already loaded)
import zipfile
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode labels to ensure they are in a valid range
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])  # Convert labels to numeric values

# Print unique labels after encoding
print(f"Unique labels after encoding: {df['label'].unique()}")

# Prepare features and labels
X = df.iloc[:, :-1].values.astype('float32')  # Convert features to float32
y = df.iloc[:, -1].values.astype('int64')     # Convert labels to int64

# Determine number of unique classes
num_classes = len(df['label'].unique())
print(f"Number of unique classes: {num_classes}")

# Convert data to torch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# Filter out invalid labels
valid_indices = (y >= 0) & (y < num_classes)
X = X[valid_indices]
y = y[valid_indices]

print(f"Filtered X shape: {X.shape}")
print(f"Filtered y shape: {y.shape}")
print(f"Unique labels after filtering: {torch.unique(y)}")

# Print data shapes
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

# Adjust num_channels to match the number of features
num_channels = X.size(1)
total_elements = X.numel()
sequence_length = total_elements // (X.size(0) * num_channels)

if total_elements % (X.size(0) * num_channels) != 0:
    raise ValueError("The data cannot be evenly divided into the specified number of channels. Please check your data dimensions.")

# Reshape X to have shape (batch_size, num_channels, sequence_length)
X = X.view(X.size(0), num_channels, sequence_length)
print(f"Reshaped X shape: {X.shape}")

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create DataLoader objects
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Step 3: Define the FCRes-CNN Model
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.identity_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1) if in_channels != out_channels else None

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = nn.ReLU()(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.identity_conv is not None:
            identity = self.identity_conv(identity)
        out += identity
        return nn.ReLU()(out)

class FCResCNN(nn.Module):
    def __init__(self, num_classes):
        super(FCResCNN, self).__init__()
        self.down_sample = nn.Sequential(
            nn.Conv1d(num_channels, 32, kernel_size=1, stride=1),  # Match num_channels
            nn.BatchNorm1d(32),
            nn.Dropout(0.3)
        )
        self.res_block1 = ResidualBlock(32, 32)
        self.res_block2 = ResidualBlock(32, 64)
        self.res_block3 = ResidualBlock(64, 128)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * sequence_length, 256),  # Dynamically adjust input size
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.down_sample(x)
        x = self.res_block1(x)
        x = self.res_block2(x)
        x = self.res_block3(x)
        x = self.classifier(x)
        return x

# Step 4: Train and Evaluate the Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FCResCNN(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=20):
    model.train()
    for epoch in range(num_epochs):
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test data: {accuracy:.2f}%')

# Run training and evaluation
train_model(model, train_loader, criterion, optimizer)
evaluate_model(model, test_loader)


Unique labels after encoding: [3 0 2 1]
Number of unique classes: 4
Filtered X shape: torch.Size([141278, 25])
Filtered y shape: torch.Size([141278])
Unique labels after filtering: tensor([0, 1, 2, 3])
Shape of X: torch.Size([141278, 25])
Shape of y: torch.Size([141278])
Reshaped X shape: torch.Size([141278, 25, 1])
Epoch 1/20, Loss: 0.8848
Epoch 2/20, Loss: 0.3185
Epoch 3/20, Loss: 0.5235
Epoch 4/20, Loss: 0.3644
Epoch 5/20, Loss: 0.3057
Epoch 6/20, Loss: 0.1850
Epoch 7/20, Loss: 0.3698
Epoch 8/20, Loss: 0.3545
Epoch 9/20, Loss: 0.4239
Epoch 10/20, Loss: 0.7102
Epoch 11/20, Loss: 0.3032
Epoch 12/20, Loss: 0.4259
Epoch 13/20, Loss: 0.3375
Epoch 14/20, Loss: 0.5071
Epoch 15/20, Loss: 0.4520
Epoch 16/20, Loss: 0.3024
Epoch 17/20, Loss: 0.3730
Epoch 18/20, Loss: 0.2525
Epoch 19/20, Loss: 0.2730
Epoch 20/20, Loss: 0.6172
Accuracy on test data: 77.63%
