In [2]:
import numpy as np
import os

file_list = os.listdir('/content/SR250Breath')
windows = []
labels = []

for file_name in file_list:
    file_path = os.path.join('/content/SR250Breath', file_name)
    try:
        data = np.load(file_path)

        if file_path.endswith('.window.npy'):
            windows.append(data)
        elif file_path.endswith('.label.npy'):
            labels.append(data)
        else:
            print(f"Unknown file type: {file_path}")
    except Exception as e:
        print(f"Could not load {file_name}: {e}")

# You can access the arrays using the file names as keys, for example:
# print(data_arrays['Millenials_E_breath_sitting_desk_20250909-115853.window.npy'])

In [3]:
windows = np.concat(windows)
labels = np.concat(labels)

print(windows.shape)
print(labels.shape)

(36833, 11, 3, 120)
(36833,)


In [4]:
# in maniera tale che l'ultimo asse corrisponde alle antenne
windows = windows.transpose(0, 1, 3, 2)
print(windows.shape)

(36833, 11, 120, 3)


In [5]:
abs_windows = np.abs(windows)
print(abs_windows.shape)

(36833, 11, 120, 3)


In [6]:
phase_windows = np.angle(windows)

In [18]:
all = np.concatenate((abs_windows, phase_windows), axis=-1)
print(all.shape)

(36833, 11, 120, 6)


In [19]:
import tensorflow as tf

# Assuming 'labels' is your numpy array of labels
# You might need to adjust the 'depth' parameter based on the number of unique classes in your labels
one_hot_labels = tf.one_hot(labels, depth=2) # Assuming 2 classes for demonstration

print(one_hot_labels.shape)

(36833, 2)


In [35]:
from sklearn.model_selection import train_test_split
import numpy as np

# Convert TensorFlow tensor to NumPy array
one_hot_labels_np = one_hot_labels.numpy()

# Split into training and temporary sets (for test and validation)
X_train, X_temp, y_train, y_temp = train_test_split(phase_windows, one_hot_labels_np, test_size=0.3, random_state=42)

# Split the temporary set into test and validation sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("Training set shapes:")
print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("\nValidation set shapes:")
print("X_val:", X_val.shape)
print("y_val:", y_val.shape)
print("\nTest set shapes:")
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

Training set shapes:
X_train: (25783, 11, 120, 3)
y_train: (25783, 2)

Validation set shapes:
X_val: (5525, 11, 120, 3)
y_val: (5525, 2)

Test set shapes:
X_test: (5525, 11, 120, 3)
y_test: (5525, 2)


# Task
Produce a CNN model for the prediction of the label and train it using the training, test, and validation data split from the `abs_windows` and `labels` variables.

## Define the cnn model architecture

### Subtask:
Define the layers of the CNN model, including convolutional layers, pooling layers, and dense layers, suitable for your data shape.


**Reasoning**:
Define the CNN model architecture using convolutional, pooling, and dense layers.



**Reasoning**:
The error indicates that the input shape is too small for the convolutional and pooling layers. Adjust the kernel size and pooling size to be smaller to avoid negative dimensions.



In [48]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential()

# Add convolutional layers with adjusted kernel and pooling sizes
model.add(Conv2D(32, (1, 1), activation='relu', input_shape=X_train.shape[1:]))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (2, 2), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (1, 1), activation='relu'))
model.add(MaxPooling2D((2, 4)))

# Flatten the output
model.add(Flatten())

# Add dense layers
# model.add(Dense(128, activation='relu'))
model.add(Dense(y_train.shape[1], activation='sigmoid'))

model.summary()

**Reasoning**:
The model summary shows that the output shape after the last MaxPooling2D layer has a dimension of 0, which is incorrect. The input shape is likely still too large for the kernel and pooling sizes. I need to further adjust the kernel and pooling sizes to ensure the output dimensions remain positive.



In [31]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential()

# Add convolutional layers with further adjusted kernel and pooling sizes
model.add(Conv2D(32, (1, 1), activation='relu', input_shape=X_train.shape[1:]))
model.add(MaxPooling2D((1, 1))) # Use (1,1) pooling to avoid reducing dimension too quickly
model.add(Conv2D(64, (1, 1), activation='relu'))
model.add(MaxPooling2D((1, 1)))
model.add(Conv2D(128, (1, 1), activation='relu'))
model.add(MaxPooling2D((1, 1)))


# Flatten the output
model.add(Flatten())

# Add dense layers
model.add(Dense(128, activation='relu'))
model.add(Dense(y_train.shape[1], activation='sigmoid'))

model.summary()

**Reasoning**:
The model summary now shows positive dimensions after the pooling layers, but using (1,1) kernel and pooling sizes essentially bypasses the spatial feature extraction capabilities of CNNs. I need to find a balance between kernel/pooling sizes and the input shape to effectively use convolutional layers without causing negative dimensions. I will try increasing the kernel size slightly while keeping the pooling size small.



In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential()

# Add convolutional layers with adjusted kernel and pooling sizes
model.add(Conv2D(32, (2, 2), activation='relu', input_shape=X_train.shape[1:]))
model.add(MaxPooling2D((1, 2))) # Pool only along the second dimension
model.add(Conv2D(64, (2, 2), activation='relu'))
model.add(MaxPooling2D((1, 2)))
model.add(Conv2D(128, (2, 2), activation='relu'))
model.add(MaxPooling2D((1, 2)))


# Flatten the output
model.add(Flatten())

# Add dense layers
model.add(Dense(128, activation='relu'))
model.add(Dense(y_train.shape[1], activation='sigmoid'))

model.summary()

## Compile the model

### Subtask:
Compile the CNN model by specifying the optimizer, loss function, and metrics.


**Reasoning**:
Compile the CNN model with the Adam optimizer, binary crossentropy loss, and accuracy metric.



In [49]:
from tensorflow.keras.optimizers import Adam

model.compile(optimizer=Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])

## Train the model

### Subtask:
Train the compiled model using the training data (`X_train`, `y_train`) and validate it using the validation data (`X_val`, `y_val`).


**Reasoning**:
Train the compiled model using the training and validation data.



In [50]:
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.8276 - loss: 0.4601 - val_accuracy: 0.8366 - val_loss: 0.4406
Epoch 2/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8360 - loss: 0.4397 - val_accuracy: 0.8366 - val_loss: 0.4368
Epoch 3/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8356 - loss: 0.4277 - val_accuracy: 0.8364 - val_loss: 0.4451
Epoch 4/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8360 - loss: 0.4109 - val_accuracy: 0.8386 - val_loss: 0.4081
Epoch 5/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8482 - loss: 0.3754 - val_accuracy: 0.8386 - val_loss: 0.3995
Epoch 6/10
[1m806/806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8586 - loss: 0.3391 - val_accuracy: 0.8400 - val_loss: 0.3960
Epoch 7/10
[1m806/806[0m 

**Reasoning**:
The first step is to load the data from the CSV file into a pandas DataFrame and display the first few rows to understand its structure.



## Evaluate the model

### Subtask:
Evaluate the trained model on the test data (`X_test`, `y_test`) to assess its performance.

**Reasoning**:
Evaluate the trained model on the test data to assess its performance.

In [51]:
loss, accuracy = model.evaluate(X_test, y_test)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8527 - loss: 0.3866
Test Loss: 0.38789311051368713
Test Accuracy: 0.8544796109199524
