In [1]:
!pip install torch optuna scikit-learn pandas -q

import os
import shutil
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import StandardScaler
import optuna
from google.colab import files

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m654.4 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:

# Function to preprocess data
def preprocess_data(file_path, scaler=None, fit_scaler=True):
    """Load a CSV file and convert it into feature/label tensors for the model.

    Every column except the last is treated as a feature and the last column
    as the integer class label. Features are standardised and given a
    singleton channel axis so they can be fed to 1-D convolutions.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the CSV.
        scaler: Optional scaler object exposing ``fit_transform``/``transform``
            (e.g. a ``StandardScaler``). When omitted, a new ``StandardScaler``
            is created for this call, which is the original behaviour.
        fit_scaler: When True (default) the scaler is fitted on this file's
            features. Pass False together with a scaler that was fitted on the
            training data so validation/test files are standardised with the
            training statistics instead of their own.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 tensor of shape
        ``(rows, 1, n_features)`` and ``y`` is an int64 tensor of shape
        ``(rows,)``.

    Raises:
        ValueError: If ``fit_scaler`` is False but no scaler was supplied.
    """
    frame = pd.read_csv(file_path)
    # NOTE(review): read_csv already consumes the first line as the header, so
    # this removes the row labelled 0, i.e. the first *data* row. Presumably the
    # files carry an extra index/header row there - confirm against the data.
    frame = frame.drop(index=0)

    features = frame.iloc[:, :-1].values  # all columns but the last
    labels = frame.iloc[:, -1].values     # last column

    if scaler is None:
        if not fit_scaler:
            raise ValueError("fit_scaler=False requires an already-fitted scaler")
        scaler = StandardScaler()

    if fit_scaler:
        features = scaler.fit_transform(features)
    else:
        # Reuse statistics learned elsewhere (normally from the training set).
        features = scaler.transform(features)

    features = features[:, np.newaxis, :]  # (rows, n_features) -> (rows, 1, n_features)
    return torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.long)

# Define the U-Net architecture
class UNet1D(nn.Module):
    """1-D convolutional encoder/decoder that outputs one class score vector per sample.

    The encoder halves the sequence length three times with ``MaxPool1d(2)``
    and the decoder doubles it three times with stride-2 transposed
    convolutions. ``forward`` feeds the bottleneck straight through the
    decoder; encoder activations are not concatenated back in.

    Args:
        n_channels: Number of input channels.
        n_classes: Number of output classes.
        base_filters: Channel width of the first stage; deeper stages use
            2x, 4x and 8x this value.
    """

    def __init__(self, n_channels, n_classes, base_filters):
        super().__init__()
        width_1 = base_filters
        width_2 = base_filters * 2
        width_4 = base_filters * 4
        width_8 = base_filters * 8

        # Encoder path
        self.inc = self.double_conv(n_channels, width_1)
        self.down1 = self.down(width_1, width_2)
        self.down2 = self.down(width_2, width_4)
        self.down3 = self.down(width_4, width_8)

        # Decoder path
        self.up1 = self.up(width_8, width_4)
        self.up2 = self.up(width_4, width_2)
        self.up3 = self.up(width_2, width_1)

        # 1x1 convolution mapping features to per-position class scores
        self.outc = nn.Conv1d(width_1, n_classes, kernel_size=1)

    def double_conv(self, in_channels, out_channels):
        """Return two stacked Conv1d(k=3, pad=1) -> BatchNorm1d -> ReLU stages."""
        stages = []
        for stage_in in (in_channels, out_channels):
            stages.append(nn.Conv1d(stage_in, out_channels, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm1d(out_channels))
            stages.append(nn.ReLU(inplace=True))
        return nn.Sequential(*stages)

    def down(self, in_channels, out_channels):
        """Return an encoder stage: 2x max-pooling followed by a double conv."""
        pool = nn.MaxPool1d(2)
        convs = self.double_conv(in_channels, out_channels)
        return nn.Sequential(pool, convs)

    def up(self, in_channels, out_channels):
        """Return a decoder stage: 2x transposed-conv upsampling followed by a double conv."""
        upsample = nn.ConvTranspose1d(in_channels, out_channels, kernel_size=2, stride=2)
        convs = self.double_conv(out_channels, out_channels)
        return nn.Sequential(upsample, convs)

    def forward(self, x):
        """Map a ``(batch, n_channels, length)`` tensor to ``(batch, n_classes)`` log-probabilities."""
        bottleneck = self.down3(self.down2(self.down1(self.inc(x))))
        decoded = self.up3(self.up2(self.up1(bottleneck)))
        # Average the per-position scores over the sequence axis (global average pooling).
        class_scores = self.outc(decoded).mean(dim=2)
        return F.log_softmax(class_scores, dim=1)

# Custom loss function
def custom_loss(output, target):
    """Mean cross-entropy with a doubled penalty for one specific confusion.

    Samples whose true label is 1 but whose highest-scoring class is 0 have
    their individual loss multiplied by two before averaging. The reverse
    confusion (true 0, predicted 1) is not up-weighted.

    Args:
        output: ``(batch, n_classes)`` class scores.
        target: ``(batch,)`` integer class labels.

    Returns:
        Scalar tensor: the mean of the (partly doubled) per-sample losses.
    """
    per_sample = F.cross_entropy(output, target, reduction='none')
    predicted = output.argmax(dim=1)
    # Presumably label 1 = stomach and label 0 = back, per the original
    # author's note - confirm against the label encoding.
    confused = (predicted == 0) & (target == 1)
    weighted = torch.where(confused, per_sample * 2, per_sample)
    return weighted.mean()

# Objective function for Optuna
def objective(trial):
    """Optuna objective: train a ``UNet1D`` for one sampled configuration.

    Samples ``base_filters`` and ``learning_rate``, trains on the module-level
    ``X_train``/``y_train`` tensors, and after every epoch evaluates on
    ``X_val``/``y_val``. Validation accuracy is reported to the trial so the
    study's pruner can stop unpromising trials early.

    Args:
        trial: The ``optuna`` trial supplying hyperparameters and pruning.

    Returns:
        Validation accuracy (fraction in [0, 1]) after the final epoch.

    Raises:
        ValueError: If the validation set is empty.
        optuna.exceptions.TrialPruned: If the pruner asks to stop the trial.
    """
    num_epochs = 50
    batch_size = 32

    base_filters = trial.suggest_int('base_filters', 16, 64, step=16)
    learning_rate = trial.suggest_categorical('learning_rate', [1e-2, 1e-3, 1e-4])

    model = UNet1D(n_channels=1, n_classes=6, base_filters=base_filters).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size, shuffle=False)

    num_val = len(val_loader.dataset)
    if num_val == 0:
        # Fail before training rather than with a ZeroDivisionError after an epoch.
        raise ValueError("Validation set is empty; cannot compute accuracy.")

    for epoch in range(num_epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = custom_loss(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                output = model(batch_X)
                # custom_loss returns a batch mean; weight it by the batch size
                # so that dividing by the sample count gives a per-sample mean.
                val_loss += custom_loss(output, batch_y).item() * batch_y.size(0)
                correct += (output.argmax(dim=1) == batch_y).sum().item()

        val_loss /= num_val
        accuracy = correct / num_val

        # Keep the latest validation loss with the trial for later inspection.
        trial.set_user_attr('val_loss', val_loss)
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

# Function to evaluate the model
def evaluate_model(model, X, y):
    """Return the classification accuracy of ``model`` on ``(X, y)``.

    The model is switched to eval mode and the whole of ``X`` is pushed
    through it in a single forward pass on the module-level ``device``.

    Args:
        model: Network whose output has one score per class along dim 1.
        X: Input tensor.
        y: Tensor of integer class labels, one per sample in ``X``.

    Returns:
        Fraction of samples whose arg-max prediction equals the label, as a
        Python float.
    """
    model.eval()
    with torch.no_grad():
        scores = model(X.to(device))
    hits = scores.argmax(dim=1) == y.to(device)
    return hits.float().mean().item()

# Helper: prompt for a CSV upload in Colab and preprocess the uploaded file
def _upload_and_preprocess(prompt):
    """Print ``prompt``, wait for a Colab upload, and preprocess the first uploaded file.

    Args:
        prompt: Message shown to the user before the upload widget appears.

    Returns:
        The ``(X, y)`` tensor pair produced by ``preprocess_data``.

    Raises:
        ValueError: If the upload returned no file (e.g. it was cancelled).
    """
    print(prompt)
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded; cannot continue.")
    file_path = next(iter(uploaded))  # first uploaded file name
    return preprocess_data(file_path)


# Main function to load data, create model, and train the model
def main():
    """Run the full workflow: upload data, tune, retrain, evaluate, and export.

    Steps:
        1. Upload and preprocess the training, validation and test CSV files.
        2. Run a 10-trial Optuna study maximising the value returned by ``objective``.
        3. Train a fresh ``UNet1D`` for 50 epochs on the training set using
           the best trial's hyperparameters.
        4. Print test-set accuracy, save the model's ``state_dict`` to
           ``smart_mattress_model.pth`` and pass it to ``files.download``.
    """
    # objective() reads the train/val tensors as module-level globals.
    global X_train, y_train, X_val, y_val, X_test, y_test
    X_train, y_train = _upload_and_preprocess("Upload the training data CSV file:")
    X_val, y_val = _upload_and_preprocess("Upload the internal validation data CSV file:")
    X_test, y_test = _upload_and_preprocess("Upload the accuracy test data CSV file:")

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)

    best_params = study.best_trial.params
    print(f"Best trial parameters: {best_params}")

    # Rebuild the model from the winning configuration and train it from scratch.
    base_filters = best_params['base_filters']
    learning_rate = best_params['learning_rate']

    best_model = UNet1D(n_channels=1, n_classes=6, base_filters=base_filters).to(device)
    optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)

    for epoch in range(50):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            output = best_model(batch_X.to(device))
            loss = custom_loss(output, batch_y.to(device))
            loss.backward()
            optimizer.step()

    accuracy = evaluate_model(best_model, X_test, y_test)
    print(f"Model evaluation accuracy: {accuracy * 100:.2f}%")
    torch.save(best_model.state_dict(), 'smart_mattress_model.pth')
    files.download('smart_mattress_model.pth')
    print("Model training complete and saved as 'smart_mattress_model.pth'.")

if __name__ == "__main__":
    # objective(), evaluate_model() and main() all read this module-level
    # `device`, so it has to be bound before main() is called.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    main()

Upload the training data CSV file:


Saving merged_data.csv to merged_data (6).csv
Upload the internal validation data CSV file:


Saving merged_data.csv to merged_data (7).csv
Upload the accuracy test data CSV file:


[I 2024-07-30 09:10:11,805] A new study created in memory with name: no-name-906723d3-d628-4b76-b9f4-7912ef4433b7


Saving merged_data.csv to merged_data (8).csv


[I 2024-07-30 09:17:18,065] Trial 0 finished with value: 0.8921417565485362 and parameters: {'base_filters': 48, 'learning_rate': 0.001}. Best is trial 0 with value: 0.8921417565485362.
[I 2024-07-30 09:19:29,717] Trial 1 finished with value: 0.8998459167950693 and parameters: {'base_filters': 16, 'learning_rate': 0.001}. Best is trial 1 with value: 0.8998459167950693.
[I 2024-07-30 09:21:42,725] Trial 2 finished with value: 0.9198767334360555 and parameters: {'base_filters': 16, 'learning_rate': 0.01}. Best is trial 2 with value: 0.9198767334360555.
[I 2024-07-30 09:28:36,748] Trial 3 finished with value: 0.9476117103235747 and parameters: {'base_filters': 48, 'learning_rate': 0.001}. Best is trial 3 with value: 0.9476117103235747.
[I 2024-07-30 09:32:31,844] Trial 4 finished with value: 0.8967642526964561 and parameters: {'base_filters': 32, 'learning_rate': 0.0001}. Best is trial 3 with value: 0.9476117103235747.
[I 2024-07-30 09:32:48,698] Trial 5 pruned. 
[I 2024-07-30 09:32:52,22

Best trial parameters: {'base_filters': 48, 'learning_rate': 0.001}
Model evaluation accuracy: 91.53%


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Model training complete and saved as 'smart_mattress_model.pth'.
