# Importing Dataset

In [104]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import itertools

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# importing data from AutoSleep App
df = pd.read_csv('AutoSleep-20210330-to-20240114.csv')

# Data Cleaning and Preprocessing

In [105]:
# Delete the first column
df = df.iloc[:, 1:]

# Delete all columns with full of Na
df = df.dropna(axis=1, how='all')

df = df.drop('sessions', axis=1)
df = df.drop('dayBPM', axis=1)
df = df.drop('dayBPMAvg7', axis=1)

pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,asleep,asleepAvg7,efficiency,efficiencyAvg7,quality,qualityAvg7,deep,deepAvg7,sleepBPM,sleepBPMAvg7,wakingBPM,wakingBPMAvg7,hrv,hrvAvg7,sleepHRV,sleepHRVAvg7,respAvg,respMin,respMax
0,"Wednesday, Mar 31, 2021","Thursday, Apr 1, 2021",2021-04-01 12:15:00,2021-04-01 19:26:00,07:11:00,00:05:00,00:00:00,07:06:00,07:06:00,98.8,98.8,05:32:47,05:32:47,04:28:13,04:28:13,45.7,45.7,53.0,53.0,112.0,112.0,55.0,98.0,,,
1,"Thursday, Apr 1, 2021","Friday, Apr 2, 2021",2021-04-02 12:45:00,2021-04-02 20:15:00,07:30:00,00:00:00,00:00:00,07:30:00,07:18:00,100.0,99.4,05:56:02,05:44:24,04:15:00,04:21:36,46.8,46.3,46.0,49.5,153.0,132.0,61.0,82.0,,,
2,"Friday, Apr 2, 2021","Saturday, Apr 3, 2021",2021-04-03 11:04:23,2021-04-03 19:54:00,08:49:36,00:22:36,00:11:37,08:27:00,07:41:00,95.7,98.2,07:24:53,06:17:54,03:50:27,04:11:13,45.7,46.1,45.0,48.0,130.0,131.0,76.0,85.0,,,
3,"Saturday, Apr 3, 2021","Sunday, Apr 4, 2021",2021-04-04 12:08:15,2021-04-04 22:04:00,09:55:44,00:03:44,00:03:45,09:52:00,08:13:45,99.4,98.5,08:44:39,06:54:35,05:03:35,04:24:18,45.5,45.9,46.0,47.5,86.0,120.0,58.0,76.0,,,
4,"Sunday, Apr 4, 2021","Monday, Apr 5, 2021",2021-04-05 11:18:41,2021-04-05 18:54:00,07:35:18,01:09:18,00:49:19,06:26:00,07:52:12,84.8,95.7,06:03:48,06:44:26,04:17:20,04:22:55,45.6,45.9,43.0,46.6,76.0,111.0,59.0,65.0,,,


Here we can see that we have these variables, Because of the limited capabilities of my wearable, the only fields I was able to record were the following:
*   From Date/To Date: the date the sleep session was recorded in
*   Bedtime / Waketime: the time you went to bed and awoke.
*   InBed: how long you were in bed for, shown in hours, minutes and seconds
*   Awake: how long you were awake for, shown in hours, minutes and seconds
*   Fell Asleep: the amount time it took you to fall asleep
*   Asleep / AsleepAvg7: the sleep duration recorded along with the 7 day sleep duration average on that date
*   Efficiency / EfficiencyAvg7: the ratio of time asleep versus time spent in bed along with the 7 day efficiency average
*   Quality / QualityAvg7: quality considers how long you have slept, how restless you've been and your sleeping heart rate. It is shown as hours, minutes and seconds along with a 7 day sleep quality average
*   Deep / DeepAvg7: where your heart rate slows and your muscles relax to a point where you barely move. It is shown as hours, minutes and seconds along with a 7 day deep sleep average
*   SleepBPM / SleepBPMAvg7: your average heart rate shown in beats per minute for the sleep, along with a 7 day average of your sleeping heart rate
*   DayBPM / DayBPMAvg7: your average heart rate outside of your sleep, generally during the day for most users, shown in beats per minute, along with a 7 day average of your daily heart rate
*   WakingBPM / WakingBPMAvg7: your waking pulse shown in beats per minute which is automatically captured by AutoSleep , along with a 7 day waking pulse average
*   HRV / HRVAvg7: your Heart Rate Variability. AutoSleep will use the maximum value where multiple values exist for the same date. This will also show a 7 day average of your HRV
*   sleepHRV / sleepHRVAvg7: your sleep Heart Rate Variability.
*   respAvg / respMin /	respMax: Average, minimum and maximum respiration

Next the data in the form of hour:minute:second in OBJECT format needs to be converted to minute data in FLOAT format to facilitate further exploratory data analysis or building machine learning models.

In [106]:
# Columns that need to be converted from hh:mm:ss to float64
time_columns = ['inBed', 'awake', 'fellAsleepIn', 'asleep', 'asleepAvg7', 'quality', 'qualityAvg7', 'deep', 'deepAvg7']

# Function to convert time string to minutes
def time_to_minutes(time_str):
    if pd.isna(time_str) or isinstance(time_str, float):
        return time_str
    hours, minutes, seconds = map(int, time_str.split(':'))
    return hours * 60 + minutes + seconds / 60

# Convert the time data to minutes
for col in time_columns:
    df[col] = df[col].apply(time_to_minutes)

# Feature Engineering




Extract week, month, day data from fromdate

In [107]:
import datetime

df['fromDate'] = pd.to_datetime(df['fromDate'])

df['toDate'] = pd.to_datetime(df['toDate'])

df['day_of_week'] = df['fromDate'].dt.day_name()

df['Week'] = df['fromDate'].dt.weekday

df['Month'] = df['fromDate'].dt.month

df['Day'] = df['fromDate'].dt.day

In [108]:
df['bedtime'] = pd.to_datetime(df['bedtime']).dt.strftime("%H:%M:%S")
df['waketime'] = pd.to_datetime(df['waketime']).dt.strftime("%H:%M:%S")

In [109]:
def time_to_float(time_str):
    time = pd.to_datetime(time_str, format='%H:%M:%S')
    return time.hour * 60 + time.minute + time.second / 60

# convert form datetime to float
df['bedtime_float'] = df['bedtime'].apply(time_to_float)
df['waketime_float'] = df['waketime'].apply(time_to_float)

Converting the day_of_week column, which includes all days of the week, to one-hot encoded format

In [110]:
df = pd.get_dummies(df, columns=['day_of_week'])

In [111]:
lag_features = ['asleepAvg7', 'efficiencyAvg7', 'qualityAvg7', 'deepAvg7', 'sleepBPMAvg7', 'hrvAvg7', 'sleepHRVAvg7']
for feature in lag_features:
    df[feature + '_lag'] = df[feature].shift(1)

Because of the limitations of the record, I have only 400 or so entries that are non-null data that contain respiratory information, and 600 or so entries that are non-null data that do not contain respiratory information, and I keep both parts of the data.

In [112]:
#split the data to df1 and df2
#df1 is data that without any NA
df1 = df.drop(df.columns[-13:-10], axis=1)
df1 = df1.dropna()
#df_resp is data that with only 400+ data but has columns 'resp'
df_resp = df.dropna()

In [113]:
df_resp.head()

Unnamed: 0,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,asleep,asleepAvg7,efficiency,efficiencyAvg7,quality,qualityAvg7,deep,deepAvg7,sleepBPM,sleepBPMAvg7,wakingBPM,wakingBPMAvg7,hrv,hrvAvg7,sleepHRV,sleepHRVAvg7,respAvg,respMin,respMax,Week,Month,Day,bedtime_float,waketime_float,day_of_week_Friday,day_of_week_Monday,day_of_week_Saturday,day_of_week_Sunday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday,asleepAvg7_lag,efficiencyAvg7_lag,qualityAvg7_lag,deepAvg7_lag,sleepBPMAvg7_lag,hrvAvg7_lag,sleepHRVAvg7_lag
159,2021-09-22,2021-09-23,10:43:40,17:46:00,422.316667,20.316667,1.333333,402.0,453.7,95.2,88.0,336.333333,378.666667,139.15,182.7,53.3,50.7,52.0,50.9,73.0,94.0,62.0,70.0,17.2,14.5,23.0,2,9,22,643.666667,1066.0,0,0,0,0,0,0,1,456.7,87.9,381.183333,187.45,49.9,95.0,69.0
160,2021-09-23,2021-09-24,11:44:12,18:00:00,375.783333,62.783333,61.783333,313.0,446.85,83.3,90.9,269.883333,371.85,140.85,178.166667,49.5,50.8,50.0,50.0,131.0,99.0,82.0,72.0,17.9,16.5,19.5,3,9,23,704.2,1080.0,0,0,0,0,1,0,0,453.7,88.0,378.666667,182.7,50.7,94.0,70.0
161,2021-09-24,2021-09-25,11:12:29,19:47:00,514.5,60.5,33.516667,454.0,445.283333,88.2,89.2,371.116667,369.883333,181.6,177.533333,49.0,50.7,51.0,50.6,105.0,104.0,79.0,76.0,16.9,14.0,20.5,4,9,24,672.483333,1187.0,1,0,0,0,0,0,0,446.85,90.9,371.85,178.166667,50.8,99.0,72.0
162,2021-09-25,2021-09-26,11:15:48,18:59:00,494.183333,51.183333,51.2,443.0,434.283333,88.9,87.9,361.033333,366.083333,174.3,182.166667,47.9,50.3,46.0,50.1,137.0,103.0,71.0,75.0,17.1,15.0,19.0,5,9,25,675.8,1139.0,0,0,1,0,0,0,0,445.283333,89.2,369.883333,177.533333,50.7,104.0,76.0
163,2021-09-26,2021-09-27,11:04:58,18:45:00,460.016667,82.016667,70.033333,378.0,410.416667,82.2,85.4,313.833333,344.566667,151.2,168.75,46.7,48.9,46.0,49.0,321.0,137.0,116.0,80.0,17.4,16.5,21.0,6,9,26,664.966667,1125.0,0,0,0,1,0,0,0,434.283333,87.9,366.083333,182.166667,50.3,103.0,75.0




---



In [114]:
features = df_resp[['bedtime_float', 'asleepAvg7_lag', 'efficiencyAvg7_lag', 'qualityAvg7_lag', 'deep', 'deepAvg7_lag', 'sleepBPMAvg7_lag', 'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7_lag', 'sleepHRVAvg7_lag', 'respAvg', 'respMin', 'respMax', 'day_of_week_Friday','day_of_week_Monday','day_of_week_Saturday','day_of_week_Sunday','day_of_week_Thursday','day_of_week_Tuesday','day_of_week_Wednesday']]
target = df_resp['waketime_float']

X = features.values
y = target.values

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# MLP

In [116]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert arrays to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create Tensor datasets and data loaders
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
valid_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

class MLP(nn.Module):
    def __init__(self, input_size, dropout_rate=0.2):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        return self.layers(x)

# Initialize the MLP

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MLP(input_size=X.shape[1])
model = model.to(device)
print(device)

epoch_num = 500
learning_rate = 0.006
weight_decay = 1e-3

def train_and_evaluate(model, train_loader, valid_loader, num_epochs = 20, learning_rate=0.01, weight_decay=0.0, patience = 5):
    train_losses = []
    val_losses = []
    val_maes = []

    model.to(device)
    # Define loss function
    criterion = nn.MSELoss()
    # Define optimizer with hyperparameters
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Early stopping initialization
    the_last_loss = float('inf')
    epochs_no_improve = 0
    best_epoch = 0

    for epoch in range(num_epochs):
        # Training loop
        model.train()  # Set the model to training mode
        total_train_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item() * inputs.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        total_val_loss = 0.0

        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
                total_val_loss += loss.item() * inputs.size(0)
                all_preds.extend(outputs.squeeze().cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_val_loss = total_val_loss / len(train_loader.dataset)
        val_losses.append(avg_val_loss)

        mae = mean_absolute_error(all_labels, all_preds)
        val_maes.append(mae)

        # Print statistics
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

        # Early stopping and model checkpointing
        if avg_val_loss < the_last_loss:
            epochs_no_improve = 0
            the_last_loss = avg_val_loss
            best_epoch = epoch
            torch.save(model.state_dict(), 'best_model.pth')  # Save the model checkpoint
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f'Early stopping triggered after {epoch + 1} epochs!')
                break

    return train_losses, val_losses, val_maes, best_epoch

# Train base MLP model
train_losses, val_losses, val_maes, best_epoch = train_and_evaluate(
    model, train_loader, valid_loader, num_epochs=epoch_num, learning_rate=learning_rate, weight_decay=weight_decay)

  y_train = torch.tensor(y_train, dtype=torch.float32)
  y_test = torch.tensor(y_test, dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


cpu
Epoch 1/500, Training Loss: 1244264.3955, Validation Loss: 105904.2986
Epoch 2/500, Training Loss: 503892.5443, Validation Loss: 32131.9832
Epoch 3/500, Training Loss: 272626.6352, Validation Loss: 11488.3293
Epoch 4/500, Training Loss: 185279.5847, Validation Loss: 10223.8526
Epoch 5/500, Training Loss: 119285.8999, Validation Loss: 7526.2695
Epoch 6/500, Training Loss: 109041.9406, Validation Loss: 6576.9689
Epoch 7/500, Training Loss: 101298.6197, Validation Loss: 6315.3426


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 8/500, Training Loss: 96324.5909, Validation Loss: 5994.9843
Epoch 9/500, Training Loss: 90452.8311, Validation Loss: 5708.6892
Epoch 10/500, Training Loss: 87722.4307, Validation Loss: 5186.3786
Epoch 11/500, Training Loss: 85524.9821, Validation Loss: 5604.7865
Epoch 12/500, Training Loss: 85223.8986, Validation Loss: 5037.9961


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 13/500, Training Loss: 82738.6408, Validation Loss: 4957.7238
Epoch 14/500, Training Loss: 81179.7697, Validation Loss: 4670.4965
Epoch 15/500, Training Loss: 81674.3887, Validation Loss: 5013.1959
Epoch 16/500, Training Loss: 81245.5909, Validation Loss: 4873.1988
Epoch 17/500, Training Loss: 81017.7232, Validation Loss: 4816.5351
Epoch 18/500, Training Loss: 80500.2700, Validation Loss: 5130.1169
Epoch 19/500, Training Loss: 80996.6207, Validation Loss: 4958.9001
Early stopping triggered after 19 epochs!


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [117]:
print(f"Best epoch: {best_epoch + 1}")
print(f"Training MSE at best epoch: {train_losses[best_epoch]:.4f}")
print(f"Validation MSE at best epoch: {val_losses[best_epoch]:.4f}")
print(f"Validation MAE at best epoch: {val_maes[best_epoch]:.4f}")

Best epoch: 14
Training MSE at best epoch: 81179.7697
Validation MSE at best epoch: 4670.4965
Validation MAE at best epoch: 124.4076


# LSTM

In [102]:
# Data standardization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data to fit LSTM inputs
def reshape_for_lstm(X, y, time_steps):
    X_reshaped = []
    y_reshaped = []
    for i in range(len(X) - time_steps):
        X_reshaped.append(X[i: i + time_steps])
        y_reshaped.append(y[i + time_steps])
    return np.array(X_reshaped), np.array(y_reshaped)

time_steps = 7  # Time step
X_train_reshaped, y_train_reshaped = reshape_for_lstm(X_train, y_train, time_steps)
X_test_reshaped, y_test_reshaped = reshape_for_lstm(X_test, y_test, time_steps)

# Converted to PyTorch tensor
X_train_tensor = torch.tensor(X_train_reshaped, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_reshaped, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_reshaped, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_reshaped, dtype=torch.float32)

# Create the Tensor dataset and DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
valid_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_rate=0.2):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        lstm_out, (h_n, c_n) = self.lstm(x)
        out = self.linear(lstm_out[:, -1, :])
        return out

# Model parameters
input_size = X_train_reshaped.shape[2]  # of features
hidden_size = 50  # of LSTM cells
num_layers = 3   # of LSTM layers

# Initialize the model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = LSTMModel(input_size, hidden_size, num_layers)
model.to(device)

# Training parameters
learning_rate = 0.1
weight_decay = 1e-3
epoch_num = 500

# Define training and evaluation functions
def train_and_evaluate(model, train_loader, valid_loader, num_epochs, learning_rate, weight_decay):
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Parameters of the early stop method
    the_last_loss = float('inf')
    epochs_no_improve = 0
    patience = 5
    best_epoch = 0

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * inputs.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)

        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_val_loss += loss.item() * inputs.size(0)

        avg_val_loss = total_val_loss / len(valid_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

        if avg_val_loss < the_last_loss:
            the_last_loss = avg_val_loss
            best_epoch = epoch
            epochs_no_improve = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f'Early stopping triggered after {epoch + 1} epochs!')
                break

    return avg_train_loss, avg_val_loss, best_epoch

# Training models
train_loss, val_loss, best_epoch = train_and_evaluate(model, train_loader, valid_loader, epoch_num, learning_rate, weight_decay)
print(f'Best Epoch: {best_epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/500, Training Loss: 1299074.4287, Validation Loss: 1246290.1250
Epoch 2/500, Training Loss: 1249618.1905, Validation Loss: 1196540.6250
Epoch 3/500, Training Loss: 1199842.7139, Validation Loss: 1147459.7500
Epoch 4/500, Training Loss: 1151175.9931, Validation Loss: 1099620.6250
Epoch 5/500, Training Loss: 1104209.3741, Validation Loss: 1053134.2500
Epoch 6/500, Training Loss: 1058185.5208, Validation Loss: 1008322.0625
Epoch 7/500, Training Loss: 1013932.4617, Validation Loss: 965005.8750
Epoch 8/500, Training Loss: 971131.5827, Validation Loss: 923237.9375
Epoch 9/500, Training Loss: 929988.8533, Validation Loss: 882928.5625
Epoch 10/500, Training Loss: 890423.3329, Validation Loss: 844017.7500
Epoch 11/500, Training Loss: 852138.1633, Validation Loss: 806514.5625
Epoch 12/500, Training Loss: 815133.2732, Validation Loss: 770513.6875
Epoch 13/500, Training Loss: 779525.6902, Validation Loss: 735930.4375
Epoch 14/500, Training Loss: 745621.4489, Validation Loss: 702529.4375
Ep

# Random Forest

In [118]:
# Data standardization
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Data standardization
rf_regressor = RandomForestRegressor(n_estimators=5000, random_state=42)

# Training models
rf_regressor.fit(X_train_scaled, y_train)

# Predictions
y_pred_train = rf_regressor.predict(X_train_scaled)
y_pred_test = rf_regressor.predict(X_test_scaled)

# Calculate MSE
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)

# Calculate MAE
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)

print(f"Training MSE: {mse_train}")
print(f"Test MSE: {mse_test}")
print(f"Training MAE: {mae_train}")
print(f"Test MAE: {mae_test}")

Training MSE: 955.0525179094545
Test MSE: 2061.131082118368
Training MAE: 17.186164545454545
Test MAE: 35.83548163265306


# GBM

In [119]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Data standardization
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the gradient lifter regressor
gb_regressor = GradientBoostingRegressor(n_estimators=1000, learning_rate=0.01, max_depth=4, random_state=42)

# Training models
gb_regressor.fit(X_train_scaled, y_train)

# Make projections
y_pred_train = gb_regressor.predict(X_train_scaled)
y_pred_test = gb_regressor.predict(X_test_scaled)

# Calculate MSE for training and test sets
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)

# Calculate MAE
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)

# Output results
print(f"Training MSE: {mse_train}")
print(f"Test MSE: {mse_test}")
print(f"Training MAE: {mae_train}")
print(f"Test MAE: {mae_test}")

Training MSE: 462.9060524891934
Test MSE: 2018.7942548184033
Training MAE: 16.12351207743088
Test MAE: 35.13821900636575


# KNN

In [120]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()


X_train_scaled = scaler.fit_transform(X_train)


X_test_scaled = scaler.transform(X_test)


knn = KNeighborsRegressor(n_neighbors=4)


knn.fit(X_train_scaled, y_train)


y_pred = knn.predict(X_test_scaled)


mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Calculate MAE
mae_test = mean_absolute_error(y_test, y_pred)
print(f"Test MAE: {mae_test}")

Mean Squared Error: 2562.88134765625
Test MAE: 41.382652282714844
