In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Switch matplotlib to its 'bmh' style sheet for the figures drawn after this call.
plt.style.use('bmh')

## EDA

The dataset contains the following columns: "snoring range of the user", "respiration rate", "body temperature", "limb movement rate", "blood oxygen levels", "eye movement", "number of hours of sleep", "heart rate", and "Stress Levels".

In [None]:
# Read the sleep/stress CSV into a DataFrame and preview its first rows.
csv_path = '../datasets/habitat/Sleep_Stress.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Check for missing values in the dataset.


In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

In [None]:
# Count the missing (NaN) entries in every column.
df.isna().sum()

Visualize the distribution of the variables using histograms or density plots.

In [None]:
# Draw one 50-bin histogram per column on a single large figure.
hist_options = dict(bins=50, figsize=(20, 15))
df.hist(**hist_options)

plt.show()

Visualize the relationship between the continuous variables and the categorical target variable using boxplots or violin plots.

In [None]:
# Box plots of every continuous variable, grouped by the target column 'sl'.
# Each variable gets its own subplot (3x3 grid) with independent axes.
df.plot.box(
    by='sl',
    subplots=True,
    layout=(3, 3),
    sharex=False,
    sharey=False,
    figsize=(20, 15),
)
plt.show()

Check for any correlations between the continuous variables using a correlation matrix heatmap.

In [None]:
# Check the correlation of each variable with the target variable 'sl'.
# The target's own entry (its self-correlation) is dropped by label rather
# than by position, so the result stays correct even if 'sl' is not the
# last column of the frame.
df.corr()['sl'].drop('sl')

In [None]:
# Pairwise correlations of all columns, rendered as an annotated heatmap.
corr_matrix = df.corr()

fig, ax = plt.subplots(figsize=(12, 8))
heatmap_style = {'annot': True, 'cmap': 'coolwarm'}
sns.heatmap(corr_matrix, ax=ax, **heatmap_style)
plt.show()

The heatmap shows that the features are all highly correlated.

## Model Building

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Every column except the last is a feature; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fix the forest's seed: bootstrap sampling and per-split feature selection
# are random, so without random_state the fitted model (and every score
# printed below) changes from run to run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Score the fitted forest on the held-out rows.
y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Accuracy of random forest:", accuracy_rf)

# Per-class precision, recall and F1.
report_rf = classification_report(y_test, y_pred_rf)
print(report_rf)

#### Save the model

In [None]:
import joblib

# Persist the fitted random forest to disk; the path is kept in `filename`
# so that it can be reused when the model is loaded back.
filename = 'sleep_random_forest_model.joblib'
joblib.dump(value=rf, filename=filename)

#### Load the model

In [None]:
# Load the saved model.
# NOTE: joblib files are pickle-based, so loading one can execute arbitrary
# code. Only load files that come from a trusted source.
loaded_model = joblib.load(filename)

# Use the loaded model to make predictions.
y_pred_loaded = loaded_model.predict(X_test)

# Sanity check: the round-tripped model must reproduce the predictions that
# the in-memory model made on the same test rows.
assert np.array_equal(y_pred_loaded, y_pred_rf), \
    "loaded model predictions differ from the original model"

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit a default-configured logistic regression; fit() returns the estimator itself.
lr = LogisticRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Overall accuracy on the held-out rows.
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print("Accuracy of logistic regression:", accuracy_lr)

# Per-class precision, recall and F1.
report_lr = classification_report(y_test, y_pred_lr)
print(report_lr)

### Deep Learning

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Seed PyTorch's global RNG so that the DataLoader shuffling below, and any
# weight initialization performed after this point, is repeatable across runs.
torch.manual_seed(42)

# convert NumPy arrays to PyTorch tensors
# (features as float32, labels as int64 class indices for CrossEntropyLoss)
X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_tensor = torch.tensor(y_train.values, dtype=torch.long)

# create a PyTorch dataset and dataloader for the training data
# (mini-batches of 64 rows, reshuffled every epoch)
train_dataset = TensorDataset(X_tensor, y_tensor)
trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# define the neural network architecture
class Net(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The layer sizes default to the values used in this notebook
    (8 inputs -> 64 -> 32 -> 5 outputs), so ``Net()`` builds the same
    architecture as before; pass other sizes to reuse the class elsewhere.

    Args:
        in_features: Number of values in each input sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, in_features=8, hidden1=64, hidden2=32, num_classes=5):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return raw (un-normalized) class logits for a batch ``x``.

        No softmax is applied here; ``nn.CrossEntropyLoss`` expects logits.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Instantiate the network together with its training objective and optimizer.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Run 100 epochs of mini-batch training, printing the mean batch loss per epoch.
for epoch in range(100):
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        # Backpropagate, then apply the Adam update.
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    mean_loss = running_loss / len(trainloader)
    print('Epoch %d loss: %.3f' % (epoch + 1, mean_loss))

In [None]:
from sklearn.metrics import classification_report

# Convert the held-out split to PyTorch tensors.
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Make predictions on the test data: eval mode, no gradient tracking, and
# the predicted class is the index of the largest logit.
net.eval()
with torch.no_grad():
    y_pred_tensor = net(X_test_tensor)
y_pred = y_pred_tensor.argmax(dim=1).numpy()

# Generate a classification report.
# `labels` is passed explicitly so the rows always line up with
# `target_names`; without it, classification_report raises a ValueError
# whenever the classes present in y_test/y_pred do not number exactly
# len(target_names). The class indices are 0..len(target_names)-1 because
# argmax over the network's logits yields exactly those indices.
target_names = ['class 0', 'class 1', 'class 2', 'class 3', 'class 4']
class_labels = list(range(len(target_names)))
print(classification_report(y_test, y_pred, labels=class_labels, target_names=target_names))