In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

from imblearn.over_sampling import RandomOverSampler
import numpy as np
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report




In [2]:
sleep_df = pd.read_csv("Sleep_Efficiency_Updated.csv")
sleep_df.head()

Unnamed: 0,ID,Age,Gender,Bedtime,Wakeup time,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Smoking status,Exercise frequency
0,1,65,Female,06/03/2021 01:00,06/03/2021 07:00,6.0,0.88,18,70,12,0.0,0.0,0.0,Yes,3.0
1,2,69,Male,05/12/2021 02:00,05/12/2021 09:00,7.0,0.66,19,28,53,3.0,0.0,3.0,Yes,3.0
2,3,40,Female,25/05/2021 21:30,25/05/2021 05:30,8.0,0.89,20,70,10,1.0,0.0,0.0,No,3.0
3,4,40,Female,03/11/2021 02:30,03/11/2021 08:30,6.0,0.51,23,25,52,3.0,50.0,5.0,Yes,1.0
4,5,57,Male,13/03/2021 01:00,13/03/2021 09:00,8.0,0.76,27,55,18,3.0,0.0,3.0,No,3.0


In [3]:
sleep_df = sleep_df.drop(columns=['ID','Bedtime', 'Wakeup time'])

In [4]:
#sleep_df = sleep_df.dropna()

In [5]:
sleep_df.nunique()

Age                       61
Gender                     2
Sleep duration             9
Sleep efficiency          50
REM sleep percentage      13
Deep sleep percentage     29
Light sleep percentage    29
Awakenings                 5
Caffeine consumption       6
Alcohol consumption        6
Smoking status             2
Exercise frequency         6
dtype: int64

In [6]:
sleep_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 618 entries, 0 to 617
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Age                     618 non-null    int64  
 1   Gender                  618 non-null    object 
 2   Sleep duration          618 non-null    float64
 3   Sleep efficiency        618 non-null    float64
 4   REM sleep percentage    618 non-null    int64  
 5   Deep sleep percentage   618 non-null    int64  
 6   Light sleep percentage  618 non-null    int64  
 7   Awakenings              591 non-null    float64
 8   Caffeine consumption    582 non-null    float64
 9   Alcohol consumption     597 non-null    float64
 10  Smoking status          618 non-null    object 
 11  Exercise frequency      611 non-null    float64
dtypes: float64(6), int64(4), object(2)
memory usage: 58.1+ KB


In [7]:
sleep_df = pd.get_dummies(sleep_df)

In [8]:
sleep_df

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Exercise frequency,Gender_Female,Gender_Male,Smoking status_No,Smoking status_Yes
0,65,6.0,0.88,18,70,12,0.0,0.0,0.0,3.0,1,0,0,1
1,69,7.0,0.66,19,28,53,3.0,0.0,3.0,3.0,0,1,0,1
2,40,8.0,0.89,20,70,10,1.0,0.0,0.0,3.0,1,0,1,0
3,40,6.0,0.51,23,25,52,3.0,50.0,5.0,1.0,1,0,0,1
4,57,8.0,0.76,27,55,18,3.0,0.0,3.0,3.0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,61,7.0,0.67,23,23,54,2.0,50.0,5.0,0.0,1,0,1,0
614,30,8.0,0.94,22,63,15,1.0,75.0,0.0,2.0,1,0,0,1
615,48,6.0,0.79,24,60,16,4.0,0.0,0.0,2.0,0,1,0,1
616,32,5.0,0.86,20,65,15,1.0,25.0,1.0,0.0,1,0,1,0


In [9]:
sleep_df['Sleep efficiency'] = sleep_df['Sleep efficiency'].apply(lambda x: 1 if x > 0.85 else 0)

In [10]:
sleep_df

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Exercise frequency,Gender_Female,Gender_Male,Smoking status_No,Smoking status_Yes
0,65,6.0,1,18,70,12,0.0,0.0,0.0,3.0,1,0,0,1
1,69,7.0,0,19,28,53,3.0,0.0,3.0,3.0,0,1,0,1
2,40,8.0,1,20,70,10,1.0,0.0,0.0,3.0,1,0,1,0
3,40,6.0,0,23,25,52,3.0,50.0,5.0,1.0,1,0,0,1
4,57,8.0,0,27,55,18,3.0,0.0,3.0,3.0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,61,7.0,0,23,23,54,2.0,50.0,5.0,0.0,1,0,1,0
614,30,8.0,1,22,63,15,1.0,75.0,0.0,2.0,1,0,0,1
615,48,6.0,0,24,60,16,4.0,0.0,0.0,2.0,0,1,0,1
616,32,5.0,1,20,65,15,1.0,25.0,1.0,0.0,1,0,1,0


In [11]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(columns=['Sleep efficiency']).values

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)

In [13]:
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 112       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 163 (652.00 Byte)
Trainable params: 163 (652.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])




In [16]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 1s - loss: 0.6780 - accuracy: 0.5871 - 967ms/epoch - 193ms/step
Loss: 0.677970826625824, Accuracy: 0.5870967507362366


*** Droping NaN values ***

In [18]:
sleep_df = sleep_df.dropna()

In [19]:
sleep_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 528 entries, 0 to 617
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Age                     528 non-null    int64  
 1   Sleep duration          528 non-null    float64
 2   Sleep efficiency        528 non-null    int64  
 3   REM sleep percentage    528 non-null    int64  
 4   Deep sleep percentage   528 non-null    int64  
 5   Light sleep percentage  528 non-null    int64  
 6   Awakenings              528 non-null    float64
 7   Caffeine consumption    528 non-null    float64
 8   Alcohol consumption     528 non-null    float64
 9   Exercise frequency      528 non-null    float64
 10  Gender_Female           528 non-null    uint8  
 11  Gender_Male             528 non-null    uint8  
 12  Smoking status_No       528 non-null    uint8  
 13  Smoking status_Yes      528 non-null    uint8  
dtypes: float64(5), int64(5), uint8(4)
memory u

In [20]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(columns=['Sleep efficiency']).values

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)

In [22]:
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8)                 112       
                                                                 
 dense_4 (Dense)             (None, 5)                 45        
                                                                 
 dense_5 (Dense)             (None, 1)                 6         
                                                                 
Total params: 163 (652.00 Byte)
Trainable params: 163 (652.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [24]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [25]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [26]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.2611 - accuracy: 0.9015 - 393ms/epoch - 79ms/step
Loss: 0.26108482480049133, Accuracy: 0.9015151262283325


More neurons

In [27]:
# Create a new neural network model with more neurons
nn = tf.keras.models.Sequential()

nn.add(tf.keras.layers.Dense(units=80, activation="relu", input_dim=number_input_features))

nn.add(tf.keras.layers.Dense(units=80, activation="sigmoid"))

nn.add(tf.keras.layers.Dense(units=80, activation="sigmoid"))

nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 80)                1120      
                                                                 
 dense_7 (Dense)             (None, 80)                6480      
                                                                 
 dense_8 (Dense)             (None, 80)                6480      
                                                                 
 dense_9 (Dense)             (None, 1)                 81        
                                                                 
Total params: 14161 (55.32 KB)
Trainable params: 14161 (55.32 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [29]:
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [30]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.3685 - accuracy: 0.9242 - 249ms/epoch - 50ms/step
Loss: 0.36852604150772095, Accuracy: 0.9242424368858337


In [31]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=30,
        step=5), activation=activation, input_dim=2))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [38]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective=kt.Objective("Sleep efficiency", direction="max"),
    max_epochs=20,
    hyperband_iterations=2
)

In [39]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 2 Complete [00h 00m 01s]

Best Sleep efficiency So Far: None
Total elapsed time: 00h 00m 02s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
tanh              |relu              |activation
1                 |21                |first_units
3                 |5                 |num_layers
1                 |21                |units_0
1                 |1                 |units_1
6                 |1                 |units_2
21                |1                 |units_3
16                |1                 |units_4
3                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/3


Traceback (most recent call last):
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 273, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 238, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\tuners\hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 273, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 238, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\tuners\hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras_tuner\src\engine\hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\BRENDA~1\AppData\Local\Temp\__autograph_generated_file_1qga9hu.py", line 18, in tf__train_function
    raise
ValueError: in user code:

    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Brendan Smith\Downloads\Anaconda\Lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 2), found shape=(None, 13)



In [40]:
# Get top 3 model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 21, 'num_layers': 5, 'units_0': 21, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'units_1': 1, 'units_2': 1, 'units_3': 1, 'units_4': 1}
{'activation': 'relu', 'first_units': 26, 'num_layers': 5, 'units_0': 11, 'units_1': 6, 'units_2': 21, 'units_3': 11, 'units_4': 11, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'activation': 'tanh', 'first_units': 1, 'num_layers': 3, 'units_0': 1, 'units_1': 1, 'units_2': 6, 'units_3': 21, 'units_4': 16, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}


In [41]:
# Evaluate the top 3 models against the test dataset

top_model = tuner.get_best_models(3)
for model in top_model:
    model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for .\untitled_project\trial_0000\checkpoint

In [57]:
# Get second best model hyperparameters
second_hyper = tuner.get_best_hyperparameters(2)[1]
second_hyper.values

{'activation': 'relu',
 'first_units': 26,
 'num_layers': 4,
 'units_0': 1,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0,
 'units_1': 1,
 'units_2': 1,
 'units_3': 1}

In [59]:
# Compare the performance to the second-best model
second_model = tuner.get_best_models(2)[1]
model_loss, model_accuracy = second_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for .\untitled_project\trial_0000\checkpoint

In [43]:
# Logistic Regression

In [44]:
# Separate the data into labels and features
# Separate the y variable, the labels
y = sleep_df['Sleep efficiency']
# Separate the X variable, the features
x = sleep_df[['Sleep duration', 'REM sleep percentage', 'Deep sleep percentage', 'Light sleep percentage', 'Awakenings']]

In [45]:
# Review the y variable Series
y[:5]

0    1
1    0
2    1
3    0
4    0
Name: Sleep efficiency, dtype: int64

In [46]:
# Review the X variable DataFrame
x[:5]

Unnamed: 0,Sleep duration,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings
0,6.0,18,70,12,0.0
1,7.0,19,28,53,3.0
2,8.0,20,70,10,1.0
3,6.0,23,25,52,3.0
4,8.0,27,55,18,3.0


In [47]:
# Check the balance of our target values
y.value_counts()

0    310
1    218
Name: Sleep efficiency, dtype: int64

In [48]:
# Split the data using train_test_split
# Assign a random_state of 1 to the function
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=1)

In [49]:
# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

# Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model
logistic_regression_model = LogisticRegression(solver = 'lbfgs', max_iter = 200, random_state=1)
# Fit the model using training data
logistic_regression_model.fit(X_train, y_train)

In [50]:
# Make a prediction using the testing data
predictions = logistic_regression_model.predict(X_test)

In [51]:
# Make a prediction using the testing data
y_predictions = logistic_regression_model.predict(X_test)

df_results = pd.DataFrame({
    "y_test": y_test,
    "prediction": y_predictions
})

df_results.sample(10)

Unnamed: 0,y_test,prediction
7,1,1
231,0,0
99,0,0
275,1,1
80,1,1
329,0,0
533,0,0
152,1,1
276,1,1
512,0,0


In [52]:
# Print the balanced_accuracy score of the model
balanced_accuracy_score(y_test, y_predictions)

0.8816267247639797

In [53]:
# Generate a confusion matrix for the model
confusion_matrix(y_test, y_predictions)

array([[65, 16],
       [ 2, 49]], dtype=int64)

In [54]:
# Print the classification report for the model
print("Classification Report:")
print(classification_report(y_test, y_predictions, target_names = ["Bad_sleep", "Good_sleep"]))

Classification Report:
              precision    recall  f1-score   support

   Bad_sleep       0.97      0.80      0.88        81
  Good_sleep       0.75      0.96      0.84        51

    accuracy                           0.86       132
   macro avg       0.86      0.88      0.86       132
weighted avg       0.89      0.86      0.87       132



In [55]:
# OverSampled Data

In [56]:
# Instantiate the random oversampler model
# # Assign a random_state parameter of 1 to the model
r_model = RandomOverSampler(random_state=1)


# Fit the original training data to the random_oversampler model
X_resampled, y_resampled = r_model.fit_resample(X_train, y_train)

In [57]:
# Count the distinct values of the resampled labels data
labels = y_resampled.value_counts()
num_distinct_val = len(labels)

print("Number of distinct values of the resampled labels : ",num_distinct_val )
print(labels)

Number of distinct values of the resampled labels :  2
0    229
1    229
Name: Sleep efficiency, dtype: int64


In [58]:
#Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model
lr_model = LogisticRegression(solver = 'lbfgs', max_iter = 200, random_state=1)

# Fit the model using training data
lr_model.fit(X_train, y_train)

# Fit the model using the resampled training data
lr_model.fit(X_resampled, y_resampled)

# Make a prediction using the testing data
predictions = lr_model.predict(X_test)

In [59]:
# Print the balanced_accuracy score of the model
balanced_accuracy_score(y_test, predictions)

0.8914306463326072

In [60]:
# Generate a confusion matrix for the model
confusion_matrix(y_test, predictions)

array([[65, 16],
       [ 1, 50]], dtype=int64)

In [61]:
# Print the classification report for the model
classification_rep = classification_report(y_test, predictions)
print("Classification Report:")
print(classification_rep)

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.80      0.88        81
           1       0.76      0.98      0.85        51

    accuracy                           0.87       132
   macro avg       0.87      0.89      0.87       132
weighted avg       0.90      0.87      0.87       132

