### Following efforts to enhance the accuracy of our model in the "sleep_efficiency_Tensor_LogReg_Final.ipynb" file, which involved data preprocessing and increasing the number of neurons, we achieved a 90% accuracy. This served as a benchmark for assessing the influence of removing columns from the feature set (X) to determine their impact on the model.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf




# Preprocessing

In [2]:
# Read CSV
sleep_df = pd.read_csv("Sleep_Efficiency_Updated.csv")
sleep_df.head()

Unnamed: 0,ID,Age,Gender,Bedtime,Wakeup time,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Smoking status,Exercise frequency
0,1,65,Female,06/03/2021 01:00,06/03/2021 07:00,6.0,0.88,18,70,12,0.0,0.0,0.0,Yes,3.0
1,2,69,Male,05/12/2021 02:00,05/12/2021 09:00,7.0,0.66,19,28,53,3.0,0.0,3.0,Yes,3.0
2,3,40,Female,25/05/2021 21:30,25/05/2021 05:30,8.0,0.89,20,70,10,1.0,0.0,0.0,No,3.0
3,4,40,Female,03/11/2021 02:30,03/11/2021 08:30,6.0,0.51,23,25,52,3.0,50.0,5.0,Yes,1.0
4,5,57,Male,13/03/2021 01:00,13/03/2021 09:00,8.0,0.76,27,55,18,3.0,0.0,3.0,No,3.0


In [3]:
# Drop non-beneficial columns
sleep_df = sleep_df.drop(columns=['ID','Bedtime', 'Wakeup time'])

In [4]:
# Drop null values
sleep_df = sleep_df.dropna()

In [5]:
sleep_df.nunique()

Age                       60
Gender                     2
Sleep duration             9
Sleep efficiency          50
REM sleep percentage      13
Deep sleep percentage     28
Light sleep percentage    28
Awakenings                 5
Caffeine consumption       6
Alcohol consumption        6
Smoking status             2
Exercise frequency         6
dtype: int64

In [6]:
sleep_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 528 entries, 0 to 617
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Age                     528 non-null    int64  
 1   Gender                  528 non-null    object 
 2   Sleep duration          528 non-null    float64
 3   Sleep efficiency        528 non-null    float64
 4   REM sleep percentage    528 non-null    int64  
 5   Deep sleep percentage   528 non-null    int64  
 6   Light sleep percentage  528 non-null    int64  
 7   Awakenings              528 non-null    float64
 8   Caffeine consumption    528 non-null    float64
 9   Alcohol consumption     528 non-null    float64
 10  Smoking status          528 non-null    object 
 11  Exercise frequency      528 non-null    float64
dtypes: float64(6), int64(4), object(2)
memory usage: 53.6+ KB


In [7]:
# Get Dummies
sleep_df = pd.get_dummies(sleep_df,dtype=float)

In [8]:
sleep_df.head()

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Exercise frequency,Gender_Female,Gender_Male,Smoking status_No,Smoking status_Yes
0,65,6.0,0.88,18,70,12,0.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0
1,69,7.0,0.66,19,28,53,3.0,0.0,3.0,3.0,0.0,1.0,0.0,1.0
2,40,8.0,0.89,20,70,10,1.0,0.0,0.0,3.0,1.0,0.0,1.0,0.0
3,40,6.0,0.51,23,25,52,3.0,50.0,5.0,1.0,1.0,0.0,0.0,1.0
4,57,8.0,0.76,27,55,18,3.0,0.0,3.0,3.0,0.0,1.0,1.0,0.0


In [9]:
# Convert Sleep efficiency values > .85 equal to 1
sleep_df['Sleep efficiency'] = sleep_df['Sleep efficiency'].apply(lambda x: 1 if x > 0.85 else 0)

In [10]:
sleep_df

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Exercise frequency,Gender_Female,Gender_Male,Smoking status_No,Smoking status_Yes
0,65,6.0,1,18,70,12,0.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0
1,69,7.0,0,19,28,53,3.0,0.0,3.0,3.0,0.0,1.0,0.0,1.0
2,40,8.0,1,20,70,10,1.0,0.0,0.0,3.0,1.0,0.0,1.0,0.0
3,40,6.0,0,23,25,52,3.0,50.0,5.0,1.0,1.0,0.0,0.0,1.0
4,57,8.0,0,27,55,18,3.0,0.0,3.0,3.0,0.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,61,7.0,0,23,23,54,2.0,50.0,5.0,0.0,1.0,0.0,1.0,0.0
614,30,8.0,1,22,63,15,1.0,75.0,0.0,2.0,1.0,0.0,0.0,1.0
615,48,6.0,0,24,60,16,4.0,0.0,0.0,2.0,0.0,1.0,0.0,1.0
616,32,5.0,1,20,65,15,1.0,25.0,1.0,0.0,1.0,0.0,1.0,0.0


## Alcohol consumption Vs Sleep efficiency 

In [11]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(['Alcohol consumption','Sleep efficiency'], axis=1).values

In [12]:
# Split data to training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [13]:
# Standard Scaler instance
scaler = StandardScaler()

# Fit Standard Scaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
# Neural Net model, nodes, layers

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 104       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 155 (620.00 Byte)
Trainable params: 155 (620.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])




In [16]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.3024 - accuracy: 0.8788 - 338ms/epoch - 68ms/step
Loss: 0.30242735147476196, Accuracy: 0.8787878751754761


###  The decrease in accuracy suggests that removing alcohol consumption reduces the performance of the model.

## Caffeine consumption Vs Sleep efficiency

In [19]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(['Sleep efficiency','Caffeine consumption'], axis=1).values

In [20]:
# Split data to training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [21]:
# Standard Scaler instance
scaler = StandardScaler()

# Fit Standard Scaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [22]:
# Neural Net model, nodes, layers

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8)                 104       
                                                                 
 dense_4 (Dense)             (None, 5)                 45        
                                                                 
 dense_5 (Dense)             (None, 1)                 6         
                                                                 
Total params: 155 (620.00 Byte)
Trainable params: 155 (620.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [24]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [25]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.2119 - accuracy: 0.9318 - 253ms/epoch - 51ms/step
Loss: 0.21192465722560883, Accuracy: 0.9318181872367859


### The increase in accuracy suggests that removing caffeine consumption does not influence the performance of the model.

## Smoking status Vs Sleep Efficiency

In [33]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(['Sleep efficiency','Smoking status_No','Smoking status_Yes'], axis=1).values

In [34]:
# Split data to training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [35]:
# Standard Scaler instance
scaler = StandardScaler()

# Fit Standard Scaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [36]:
# Neural Net model, nodes, layers

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 8)                 96        
                                                                 
 dense_10 (Dense)            (None, 5)                 45        
                                                                 
 dense_11 (Dense)            (None, 1)                 6         
                                                                 
Total params: 147 (588.00 Byte)
Trainable params: 147 (588.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [37]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [38]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [39]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.3309 - accuracy: 0.8712 - 272ms/epoch - 54ms/step
Loss: 0.33089521527290344, Accuracy: 0.8712121248245239


### The decrease in accuracy suggests that removing smoking status reduces the performance of the model.

## Exercise frequency Vs Sleep efficiency

In [40]:
y = sleep_df['Sleep efficiency'].values

X = sleep_df.drop(['Sleep efficiency','Exercise frequency'], axis=1).values

In [41]:
# Split data to training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [42]:
# Standard Scaler instance
scaler = StandardScaler()

# Fit Standard Scaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [43]:
# Neural Net model, nodes, layers

number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 8)                 104       
                                                                 
 dense_13 (Dense)            (None, 5)                 45        
                                                                 
 dense_14 (Dense)            (None, 1)                 6         
                                                                 
Total params: 155 (620.00 Byte)
Trainable params: 155 (620.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [44]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [45]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [46]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.2487 - accuracy: 0.8636 - 272ms/epoch - 54ms/step
Loss: 0.2487325668334961, Accuracy: 0.8636363744735718


### The decrease in accuracy suggests that removing exercise frequency reduces the performance of the model.