# Project Capstone

## Import Libraries

In [265]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

## Data Wrangling

In [266]:
# Load the raw dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being wrapped in pd.DataFrame() a second time.
data = pd.read_csv("./data/data.csv")
data.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [267]:
# Column dtypes and non-null counts of the raw frame (used to spot missing values).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           155 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


In [268]:
# Number of missing values per column.
data.isna().sum()

Person ID                    0
Gender                       0
Age                          0
Occupation                   0
Sleep Duration               0
Quality of Sleep             0
Physical Activity Level      0
Stress Level                 0
BMI Category                 0
Blood Pressure               0
Heart Rate                   0
Daily Steps                  0
Sleep Disorder             219
dtype: int64

In [269]:
# Only "Sleep Disorder" has missing entries (see the per-column null counts
# above), so fill just that column. A frame-wide fill would silently write the
# string "None" into any numeric column that ever contained a NaN.
# NOTE(review): presumably the CSV stores the literal text "None" for people
# without a disorder and read_csv parses it as missing — confirm against the file.
data["Sleep Disorder"] = data["Sleep Disorder"].fillna("None")

In [270]:
# Re-check non-null counts after filling the missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           374 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


In [271]:
# Report how many rows are exact duplicates of an earlier row.
print("Jumlah duplikasi: ", data.duplicated().sum())

Jumlah duplikasi:  0


In [272]:
# Build the modelling table by removing the three columns that are not used
# further in this notebook; `data` itself is left untouched.
dataset = data.drop(
    ['Person ID', 'Occupation', 'Blood Pressure'],
    axis='columns',
)
dataset.head()

Unnamed: 0,Gender,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder
0,Male,27,6.1,6,42,6,Overweight,77,4200,
1,Male,28,6.2,6,60,8,Normal,75,10000,
2,Male,28,6.2,6,60,8,Normal,75,10000,
3,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea
4,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea


In [273]:
# Summary statistics of the numeric columns before encoding.
dataset.describe()

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps
count,374.0,374.0,374.0,374.0,374.0,374.0,374.0
mean,42.184492,7.132086,7.312834,59.171123,5.385027,70.165775,6816.84492
std,8.673133,0.795657,1.196956,20.830804,1.774526,4.135676,1617.915679
min,27.0,5.8,4.0,30.0,3.0,65.0,3000.0
25%,35.25,6.4,6.0,45.0,4.0,68.0,5600.0
50%,43.0,7.2,7.0,60.0,5.0,70.0,7000.0
75%,50.0,7.8,8.0,75.0,7.0,72.0,8000.0
max,59.0,8.5,9.0,90.0,8.0,86.0,10000.0


## Data Encoding

In [261]:
# Replace every object-typed column of `dataset` with integer codes and keep
# the fitted encoder per column so the codes can be mapped back later.
# Gotcha: this cell overwrites `dataset` in place. Running it a second time
# finds no object columns any more and resets `label_encoders` to an empty dict.
label_encoders = {}
categorical_columns = dataset.select_dtypes(include=['object']).columns
for column in categorical_columns:
    encoder = LabelEncoder()
    dataset[column] = encoder.fit_transform(dataset[column])
    label_encoders[column] = encoder

In [None]:
# Summary statistics again, now including the label-encoded columns.
dataset.describe()

## Split Data

In [182]:
# Features: every column except the target. Target: "Quality of Sleep", kept
# as a one-column DataFrame (not a Series) because later cells index it by column.
x = dataset.drop('Quality of Sleep', axis='columns')
y = dataset.loc[:, ['Quality of Sleep']]

In [None]:
# Hold out 30% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

# Print the size of each part (features first, then labels).
for caption, subset in (
    ("Training data size:", x_train),
    ("Validation data size:", x_val),
    ("\nTraining label size:", y_train),
    ("Validation label size:", y_val),
):
    print(caption, len(subset))

In [184]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both splits so no validation information leaks in.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_val_scaled = scaler.transform(x_val)

## Machine Learning

In [216]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Stop training once both MAE metrics are low enough after a minimum epoch.

    At the end of each epoch, training is halted when the training ``mae`` and
    the validation ``val_mae`` found in ``logs`` are both at or below
    ``mae_threshold`` and the epoch index passed by Keras is at least
    ``min_epoch``.

    Args:
        mae_threshold: Inclusive upper bound that both MAE values must satisfy.
        min_epoch: Smallest epoch index at which stopping is allowed.
    """

    def __init__(self, mae_threshold=0.1, min_epoch=500):
        super().__init__()
        self.mae_threshold = mae_threshold
        self.min_epoch = min_epoch

    def on_epoch_end(self, epoch, logs=None):
        """Check the stop condition; do nothing if a required metric is absent."""
        # `logs` defaults to None, and 'val_mae' is only reported when
        # validation data is supplied. Treat a missing metric as "keep
        # training" instead of failing with a TypeError/KeyError mid-run.
        logs = logs or {}
        mae = logs.get('mae')
        val_mae = logs.get('val_mae')
        if mae is None or val_mae is None:
            return

        if mae <= self.mae_threshold and val_mae <= self.mae_threshold and epoch >= self.min_epoch:
            print("Early stopping triggered at epoch ", epoch, f" because validation MAE is less than {self.mae_threshold} and training MAE is less than {self.mae_threshold} and epoch is greater than {self.min_epoch}")
            self.model.stop_training = True

In [217]:
# Seed the Python, NumPy and TensorFlow random generators before the layers are
# created so the random weight initialisation is repeatable across runs.
# NOTE(review): exact run-to-run equality can still depend on hardware and
# op-level determinism — confirm if bit-identical results are required.
tf.keras.utils.set_random_seed(42)

# Regression network: one input per feature column, a single 256-unit ReLU
# hidden layer, and one linear output unit for the numeric target.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [219]:
# Configure training: MSE is the optimised loss, MAE is tracked as an extra
# metric (the custom callback reads 'mae' / 'val_mae' from the epoch logs).
model.compile(
    loss='mean_squared_error',
    metrics=['mae'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

In [None]:
# Train on the scaled training split and evaluate on the scaled validation
# split after every epoch. 1000 epochs is the upper limit; CustomCallback may
# stop the run earlier.
history = model.fit(
    x=x_train_scaled,
    y=y_train,
    validation_data=(x_val_scaled, y_val),
    batch_size=32,
    epochs=1000,
    callbacks=[CustomCallback()],
)

In [None]:
# Per-epoch curves recorded by model.fit.
curves = history.history
loss, val_loss = curves['loss'], curves['val_loss']
mae, val_mae = curves['mae'], curves['val_mae']

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(loss) + 1)

# One figure, two side-by-side panels: loss on the left, MAE on the right.
fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(14, 5))

# Loss plot
loss_ax.plot(epochs, loss, label='Training Loss')
loss_ax.plot(epochs, val_loss, label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss (MSE)')
loss_ax.legend()

# MAE plot
mae_ax.plot(epochs, mae, label='Training MAE')
mae_ax.plot(epochs, val_mae, label='Validation MAE')
mae_ax.set_title('Training and Validation MAE')
mae_ax.set_xlabel('Epochs')
mae_ax.set_ylabel('Mean Absolute Error')
mae_ax.legend()

plt.show()

In [None]:
# Show the first validation row after scaling (the values the model actually receives).
print(x_val_scaled[0])

In [None]:
predictions = model.predict(x_train_scaled)

print("Beberapa prediksi:")
# Walk the first ten training rows and print prediction vs. stored value for
# every target column.
for row in range(10):
    for col, feature_name in enumerate(y_train.columns):
        rounded_pred = round(predictions[row][col])
        encoder = label_encoders.get(feature_name)
        if encoder is None:
            # Target was not label-encoded: report the raw model output.
            pred_original = predictions[row][col]
            actual_original = y_train.iloc[row, col]
        else:
            # Target was label-encoded: map the rounded code back to its label.
            pred_original = encoder.inverse_transform([rounded_pred])[0]
            actual_original = encoder.inverse_transform([y_train.iloc[row, col]])[0]
        try:
            # Numeric values are printed with two decimals ...
            pred_original = float(pred_original)
            actual_original = float(actual_original)
            print(f"Prediksi untuk target {feature_name}: {pred_original:.2f}, Nilai aktual: {actual_original:.2f}")
        except ValueError:
            # ... anything float() rejects is printed as-is.
            print(f"Prediksi untuk target {feature_name}: {pred_original}, Nilai aktual: {actual_original}")
    print()

In [None]:
predictions = model.predict(x_val_scaled)

print("Beberapa prediksi pertama:")
# Print the first validation row: model output next to the stored target value.
for i in range(1):
    # Iterate over the columns of y_val, the frame that is actually indexed
    # below. The bound previously came from y_train, which only worked because
    # both frames have the same number of columns.
    for j in range(y_val.shape[1]):
        print(f"Prediksi untuk target {y_val.columns[j]}: {predictions[i][j]:.2f}, Nilai aktual: {y_val.iloc[i, j]:.2f}")
    print()

In [277]:
# Inspect the row at position 56 of the modelling table.
dataset.iloc[56]

Gender                       Male
Age                            32
Sleep Duration                7.7
Quality of Sleep                7
Physical Activity Level        75
Stress Level                    6
BMI Category               Normal
Heart Rate                     70
Daily Steps                  8000
Sleep Disorder               None
Name: 56, dtype: object

In [None]:
# Print the "BMI Category" value of the row labelled 1 after label encoding
# has been applied.
print(dataset.loc[1, 'BMI Category'])

## Save Model

In [None]:
# Write the trained Keras model to 'model_1_fix.h5' in the working directory.
model.save('model_1_fix.h5')

In [None]:
# Convert the in-memory Keras model to TensorFlow Lite; convert() yields the
# serialized model, which is written out in binary mode below.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Persist the converted model next to the .h5 file.
with open('model_1_fix.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

## Load Model

In [None]:
# Reload the model from the saved .h5 file to check that the saved file works.
loaded_model = tf.keras.models.load_model('model_1_fix.h5')

In [None]:
# Print the architecture of the reloaded model for comparison with the original summary.
loaded_model.summary()

In [None]:
predictions = loaded_model.predict(x_train_scaled)

print("Beberapa prediksi:")
# Same report as for the in-memory model, limited to the first training row,
# to check that the reloaded model gives usable predictions.
for row in range(1):
    for col, feature_name in enumerate(y_train.columns):
        rounded_pred = round(predictions[row][col])
        encoder = label_encoders.get(feature_name)
        if encoder is None:
            # Target was not label-encoded: keep the raw model output.
            pred_original = predictions[row][col]
            actual_original = y_train.iloc[row, col]
        else:
            # Target was label-encoded: translate the codes back to labels.
            pred_original = encoder.inverse_transform([rounded_pred])[0]
            actual_original = encoder.inverse_transform([y_train.iloc[row, col]])[0]
        try:
            # Numeric values get two-decimal formatting.
            pred_original = float(pred_original)
            actual_original = float(actual_original)
            print(f"Prediksi untuk target {feature_name}: {pred_original:.2f}, Nilai aktual: {actual_original:.2f}")
        except ValueError:
            # Values float() rejects are printed unchanged.
            print(f"Prediksi untuk target {feature_name}: {pred_original}, Nilai aktual: {actual_original}")
    print()

In [None]:
predictions = loaded_model.predict(x_val_scaled)

print("Beberapa prediksi pertama:")
# Print the first validation row: reloaded-model output next to the stored
# target value, both with two decimals.
for row in range(1):
    for col, target_name in enumerate(y_val.columns):
        predicted = predictions[row][col]
        actual = y_val.iloc[row, col]
        print(f"Prediksi untuk target {target_name}: {predicted:.2f}, Nilai aktual: {actual:.2f}")
    print()