In [495]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# CAS Data Science Projekt Sleep Analysis - Linear Regression

# Prepare data


In [496]:
data = pd.read_csv('prepared/fitbit_data_moon_crisp.csv', index_col=0)
data.dropna(inplace=True)
data.drop(columns=['date'], inplace=True)
data.drop(columns=['restlessness'], inplace=True)
data['scaled_temperature'] = data['temperature'].apply(lambda x: 80+x)
data.drop(columns=['temperature'], inplace=True)

In [497]:
data.head(5)

Unnamed: 0,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,bpm,Moon Phase,scaled_temperature
0,67,17,15,35,66,60,68.873969,New Moon,77.505155
2,88,21,22,45,126,60,67.206731,First Quarter,76.899359
3,56,15,15,26,23,56,67.187992,Third Quarter,76.917705
4,75,20,19,36,69,55,69.163313,First Quarter,77.016326
5,68,19,18,31,69,60,61.607115,New Moon,77.85856


In [498]:
# Split data into X and y
X_data = data.drop(columns='overall_score')
y_data = data['overall_score']

In [499]:
# set aside 20% of train and test data for evaluation
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, stratify=X_data['Moon Phase'], shuffle = True)

In [500]:
# same for the validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state= 8)

In [501]:
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("X_val shape: {}".format(y_train.shape))

X_train shape: (65, 8)
X_test shape: (30, 8)
X_val shape: (65,)


In [502]:
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
print("y val shape: {}".format(y_test.shape))

y_train shape: (65,)
y_test shape: (30,)
y val shape: (30,)


# Define and train model

In [503]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import Pipeline

In [504]:
# Define model
pipe = Pipeline([
    ('ohe', make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'),
        ['Moon Phase']),
        remainder='passthrough')

    ),
    ('reg', LinearRegression())
])


In [505]:
from sklearn import set_config

set_config(display="diagram")
pipe

In [506]:
# Train (fit) the model with the train data.
_ = pipe.fit(X_train, y_train)

In [507]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [508]:
# Predict with the model the validation data.
y_val_pred = pipe.predict(X_val)

In [509]:
# How good are we on the validation data?
print(mean_absolute_percentage_error(y_val, y_val_pred))

2.1123195128064915e-14


# Predict Test Set

In [510]:
y_test_pred = pipe.predict(X_test)

In [511]:
# How good are we on the test data?
print(mean_absolute_percentage_error(y_test, y_test_pred))

2.073185254171128e-14
