In [50]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# CAS Data Science Projekt Sleep Analysis - Linear Regression

# Prepare data


In [51]:
data = pd.read_csv('prepared/fitbit_data_moon_crisp.csv', index_col=0)
data.dropna(inplace=True)
data.drop(columns=['date'], inplace=True)

In [52]:
data.head(3)

Unnamed: 0,temperature,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,restlessness,bpm,Moon Phase
0,-2.494845,67,17,15,35,66,60,0.073479,68.873969,New Moon
2,-3.100641,88,21,22,45,126,60,0.054695,67.206731,First Quarter
3,-3.082295,56,15,15,26,23,56,0.100186,67.187992,Third Quarter


In [53]:
# Split data into X and y
X_data = data.drop(columns='overall_score')
y_data = data['overall_score']

In [54]:
# set aside 20% of train and test data for evaluation
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, stratify=X_data['Moon Phase'], shuffle = True)

In [55]:
# same for the validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state= 8)

In [63]:
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("X_val shape: {}".format(y_train.shape))

X_train shape: (65, 9)
X_test shape: (30, 9)
X_val shape: (65,)


In [64]:
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
print("y val shape: {}".format(y_test.shape))

y_train shape: (65,)
y_test shape: (30,)
y val shape: (30,)


# Define and train model

In [65]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import Pipeline

In [66]:
# Define model
model = Pipeline([
    ('ohe', make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'),
        ['Moon Phase']),
        remainder='passthrough')
    ),
    ('reg', LinearRegression())
])

In [67]:
# Train (fit) the model with the train data.
_ = model.fit(X_train, y_train)

In [68]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [69]:
# Predict with the model the validation data.
y_val_pred = model.predict(X_val)

In [70]:
# How good are we on the validation data?
print(mean_absolute_percentage_error(y_val, y_val_pred))

1.2753007852500084e-14


# Predict Test Set

In [71]:
y_test_pred = model.predict(X_test)

In [72]:
# How good are we on the test data?
print(mean_absolute_percentage_error(y_test, y_test_pred))

1.2721341076424652e-14
