In [1]:
# Loading of all necessary imports
# Using Seaborn instead of matplotlib for most plots as it has more powerful plots out of the box
import pandas as pd
from os import listdir
from os.path import join
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import seaborn as sns
import numpy as np
from matplotlib import colors
import datetime

In [2]:
# Imports from scikit-learn (plus Yellowbrick's elbow visualizer)
# Cluster models, pipeline methods and some model selection utilities
from sklearn.model_selection import train_test_split, GroupShuffleSplit, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, normalize, StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.cluster import KMeans, AffinityPropagation, MeanShift, SpectralClustering, AgglomerativeClustering, DBSCAN
from yellowbrick.cluster import KElbowVisualizer
from sklearn.metrics import rand_score
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
#from sklearn.neural_network import MLPClassifier
from sklearn.compose import ColumnTransformer
from sklearn import metrics

In [3]:
# Apply seaborn's default theme so that every plot drawn after this cell shares one look.
sns.set_theme()

In [4]:
# Load the semicolon-separated table of tests and phrase every row as a hypothesis sentence.
df = pd.read_csv('../tests.csv', delimiter=';')


def _hypothesis_sentence(row):
    """Return the capitalised hypothesis sentence built from one row's X_text and Y_text."""
    sentence = f'{row.X_text} has an effect on the {row.Y_text} of a participant.'
    # str.capitalize() upper-cases the first character and lower-cases the rest.
    return sentence.capitalize()


df['Hypothesis'] = df.apply(_hypothesis_sentence, axis=1)

In [5]:
# Print the hypotheses as a LaTeX table indexed by (Outcome, Predictor).
# Both pandas options are active only while the table is built and rendered.
with pd.option_context("max_colwidth", 1000, "styler.latex.multirow_align", "t"):
    df_tex = df.set_index(['Outcome', 'Predictor'])
    s = (
        df_tex[["Hypothesis"]]
        .style
        # Escape LaTeX-special characters (e.g. underscores) in row and column labels.
        .format_index(escape="latex", axis=0)
        .format_index(escape="latex", axis=1)
        .hide(names=True, level=0, axis=1)
    )
    latex_table = s.to_latex(column_format='l|l|p{6cm}', hrules=True, clines="skip-last;data")
    print(latex_table)

\begin{tabular}{l|l|p{6cm}}
\toprule
Outcome & Predictor &  \\
\midrule
\multirow[t]{7}{*}{RestingHeartRate} & NumberSteps & The number of daily steps has an effect on the resting heart rate of a participant. \\
 & Sleep1Efficiency & The quality of sleep has an effect on the resting heart rate of a participant. \\
 & Cardio\_minutes & Cardio exercise has an effect on the resting heart rate of a participant. \\
 & Peak\_minutes & High intensity exercise has an effect on the resting heart rate of a participant. \\
 & Fat Burn\_minutes & Fat burn exercise has an effect on the resting heart rate of a participant. \\
 & SleepMinutesAsleep & Sleep duration has an effect on the resting heart rate of a participant. \\
 & audit & Alcohol usage has an effect on the resting heart rate of a participant. \\
\cline{1-3}
\multirow[t]{4}{*}{psqi} & Cardio\_minutes & Cardio exercise has an effect on the sleep quality of a participant. \\
 & Peak\_minutes & High intensity exercise has an effect on the s

In [6]:
# Local source of data
data_path = "../data/"
processed_dir = join(data_path, "processed")

daily_path = join(processed_dir, "daily_data.csv.gz")
dem_path = join(processed_dir, "participant_data.csv.gz")

# Read both gzip-compressed CSV files and index each by participant ID.
df_participants = pd.read_csv(dem_path, compression='gzip').set_index("participant_id")
df_daily = pd.read_csv(daily_path, compression='gzip').set_index('participant_id')

# Left join on participant ID: every daily row is kept, and participant columns
# whose name clashes with a daily column receive the '_dem' suffix.
df = df_daily.merge(
    df_participants,
    how="left",
    on='participant_id',
    suffixes=(None, '_dem'),
)

In [7]:
# Thresholds for the two data-quality filters applied in this cell.
MIN_WEAR_MINUTES = 720  # 12 hours; compared against a sum of *_minutes columns
MIN_RECORDS_PER_PARTICIPANT = 5  # participants need strictly more rows than this

# Drop the leftover CSV index column without mutating the merged frame in place.
df = df.drop(columns=['Unnamed: 0'])

# Sum of the four *_minutes columns; presumably the total time the device was
# worn that day (TODO confirm against the data dictionary).
df['WearTime'] = (
    df['Cardio_minutes']
    + df['Fat Burn_minutes']
    + df['Peak_minutes']
    + df['Out of Range_minutes']
)
df = df[df['WearTime'] > MIN_WEAR_MINUTES]

# Aggregate the remaining rows to one row of column means per participant.
# numeric_only=True is passed explicitly: the recorded output of this cell shows
# pandas warning on the bare `.mean()` call (presumably the numeric_only
# deprecation), and pandas >= 2.0 raises a TypeError there when non-numeric
# columns are present.
grouped = df.groupby('participant_id')
df_g = grouped.mean(numeric_only=True)
# Number of non-null NumberSteps rows that survived the wear-time filter.
df_g['Count'] = grouped['NumberSteps'].count()

# Keep only participants with enough remaining rows.
df = df_g[df_g['Count'] > MIN_RECORDS_PER_PARTICIPANT]

  df_g = df.groupby('participant_id').mean()


In [39]:
# Descriptive statistics in long form: describe() yields one column per variable,
# and unstack() turns that into a Series indexed by (variable, statistic).
descriptive_stats = df.describe().unstack()

# `to_latex` returns None whenever `buf` is given, which left `long` empty.
# Render to a string first so `long` holds the table, then write the same text
# to the .tex file.
long = descriptive_stats.to_latex(longtable=True)
with open('../descriptive_table.tex', 'w', encoding='utf-8', newline='') as tex_file:
    tex_file.write(long)

  long = df.describe().unstack().to_latex(longtable=True, buf='../descriptive_table.tex')
