In [None]:
import pandas as pd
import numpy as np
import scipy.io as sio
import os
import re
import matplotlib.pyplot as plt
import source.transform_data as trs
from pathlib import Path

In [None]:
# Relative directory from which the input CSV tables are read and to which
# the analysis CSV is written.
path_to_data = Path('Data')

In [None]:
# Load data_table_wide.csv; index_col=None keeps every CSV column as a regular
# column instead of using one as the row index.
data_wide = pd.read_csv(path_to_data/'data_table_wide.csv', index_col=None)

In [None]:
# Load data_table_subjects.csv with no column promoted to the row index.
data_subjects = pd.read_csv(path_to_data/'data_table_subjects.csv', index_col=None)

In [None]:
# Load data_table_days.csv with no column promoted to the row index.
data_days = pd.read_csv(path_to_data/'data_table_days.csv', index_col=None)

### Same number of subjects in T2 as in T3:

In [None]:
# Split the wide table by experimental phase, then print how many distinct
# subject values each phase contains (T2 first, then T3).
data_T2 = data_wide[data_wide['exp_phase_id'] == 'T2']
data_T3 = data_wide[data_wide['exp_phase_id'] == 'T3']
print(len(set(data_T2['subject'])))
print(len(set(data_T3['subject'])))

In [None]:
# (rows, columns) of the subjects table.
data_subjects.shape

In [None]:
# True if at least one cell of data_subjects is missing.
data_subjects.isnull().values.any()

In [None]:
# Same missing-value check via isna(); pandas documents isnull() as an alias
# of isna(), so this is expected to give the same answer as the isnull() check.
data_subjects.isna().values.any()

In [None]:
# Preview the first rows of the days table.
data_days.head()

In [None]:
# Preview the first rows of the subjects table.
data_subjects.head()

In [None]:
# Mean of every numeric column of data_subjects, taken down the rows (axis=0).
# The result is indexed by column name.
subject_means = data_subjects.mean(axis=0, numeric_only=True)
subject_means

In [None]:
# Check which pandas container the mean() call returned.
type(subject_means)

In [None]:
# Wrap the means in a DataFrame and move the index labels (the original
# column names) into an ordinary column.
subject_means = pd.DataFrame(subject_means).reset_index()

In [None]:
# Label the two columns of the long table: 'column' and 'mean_value'.
subject_means.columns = ['column', 'mean_value']
subject_means

In [None]:
# Reshape the long (column, mean_value) table into a single wide row with one
# column per variable.
# A constant helper column gives pivot() a single index value, so every mean
# lands in the same row. The helper is dropped again by reset_index(drop=True).
# Written as one non-mutating chain so the helper column is never added to the
# long table itself.
subject_means = (
    subject_means
    .assign(subject_means='subject_mean')
    .pivot(index='subject_means', columns='column', values='mean_value')
    # Flatten the columns: remove the leftover 'column' axis name.
    .rename_axis(columns=None)
    .reset_index(drop=True)
)

In [None]:
# Display the one-row table of means.
subject_means

In [None]:
# Pull the T2 means for morning walking bouts and for all walking bouts out of
# the one-row table and print both.
m_wbs_T2 = subject_means['number_of_morning_walking_bouts_T2']
tot_wbs_T2 = subject_means['number_of_walking_bouts_T2']
print(m_wbs_T2)
print(tot_wbs_T2)

In [None]:
# Ratio of the mean morning walking-bout count to the mean total count in T2.
m_wbs_T2/tot_wbs_T2

In [None]:
# Pull the T3 means for morning walking bouts and for all walking bouts out of
# the one-row table and print both.
m_wbs_T3 = subject_means['number_of_morning_walking_bouts_T3']
tot_wbs_T3 = subject_means['number_of_walking_bouts_T3']
print(m_wbs_T3)
print(tot_wbs_T3)

In [None]:
# Ratio of the mean morning walking-bout count to the mean total count in T3.
m_wbs_T3/tot_wbs_T3

### Compare number of walking bouts in the morning and in the afternoon:

#### The afternoon is not limited, whereas the morning is limited to 4 hours.
#### The afternoon therefore needs to be limited to four hours as well for a fair comparison:

In [None]:
# (rows, columns) of the full wide table, before any time filtering.
data_wide.shape

In [None]:
# Shape of the rows whose time_stamps_hours value is at most 8.
data_wide[data_wide.time_stamps_hours<=8].shape

In [None]:
# Independent copy of the rows with time_stamps_hours <= 8, so later changes
# to the subset cannot touch data_wide.
data_wide_8hours = data_wide.loc[data_wide['time_stamps_hours'].le(8)].copy()

In [None]:
# Count the non-null WB_time entries per (subject, exp_phase_id) in the
# filtered table, then turn the grouped Series into a flat DataFrame.
walking_bouts_8hours = (
    data_wide_8hours
    .groupby(['subject', 'exp_phase_id'])['WB_time']
    .count()
)
walking_bouts_8hours_frame = walking_bouts_8hours.to_frame().reset_index()
walking_bouts_8hours_frame.head()

In [None]:
# Shape of the rows of data_wide_8hours that belong to subject 'Sub_01'.
data_wide_8hours[data_wide_8hours.subject == 'Sub_01'].shape

In [None]:
# Count the non-null WB_time entries per (subject, exp_phase_id,
# morning_afternoon) in the filtered table, then flatten the grouped Series
# into a DataFrame.
walking_bouts_8hours_morning = (
    data_wide_8hours
    .groupby(['subject', 'exp_phase_id', 'morning_afternoon'])['WB_time']
    .count()
)
walking_bouts_8hours_morning_frame = walking_bouts_8hours_morning.to_frame().reset_index()
walking_bouts_8hours_morning_frame.head()

In [None]:
# The WB_time column now holds group counts, so give it a name that says so.
walking_bouts_8hours_morning_frame = walking_bouts_8hours_morning_frame.rename(columns = {'WB_time': 'number_walking_bouts'})
walking_bouts_8hours_morning_frame.head()

In [None]:
# Display the full per-(subject, phase, morning/afternoon) counts; this is the
# same grouped count that was used to build walking_bouts_8hours_morning.
data_wide_8hours.groupby(['subject', 'exp_phase_id', 'morning_afternoon'])['WB_time'].count()

In [None]:
# Recompute the per-(subject, phase) count of WB_time entries. This is the same
# expression that was assigned to walking_bouts_8hours in an earlier cell.
walking_bouts_8hours = data_wide_8hours.groupby(['subject', 'exp_phase_id'])['WB_time'].count()

In [None]:
# Display the per-(subject, phase) counts.
walking_bouts_8hours

In [None]:
# Spot check on the unfiltered table: row masks for subject 'Sub_52', for
# morning_afternoon equal to 1, and for phase 'T3'.
subj_bools = data_wide['subject'].eq('Sub_52')
morning_bools = data_wide['morning_afternoon'].eq(1)
exp_phase_bools = data_wide['exp_phase_id'].eq('T3')
# Shape of the rows that satisfy all three conditions at once.
data_wide.loc[subj_bools & morning_bools & exp_phase_bools].shape

### Rename data frame to make it shorter:

In [None]:
# Continue under a shorter name. Take an independent copy rather than an alias,
# so that the columns added to nb_wbs_8h in the following cells do not also
# appear in walking_bouts_8hours_morning_frame.
nb_wbs_8h = walking_bouts_8hours_morning_frame.copy()
nb_wbs_8h.head()

In [None]:
# String version of the morning_afternoon flag, used to build column labels.
nb_wbs_8h['m_a_code'] = nb_wbs_8h['morning_afternoon'].map(str)

In [None]:
# Combined label "<exp_phase_id>_<m_a_code>"; it becomes the column key of the
# pivoted table.
nb_wbs_8h['exp_phase_morning'] = nb_wbs_8h['exp_phase_id'] + '_' + nb_wbs_8h['m_a_code']
nb_wbs_8h.head()

In [None]:
# Keep only the three columns used by the pivot. This rebinds nb_wbs_8h, so the
# morning_afternoon, exp_phase_id and m_a_code columns are no longer reachable
# through this name afterwards.
nb_wbs_8h = nb_wbs_8h[['subject', 'exp_phase_morning', 'number_walking_bouts']]

In [None]:
# Long -> wide: one row per subject, one column per exp_phase_morning label,
# cells holding number_walking_bouts. The leftover columns-axis name is cleared
# and 'subject' is moved back from the index into a regular column.
# NOTE(review): a (subject, label) pair with no row in nb_wbs_8h yields NaN
# here — confirm whether such gaps can occur and should count as 0.
nb_wbs_8h_analysis = (
    nb_wbs_8h
    .pivot(index=['subject'], columns='exp_phase_morning', values='number_walking_bouts')
    .rename_axis(columns=None)
    .reset_index()
)
nb_wbs_8h_analysis.head()

In [None]:
# Inspect the column dtypes of the pivoted table.
nb_wbs_8h_analysis.dtypes

In [None]:
# Per-subject totals across both phases: columns with the _1 suffix are added
# into morning_tot, columns with the _0 suffix into afternoon_tot.
nb_wbs_8h_analysis['morning_tot'] = nb_wbs_8h_analysis['T2_1'] + nb_wbs_8h_analysis['T3_1']
nb_wbs_8h_analysis['afternoon_tot'] = nb_wbs_8h_analysis['T2_0'] + nb_wbs_8h_analysis['T3_0']
nb_wbs_8h_analysis.head()

In [None]:
# Save the per-subject table as data_wb_analysis.csv under path_to_data,
# comma-separated and without the row index.
nb_wbs_8h_analysis.to_csv(path_to_data/'data_wb_analysis.csv', sep=',', index=False)

In [None]:
# Preview the first rows of the wide table.
data_wide.head()

In [None]:
# Preview the first rows of the subjects table again before plotting from it.
data_subjects.head()

In [None]:
# Side-by-side box plots of the number_of_days_T2 and number_of_days_T3
# columns of data_subjects.
data_subjects.boxplot(column=['number_of_days_T2', 'number_of_days_T3'])
plt.show()

In [None]:
# Side-by-side box plots of the number_of_walking_bouts_T2 and
# number_of_walking_bouts_T3 columns of data_subjects.
data_subjects.boxplot(column=['number_of_walking_bouts_T2', 'number_of_walking_bouts_T3'])
plt.show()

In [None]:
# Box plots of the four phase/period count columns of the pivoted table.
nb_wbs_8h_analysis.boxplot(column=['T2_0', 'T2_1', 'T3_0', 'T3_1'])
plt.show()

In [None]:
# Box plots of the morning and afternoon totals summed over both phases.
nb_wbs_8h_analysis.boxplot(column=['morning_tot', 'afternoon_tot'])
plt.show()

In [None]:
# Histogram of time_stamps_hours over all rows of the unfiltered wide table.
data_wide.time_stamps_hours.hist()
plt.show()

In [None]:
# Histogram of time_stamps_hours restricted to the rows of phase T2.
data_wide_T2 = data_wide.loc[data_wide['exp_phase_id'] == 'T2']
data_wide_T2['time_stamps_hours'].hist()
plt.show()

In [None]:
# Histogram of time_stamps_hours restricted to the rows of phase T3.
data_wide_T3 = data_wide.loc[data_wide['exp_phase_id'] == 'T3']
data_wide_T3['time_stamps_hours'].hist()
plt.show()