In [None]:
# default_exp SALA

# SALA

> Core module of SALA, built to process actiwatch data for a single individual. Prepares actiwatch style data exported in a CSV from Philips Actiware watches and produces additional analyses.

In [None]:
#hide
from nbdev.showdoc import *

%run load_actiwatch_data.py
%run firsttime.py

import numpy as np
import pandas as pd

from joblib import *
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from astral import LocationInfo, sun

In [None]:
#export
class SALA:
    """
    DataFrame-like storage for actiwatch data loaded either from a directory of csv files
    or an existing SALA or dataframe object. 
    
    
        Attributes
        ----------
        data: pd.DataFrame
            Dataframe of processed timing data

        directory: dict
            Mapping of group key -> folder containing that group's csv files

        timezone: str
            Single timezone specified for all data within the object. A list of 
            valid timezones can be obtained from pytz.all_timezones

        latitude: float
            Latitude position for sunrise/sunset calculations

        longitude: float
            Longitude position for sunrise/sunset calculations

        Methods
        -------
        init(data=None, directory=None, timezone=None, latitude=None, longitude=None)
            Initialization with a pre-parsed dataframe or raw data and other details

        get_raw_data(key, directory=None, grouping='Group')
            Loads raw actiwatch data for one group

        export(timing_data, outfile)
            Exports timing data to a parquet file

        process_data(outfile, thresholds, calc_raw, calc_timing, export_hook)
            Loads or calculates two dataframes: one with all raw data, another with 
            only processed timing data

        sun_timings(location, region)
            Adds sunrise and sunset timings to the stored timing data

        process_sleep(sleep_split, num_sleeps, all_data)
            Adds sleep information to the stored timing data
    """

    def __init__(self, data=None, directory=None, timezone=None, latitude=None, longitude=None):
        """
        Initializes a SALA object either from existing parsed timing data, or from a directory
        of csvs. Timezone information can be optionally included to allow for sunset, sunrise 
        data to be added.

        #### Parameters

            data: pd.DataFrame (optional)
                Pre-parsed dataframe.
            
            directory: dictionary (optional)
                Dictionary of valid folder names to load actiwatch data from.
                Folders should have .csv files in them.
            
            timezone: str (optional)
                A valid timezone (a list of timezones can be obtained from pytz.all_timezones).
            
            latitude: float (optional)
                Latitude position for sunrise/sunset calculations. Northern latitudes
                should be positive values.
                
            longitude: float (optional)
                Longitude position for sunrise/sunset calculations. Eastern longitudes
                should be positive values.
        """
        self._data = data
        self._directory = directory
        self._timezone = timezone
        self._latitude = latitude
        self._longitude = longitude
        # Raw recordings remembered by process_data so process_sleep can use them
        self._raw_data = None

    @property
    def data(self):
        """Getter method for data."""
        return self._data

    @data.setter
    def data(self, value):
        """Setter method for data."""
        if not isinstance(value, pd.DataFrame):
            raise TypeError("Error: Data must be of type pd.DataFrame")
        self._data = value

    @property
    def directory(self):
        """Getter method for directory."""
        return self._directory

    @directory.setter
    def directory(self, value):
        """Setter method for directory."""
        # the loaders index the directory by group key, so a dict is the expected
        # form; plain strings remain accepted because earlier versions allowed them
        if not isinstance(value, (dict, str)):
            raise TypeError("Error: directory must be a dictionary of folder names")
        self._directory = value

    @property
    def timezone(self):
        """Getter method for timezone."""
        return self._timezone

    # each setter must be derived from its own property; deriving it from
    # `directory` makes the getter return the directory instead
    @timezone.setter
    def timezone(self, value):
        """Setter method for timezone."""
        if not isinstance(value, str):
            raise TypeError("Error: timezone must be a valid string")
        self._timezone = value

    @property
    def latitude(self):
        """Getter method for latitude."""
        return self._latitude

    @latitude.setter
    def latitude(self, value):
        """Setter method for latitude."""
        # complex numbers are not meaningful coordinates, so only reals pass
        if not isinstance(value, (int, float)):
            raise TypeError("Error: latitude must be a numeric")
        self._latitude = value

    @property
    def longitude(self):
        """Getter method for longitude."""
        return self._longitude

    @longitude.setter
    def longitude(self, value):
        """Setter method for longitude."""
        if not isinstance(value, (int, float)):
            raise TypeError("Error: longitude must be a numeric")
        self._longitude = value

    @staticmethod
    def _load_group(folder, key, grouping='Group'):
        """Loads the csv files of one folder and labels every row with its group key."""
        raw_data = load_actiwatch_data(folder, uidprefix=key)[0]
        raw_data[grouping] = key
        return raw_data

    @staticmethod
    def _sleep_periods(day_split):
        """Summarises the sleep periods found in one "sleep day" of raw data.

        Returns a dataframe indexed by "Sleep period" with one row per period
        (onset, offset, duration), longest period first. The frame is empty
        when the watch never reported sleep in that window.
        """
        # REST-S = watch thinks user is asleep
        is_asleep = (day_split["Interval Status"] == "REST-S").to_numpy()
        times = pd.Series(day_split.index[is_asleep])
        if times.empty:
            return pd.DataFrame(
                columns=["Sleep onset", "Sleep offset", "Sleep duration"]
            ).rename_axis("Sleep period")

        # new sleep period = more than 1 hour between successive REST-S entries,
        # which allows up to 1 hour of being awake in the middle of the night
        period_ids = (times.diff() > pd.Timedelta("1 hour")).cumsum().rename("Sleep period")
        grouped = times.groupby(period_ids)
        sleeps = pd.DataFrame({"Sleep onset": grouped.min(),
                               "Sleep offset": grouped.max()})
        sleeps["Sleep duration"] = sleeps["Sleep offset"] - sleeps["Sleep onset"]
        return sleeps.rename_axis("Sleep period").sort_values(by="Sleep duration",
                                                             ascending=False)

    def get_raw_data(self, key, directory = None, grouping = 'Group'):
        """Loads raw actiwatch data for a particular group based on a string key.

            #### Parameters

            key: str

                The key to load actiwatch data from (for example, "v1")
                
            directory: dict

                Dictionary of valid folders to load actiwatch data from.
                Folders should have .csv files in them. If no dictionary
                is provided, it uses the one initialized as part of the SALA
                object. A dictionary passed here replaces the stored one.
                
            grouping: str

                Name of the generated column for specifying groupings, where
                the values will be the name of the key given. Default = 'Group'.
                
            #### Returns

            All of the raw unprocessed data within the directory.

            #### Raises

            ValueError if no directory was given here or at initialization.

    """
        if directory is None and self._directory is None:
            raise ValueError("Error: a valid source of data must be provided.")
        if directory is not None:
            self._directory = directory
        return self._load_group(self._directory[key], key, grouping)

    def export(self, timing_data, outfile):
        """
        Exports existing timing data to a parquet format.
        
        #### Parameters
            timing_data: pd.DataFrame

                Timing data. If None, the timing data stored in the object is used.
                Its "Date" column is converted in place to second resolution.

            outfile: str

                Directory to save to. (e.g. ../SALA/example_output/)

        #### Raises

            ValueError if there is no timing data to export.
        """
        if timing_data is None:
            timing_data = self._data
        if timing_data is None:
            raise ValueError("Error: no timing data available to export.")
        # putting date information in a parquet valid format
        timing_data["Date"] = timing_data["Date"].values.astype("datetime64[s]")
        timing_data.to_parquet(f"{outfile}timing.parquet",
                               engine = "fastparquet", compression="gzip")

    def process_data(self,
                     outfile,
                     thresholds,
                     calc_raw = False,
                     calc_timing = False,
                     export_hook = None):
        """Process existing timing and raw data dataframes by loading them
        from disk or calculating their values if specified.

        #### Parameters

        outfile: str

            File for re-written data to be placed in, or for data to be loaded from.
            
        thresholds: list

            List of light thresholds for the watch data.
            
        calc_raw: bool

            Forces calculation process if true, loads processed data from disk otherwise.
            Default value is 'False'
            
        calc_timing: bool

            Forces calculation of light timing data and exports the resulting data,
            loads it from disk otherwise.
            Default value is 'False'

        export_hook: function

            Placeholder for user to use their own function during data processing.
            This function should take in the timing data as a parameter. See
            documentation for example.
        #### Returns

            (as a tuple of pd.DataFrames) all the data, the processed timing data 

        #### Raises

            ValueError if calc_raw is set but the object has no directory.
        """
        if calc_raw:
            if not self._directory:
                raise ValueError("Error: a valid source of data must be provided.")
            print("Loading raw data from disk...")
            # the loader is a static helper so the workers only receive the folder
            # and key, not this object together with the frames it stores
            raw_results = Parallel(n_jobs=len(self._directory))(
                delayed(self._load_group)(folder, key)
                for key, folder in self._directory.items()
            )
            all_data = pd.concat(raw_results)
            # save data to parquet file
            all_data.to_parquet(outfile + "raw.parquet", engine = 'fastparquet',
                               compression = "gzip")
        else:
            all_data = pd.read_parquet(outfile + "raw.parquet")
        # remembered so that process_sleep can find the raw recordings later
        self._raw_data = all_data

        if calc_timing:
            print("Processing light timing data...")
            timing_results = Parallel(n_jobs=len(thresholds))(
                delayed(firstAndLastLight)(all_data, threshold) for threshold in thresholds
            )
            timing_data = pd.concat(timing_results)

            # loading federal holidays to classify dates as weekend/holiday
            cal = calendar()
            holidays = cal.holidays(start = timing_data.Date.min(),
                                    end = timing_data.Date.max())

            # retrieve day number (e.g. 0) from date index
            timing_data["DayofWeek"] = pd.DatetimeIndex(timing_data["Date"]).dayofweek
            days = ["Mon", "Tues", "Wed", "Thu", "Fri", "Sat", "Sun"]
            day_type = ["Weekday","Weekday","Weekday",
                    "Weekday","Weekday","Weekend/Holiday","Weekend/Holiday"]

            # result should be a combination of Group identifier and the day of the week (e.g. Mon)
            timing_data["GroupDayofWeek"] = (timing_data["Group"] + np.array(days)[timing_data["DayofWeek"]])

            is_holiday = pd.to_datetime(timing_data["Date"]).isin(holidays)
            weekends = (timing_data["Group"] + "Weekend/Holiday")

            # result should be a combination of Group identifier and day type (e.g. Weekday)
            day_types = (timing_data["Group"] + np.array(day_type)[timing_data["DayofWeek"]])

            # holidays are labelled Weekend/Holiday whatever weekday they fall on
            timing_data["GroupDayType"] = day_types.where(~is_holiday).combine_first(weekends.where(is_holiday))
            timing_data["Weekend/Holiday"] = ((timing_data["DayofWeek"] > 4) | is_holiday)

            # function hook for extra processing before exporting to parquet
            if export_hook:
                timing_data = export_hook(timing_data)

            # setting and exporting timing data
            timing_data["Watch period"] = pd.to_timedelta(timing_data["Watch period"])
            self._data = timing_data
            self.export(timing_data, outfile)
        else:
            timing_data = pd.read_parquet(outfile + "timing.parquet", engine = "fastparquet")
            # keep data supplied by the user, but make loaded data available to
            # sun_timings/process_sleep when the object had none
            if self._data is None:
                self._data = timing_data

        return all_data, timing_data

    def sun_timings(self, location, region):
        """Given a location (city) and region as additional markers,
        calculate sunset and sunrise timings.

        #### Parameters

        location: str (any string)

            Name of the location to lookup.
            
        region: str (any string)

            Region that the location is located in.

        #### Returns

            Modified timing data with sunrise and sunset calculations

        #### Raises

            ValueError if timezone, latitude, longitude or timing data is missing.
        """
        if self._timezone is None or self._latitude is None or self._longitude is None:
            raise ValueError("Error: Missing timezone, latitude, or longitude info.")
        if self._data is None:
            raise ValueError("Error: no timing data available for sun timings.")

        # add location info for calculating astral data
        city = LocationInfo(location, region, self._timezone, self._latitude, self._longitude)
        self._data["Sunrise"] = self._data["Date"].apply(
            lambda day: sun.sunrise(city.observer, day, tzinfo = city.tzinfo))
        self._data["Sunset"] = self._data["Date"].apply(
            lambda day: sun.sunset(city.observer, day, tzinfo = city.tzinfo))
        return self._data

    def process_sleep(self, sleep_split = "18:00", num_sleeps = 3, all_data = None):
        """Processes sleep data for existing timing data.

        #### Parameters

        sleep_split: str

            Time to split the sleep day. Default is "18:00", which is 6:00PM.
            A sleep day runs from this time on the row's date to the same time
            on the following day.
            
        num_sleeps: int

            Cutoff for number of sleeps to display in first resulting frame.
            Default = 3, frame will store days with 3+ sleep instances

        all_data: pd.DataFrame (optional)

            Raw data indexed by time, with "UID" and "Interval Status" columns.
            If not given, the raw data from the last process_data call is used.

        #### Returns

            short_frame: pd.DataFrame

                Onset, offset, and duration for sleep periods on days with
                at least num_sleeps sleep periods. Empty if no day qualifies.
                
            timing_data: pd.DataFrame

                Modified timing data with included sleep information. Days
                without any sleep get onset = offset = the row's date and a
                zero duration.

        #### Raises

            ValueError if no timing data or no raw data is available.

        """
        timing_data = self._data
        if timing_data is None:
            raise ValueError("Error: no timing data available to process.")

        if all_data is None:
            all_data = getattr(self, "_raw_data", None)
        if all_data is None:
            # legacy: earlier versions read a module/notebook level `all_data`
            all_data = globals().get("all_data")
        if all_data is None:
            raise ValueError("Error: no raw data available; run process_data "
                             "first or pass all_data.")

        sleepers = []
        sleep_onsets = []
        sleep_offsets = []
        sleep_durations = []
        sleep_onsetMSLMs = []
        sleep_offsetMSLMs = []

        # timing data holds one row per person, day and threshold, while sleep only
        # depends on person and day, so every person-day is summarised once
        raw_by_uid = {}
        summaries = {}

        for arow in timing_data.itertuples():
            uid = arow.UID
            day_start = pd.to_datetime(arow.Date)
            person_day = (uid, day_start)

            if person_day not in summaries:
                next_day = day_start + pd.Timedelta("1 day")
                if uid not in raw_by_uid:
                    raw_by_uid[uid] = all_data[all_data["UID"] == uid]

                # splitting a "sleep day" at sleep_split (6pm by default) under the
                # assumption that people do not end their days that early
                window_start = day_start.strftime("%Y-%m-%d") + " " + sleep_split
                window_end = next_day.strftime("%Y-%m-%d") + " " + sleep_split
                sleeps = self._sleep_periods(raw_by_uid[uid].loc[window_start:window_end])

                if sleeps.empty:
                    # no sleep detected: placeholder at midnight of the day itself
                    onset = offset = day_start
                    duration = pd.Timedelta(0)
                else:
                    # the longest sleep period of the day is the one reported
                    onset = sleeps["Sleep onset"].iloc[0]
                    offset = sleeps["Sleep offset"].iloc[0]
                    duration = sleeps["Sleep duration"].iloc[0]

                # minutes relative to midnight: onset from the start of the day,
                # offset from the start of the next day (never negative)
                on_mslm = (onset - day_start).total_seconds() / 60.0
                off_mslm = np.maximum((offset - next_day).total_seconds() / 60.0, 0.0)
                summaries[person_day] = (onset, offset, duration, on_mslm, off_mslm)

                # adding to short_frame
                if not sleeps.empty and len(sleeps) >= num_sleeps:
                    many_sleeps = sleeps.reset_index()
                    many_sleeps["UID"] = uid
                    many_sleeps["DT"] = day_start
                    sleepers.append(many_sleeps)

            onset, offset, duration, on_mslm, off_mslm = summaries[person_day]
            sleep_onsets.append(onset)
            sleep_offsets.append(offset)
            sleep_durations.append(duration)
            sleep_onsetMSLMs.append(on_mslm)
            sleep_offsetMSLMs.append(off_mslm)

        if sleepers:
            short_frame = (
                           pd.concat(sleepers, ignore_index = True)
                           .drop_duplicates().set_index(['UID','DT'])
                           )
        else:
            # no day reached num_sleeps: same layout, no rows
            short_frame = pd.DataFrame(
                columns=["Sleep period", "Sleep onset", "Sleep offset",
                         "Sleep duration", "UID", "DT"]
            ).set_index(['UID','DT'])

        timing_data["Sleep onset"] = sleep_onsets
        timing_data["Sleep offset"] = sleep_offsets
        timing_data["Sleep duration"] = sleep_durations
        timing_data["Sleep onset MSLM"] = sleep_onsetMSLMs
        timing_data["Sleep offset MSLM"] = sleep_offsetMSLMs

        self._data = timing_data

        return short_frame, timing_data

## Creating SALA Objects

To access the full range of SALA-based commands, a SALA object should be created by calling the initialization method. By default, a SALA object can be created without providing any of the data needed for processing. Below are the intended uses for each of the parameters and how their presence controls SALA functionality. These properties can also be read and changed after initialization.

#### Data

This consists of stored processed timing data within SALA. It is meant to be initialized in cases where pre-parsed data with SALA-like manipulations already exist. Unprocessed data should instead be generated using functions within SALA.

#### Directory

If no processed data exists, a dictionary of strings denoting the locations of files to be processed must be provided in order for data to be processed.

#### Timezone, Latitude, Longitude

These location-specific values are required to generate accurate sunrise
and sunset timings.


In [None]:
show_doc(SALA.__init__, title_level = 3)

<h3 id="SALA.__init__" class="doc_header"><code>SALA.__init__</code><a href="__main__.py#L45" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.__init__</code>(**`data`**=*`None`*, **`directory`**=*`None`*, **`timezone`**=*`None`*, **`latitude`**=*`None`*, **`longitude`**=*`None`*)

Initializes a SALA object either from existing parsed timing data, or from a directory
of csvs. Timezone information can be optionally included to allow for sunset, sunrise 
data to be added.

#### Parameters

    data: pd.DataFrame (optional)
        Pre-parsed dataframe.
    
    directory: dictionary (optional)
        Dictionary of valid folder names to load actiwatch data from.
        Folders should have .csv files in them.
    
    timezone: str (optional)
        A valid timezone (a list of timezones can be obtained from pytz.all_timezones).
    
    latitude: float (optional)
        Latitude position for sunrise/sunset calculations. Northern latitudes
        should be positive values.
        
    longitude: float (optional)
        Longitude position for sunrise/sunset calculations. Eastern longitudes
        should be positive values.

A SALA object can be initialized as follows:

In [None]:
# Each key names a group; each value is the folder holding that group's csv files.
directory = {
    'base_': 'data/v1',
    'follow_up_': 'data/v3'
}
# No timing data or location details are supplied yet; they can be set later.
sala = SALA(directory = directory)

## Loading Actiwatch Data

Actiwatch data should be loaded in a directory style setup with key value pairings. This is intended to provide a generally flexible method for labeling groups within the data for easier by-group searching and analysis.

For example:

The above directory uses

- **key** = indicates the group name (for example, base_ or follow_up_), that is used to both name the grouping within the Group column, and as the front of the generated UID.
       
       
 - **value** = indicates that the csv files to be loaded are stored in a folder called "data/v1" or "data/v3". The trailing part of the naming convention (after the / ) is appended to the UID. The remaining part of the UID is built using the file name within the subfolder.
        
    Following this structure, an example file titled 'user1234' would net a UID of *base_v1\user1234*, as
    its key is *base_*, it's stored in *v1*, and the title of the csv is *user1234*.
    
    Also note that the name for their "group" will match the key value used for the directory.
    

Combined and loaded data can be retrieved using:

In [None]:
show_doc(SALA.get_raw_data, title_level = 3)

<h3 id="SALA.get_raw_data" class="doc_header"><code>SALA.get_raw_data</code><a href="__main__.py#L137" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.get_raw_data</code>(**`key`**, **`directory`**=*`None`*, **`grouping`**=*`'Group'`*)

Loads raw actiwatch data for a particular group based on a string key.

#### Parameters

key: str

    The key to load actiwatch data from (for example, "v1")
    
directory: dict

    Dictionary of valid folders to load actiwatch data from.
    Folders should have .csv files in them. If no dictionary
    is provided, it uses the one initialized as part of the SALA
    object.
    
grouping: str

    Name of the generated column for specifying groupings, where
    the values will be the name of the key given. Default = 'Group'.
    
#### Returns

All of the raw unprocessed data within the directory.

In [None]:
# Load only the 'base_' group; every returned row is labelled with that key in "Group".
raw_data = sala.get_raw_data('base_', directory)
# Preview the first rows that have no missing values.
raw_data.dropna().head() 

Found 1 csv files in data/v1/. Pass #1, raw data
.
.
Pass #2, data summary
.
.EOF without retrieving summary data: data/v1\user1234_v1sample.csv


Unnamed: 0_level_0,Off-Wrist Status,Activity,Marker,White Light,Red Light,Green Light,Blue Light,Sleep/Wake,Interval Status,UID,Group
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-06-25 12:31:00,0,0.0,0.0,37.09,28.4,15.5,14.5,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:31:30,0,170.0,0.0,156.15,159.0,59.9,65.3,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:32:00,0,194.0,0.0,149.03,113.0,49.8,50.6,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:32:30,0,0.0,0.0,473.95,365.0,161.0,161.0,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:33:00,0,62.0,0.0,317.82,264.0,112.0,115.0,1.0,ACTIVE,base_v1\user1234,base_


## Exporting Data

SALA provides its own export function for taking existing SALA data and saving it to a parquet file. This is automatically done when running SALA's main processing command as well. An existing directory to save the file to should be provided by the user.

In [None]:
show_doc(SALA.export, title_level = 3)

<h3 id="SALA.export" class="doc_header"><code>SALA.export</code><a href="__main__.py#L171" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.export</code>(**`timing_data`**, **`outfile`**)

Exports existing timing data to a parquet format.

#### Parameters
    timing_data: pd.DataFrame

    Timing data

    outfile: str

        Directory to save to. (e.g. ../SALA/example_output/)

## Processing Data

The main functionality of SALA's core module is collecting and processing both raw and timing data. In most cases, SALA objects will be created without existing dataframes. This function serves to generate processed data with necessary features for plot generation in other SALA modules.

For more details, see the provided example below.

In [None]:
show_doc(SALA.process_data, title_level = 3)

<h3 id="SALA.process_data" class="doc_header"><code>SALA.process_data</code><a href="__main__.py#L193" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.process_data</code>(**`outfile`**, **`thresholds`**, **`calc_raw`**=*`False`*, **`calc_timing`**=*`False`*, **`export_hook`**=*`None`*)

Process existing timing and raw data dataframes by loading them
from disk or calculating their values if specified.

#### Parameters

outfile: str

    File for re-written data to be placed in, or for data to be loaded from.
    
thresholds: list

    List of light thresholds for the watch data.
    
key: str

    The key to load actiwatch data from.
    
calc_raw: bool

    Forces calculation process if true, loads processed data from disk otherwise.
    Default value is 'False'
    
calc_timing: bool

    Forces calculation of light timing data and exports the resulting data,
    loads it from disk otherwise.
    Default value is 'False'

export_hook: function

    Placeholder for user to use their own function during data processing.
    This function should take in the timing data as a parameter. See
    documentation for example.
#### Returns

    (as a tuple of pd.DataFrames) all the data, the processed timing data 

### Adding Extra Functions to Processing

The main processing function provides the ability for users to run their own custom function before exporting. A very minimal example of such a function is provided below.

In [None]:
#exports
def remove_first_day(timing_data):
    """Example function hook that drops rows without usable light data.

    Removes every row whose "Last Light" value is missing (NaT), as well as
    all rows belonging to the earliest date in the data.

     #### Parameters

    timing_data: pd.DataFrame

        Timing data with "Last Light" and "Date" columns

     #### Returns

        A new dataframe containing only the remaining rows. The input
        is left unchanged.
    """
    has_last_light = timing_data["Last Light"].notna()
    after_first_day = timing_data["Date"] != timing_data["Date"].min()
    # an independent copy lets callers add columns to the result without
    # pandas warning about writing to a slice of the original frame
    return timing_data[has_last_light & after_first_day].copy()

#### Example

An example of output for this function would be:

**Note**: The resulting data is split into three images for easier viewing

In [None]:
# One light threshold per entry; process_data computes timing data once per entry.
thresholds = [ [5], [10], [50], [100], [500], [1000] ] 
# Prefix for the parquet files; file names are appended directly, so keep the trailing slash.
outfile = "../SALA/example_output/"

In [None]:
# The two True flags are calc_raw and calc_timing: recompute both instead of loading
# from disk. remove_first_day is applied to the timing data before it is exported.
all_data, timing_data = sala.process_data(outfile, thresholds, True, True, remove_first_day)

Loading raw data from disk...
Processing light timing data...


In [None]:
all_data.dropna().head()

Unnamed: 0_level_0,Off-Wrist Status,Activity,Marker,White Light,Red Light,Green Light,Blue Light,Sleep/Wake,Interval Status,UID,Group
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-06-25 12:31:00,0,0.0,0.0,37.09,28.4,15.5,14.5,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:31:30,0,170.0,0.0,156.15,159.0,59.9,65.3,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:32:00,0,194.0,0.0,149.03,113.0,49.8,50.6,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:32:30,0,0.0,0.0,473.95,365.0,161.0,161.0,1.0,ACTIVE,base_v1\user1234,base_
2018-06-25 12:33:00,0,62.0,0.0,317.82,264.0,112.0,115.0,1.0,ACTIVE,base_v1\user1234,base_


In [None]:
timing_data.iloc[:,:7].head()

Unnamed: 0,UID,Date,Threshold,Last Light,Mins to LL from 4AM,First Light,Mins to FL from 4AM
0,base_v1\user1234,2018-07-09,5,2018-07-09 23:19:00,1159.0,2018-07-09 06:39:00,159.0
1,base_v1\user1234,2018-07-16,5,2018-07-16 16:24:30,744.0,2018-07-16 14:40:00,640.0
2,base_v1\user1234,2018-07-06,5,2018-07-06 23:24:00,1164.0,2018-07-06 06:44:30,164.0
3,base_v1\user1234,2018-07-03,5,2018-07-03 23:31:00,1171.0,2018-07-03 07:00:30,180.0
4,base_v1\user1234,2018-07-01,5,2018-07-01 23:11:00,1151.0,2018-07-01 08:36:00,276.0


In [None]:
timing_data.iloc[:,7:14].head()

Unnamed: 0,Time above threshold,Time above threshold AM,Minutes above threshold,Minutes above threshold AM,Lux minutes,Lux minutes AM,Group
0,0 days 13:56:00,0 days 04:35:30,836.0,275.5,1132776.31,678370.985,base_
1,0 days 00:02:30,0 days 00:00:00,2.5,0.0,214.5,0.0,base_
2,0 days 09:38:30,0 days 05:00:00,578.5,300.0,432815.22,298955.7,base_
3,0 days 12:47:30,0 days 03:37:30,767.5,217.5,377968.04,143815.5,base_
4,0 days 10:19:30,0 days 03:06:00,619.5,186.0,949194.78,165889.685,base_


In [None]:
timing_data.iloc[:,14:].head()

Unnamed: 0,Watch period,DayofWeek,GroupDayofWeek,GroupDayType,Weekend/Holiday
0,0 days 00:00:30,0,base_Mon,base_Weekday,False
1,0 days 00:00:30,0,base_Mon,base_Weekday,False
2,0 days 00:00:30,4,base_Fri,base_Weekday,False
3,0 days 00:00:30,1,base_Tues,base_Weekday,False
4,0 days 00:00:30,6,base_Sun,base_Weekend/Holiday,True


## Setting Sunset and Sunrise Timings

SALA provides the ability to add sunrise and sunset timings to the processed data. To do so, the specific location (latitude and longitude) is required. Labels for location and region are also helpful for user clarity. 

In [None]:
show_doc(SALA.sun_timings, title_level = 3)

<h3 id="SALA.sun_timings" class="doc_header"><code>SALA.sun_timings</code><a href="__main__.py#L291" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.sun_timings</code>(**`location`**, **`region`**)

Given a location (city) and region as additional markers,
calculate sunset and sunrise timings.

#### Parameters

location: str (any string)

    Name of the location to lookup.
    
region: str (any string)

    Region that the location is located in.

#### Returns

    Modified timing data with sunrise and sunset calculations

In this example, we'll add sunrise and sunset calculations under the assumption that the data is from Seattle. The corresponding timezone for Seattle is "America/Los_Angeles". Since timezone, latitude, and longitude were not provided upon initialization, they must be separately provided before sun timings can be calculated.

In [None]:
# sun_timings raises a ValueError unless timezone, latitude and longitude are all set.
sala.timezone = "America/Los_Angeles"
sala.latitude = 47.65
sala.longitude = -122.30
# sun_timings adds "Sunrise" and "Sunset" columns to the stored timing data and returns it.
sala.data = sala.sun_timings("Seattle", "United States")
sala.data[["Sunrise", "Sunset"]].head()

Unnamed: 0,Sunrise,Sunset
0,2018-07-09 05:21:39.214048-07:00,2018-07-09 21:06:51.768689-07:00
1,2018-07-16 05:28:18.610888-07:00,2018-07-16 21:01:42.757393-07:00
2,2018-07-06 05:19:12.313737-07:00,2018-07-06 21:08:27.123116-07:00
3,2018-07-03 05:17:02.116743-07:00,2018-07-03 21:09:39.386344-07:00
4,2018-07-01 05:15:45.287540-07:00,2018-07-01 21:10:14.543651-07:00


## Adding Sleep Information

Adding sleep information to the processed data is also possible. The below function adds sleep data, allowing a "sleep day" to be split at a customizable time. The outputs of the function are
    
    1. short_frame: 
        a separate dataframe meant to be a quick way of visually subsetting and viewing bi/polyphasic instances. 
        This frame defaults to storing occurrences of at least 3 sleep periods within a "sleep day", but can be modified.
        
    2. timing_data:
        modifies stored data to have sleep period information
       

In [None]:
show_doc(SALA.process_sleep, title_level = 3)

<h3 id="SALA.process_sleep" class="doc_header"><code>SALA.process_sleep</code><a href="__main__.py#L323" class="source_link" style="float:right">[source]</a></h3>

> <code>SALA.process_sleep</code>(**`sleep_split`**=*`'18:00'`*, **`num_sleeps`**=*`3`*)

Processes sleep data for existing timing data.

#### Parameters

timing_data: pd.DataFrame

    Timing data
    
sleep_split: str

    Time to split the sleep day. Default is "18:00", which is 6:00PM.
    
num_sleeps: int

    Cutoff for number of sleeps to display in first resulting frame.
    Default = 3, frame will store days with 3+ sleep instances

#### Returns

    short_frame: pd.DataFrame

        Onset, offset, and duration for sleep periods on days with
        more than num_sleeps number of sleep periods
        
    timing_data: pd.DataFrame

        Modified timing data with included sleep information

#### Example

Example outputs after processing sleep data include:

In [None]:
# Defaults: sleep days are split at "18:00" and short_frame keeps days with 3+ sleep periods.
short_frame, timing_data = sala.process_sleep()

In [None]:
short_frame.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Sleep period,Sleep onset,Sleep offset,Sleep duration
UID,DT,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
base_v1\user1234,2018-06-28,0,2018-06-29 00:40:30,2018-06-29 06:41:00,0 days 06:00:30
base_v1\user1234,2018-06-28,2,2018-06-29 13:42:00,2018-06-29 15:23:30,0 days 01:41:30
base_v1\user1234,2018-06-28,1,2018-06-29 08:50:30,2018-06-29 09:04:00,0 days 00:13:30
follow_up_v3\user1234,2018-09-24,0,2018-09-24 21:31:00,2018-09-25 04:44:30,0 days 07:13:30
follow_up_v3\user1234,2018-09-24,1,2018-09-25 09:16:00,2018-09-25 10:30:00,0 days 01:14:00


In [None]:
timing_data[
    ["Sleep onset", "Sleep offset",
     "Sleep duration", "Sleep onset MSLM",
     "Sleep offset MSLM"]
    ].head()

Unnamed: 0,Sleep onset,Sleep offset,Sleep duration,Sleep onset MSLM,Sleep offset MSLM
0,2018-07-09 23:38:30,2018-07-10 06:57:00,0 days 07:18:30,1418.5,417.0
1,2018-07-16 00:00:00,2018-07-16 00:00:00,0 days 00:00:00,0.0,0.0
2,2018-07-07 00:08:00,2018-07-07 06:53:30,0 days 06:45:30,1448.0,413.5
3,2018-07-03 23:57:30,2018-07-04 06:14:00,0 days 06:16:30,1437.5,374.0
4,2018-07-01 20:56:30,2018-07-02 07:48:30,0 days 10:52:00,1256.5,468.5


In [None]:
# Writes the stored timing data to "timing.parquet" inside the given directory.
sala.export(sala.data, outfile = "../SALA/example_output/")