# Average Daily Users

### Imports

In [1]:
from datetime import datetime
import os
import pandas as pd
import numpy as np

### Initializations

In [2]:
# Absolute path to the raw TRAFx export, resolved as ../data/raw/TRAFx_raw.csv
# relative to the current working directory. Check this matches your layout.
raw_data_dir = os.path.join(os.getcwd(), "..", "data", "raw")
trafx_data_file_path = os.path.abspath(os.path.join(raw_data_dir, "TRAFx_raw.csv"))

### Load the TRAFx data

In [3]:
# Read the raw CSV into a DataFrame and preview the first five rows
trafx_df = pd.read_csv(filepath_or_buffer=trafx_data_file_path)
trafx_df.head(n=5)

Unnamed: 0,Day,BCC Bear Trap,BCC Butler,BCC Cardiff,BCC Days Fork,BCC Lake Blanche,BCC Mill D,BCC Mineral Fork,BCC Silver Fork,BCC Willow Heights,LCC Gate Buttress,LCC Our Lady,LCC Our Lady East,LCC Summer Road,LCC Summer Road 2,LCC White Pine,MCC Porter,MCC Road
0,2017-12-02,,,,,,,,,,,,,,,,,
1,2017-12-03,,48.0,103.0,,,7.0,,,,,,,,,,,
2,2017-12-04,,5.0,4.0,,,5.0,,,,,0.0,,,,,,
3,2017-12-05,,19.0,18.0,,,3.0,,,,,32.0,,,,,,
4,2017-12-06,,32.0,7.0,,,18.0,,,,,44.0,,,,,,


### Clean the data

In [4]:
# Replace any zeros with nan so they are excluded from the counts and averages
# computed later (count() and mean() both skip NaN).
# Rebinding instead of inplace=True keeps the cell safe to re-run.
trafx_df = trafx_df.replace(0, np.nan)

# Convert Day to datetime.
# pd.to_datetime is vectorized and, unlike datetime.strptime, also accepts values
# that are already datetimes, so running this cell a second time does not raise.
trafx_df['Day'] = pd.to_datetime(trafx_df['Day'], format="%Y-%m-%d")

### Append information to main df
*_Note: season is only Dec 1 - April 30, all other data excluded_*

In [5]:
# Derive calendar labels from Day: abbreviated month name ('%b') and full
# weekday name ('%A'), stored as new columns on the main dataframe
for label, date_format in (('month', '%b'), ('dayOfWeek', '%A')):
    trafx_df[label] = trafx_df['Day'].map(lambda day, fmt=date_format: day.strftime(fmt))

def set_season(date_time):
    """Return the season label for a date, or the string 'None' when out of season.

    December belongs to the season starting that year ("YYYY-YYYY+1");
    January through April belong to the season that started the previous
    year ("YYYY-1-YYYY"). Every other month yields 'None'.

    Parameters
    ----------
    date_time : object exposing integer ``month`` and ``year`` attributes
        (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns
    -------
    str
        The "start-end" year label, or 'None'.
    """
    month = date_time.month
    # Guard clause: anything that is neither December nor Jan-Apr is out of season
    if month != 12 and month > 4:
        return 'None'
    start_year = date_time.year if month == 12 else date_time.year - 1
    return "{}-{}".format(start_year, start_year + 1)
                                                                                               
# Label every row with its season, then keep only in-season rows
# (set_season returns the string 'None' for dates outside the season)
trafx_df['season'] = trafx_df['Day'].map(set_season)
in_season = trafx_df['season'] != 'None'
trafx_df = trafx_df.loc[in_season]
trafx_df.head()

Unnamed: 0,Day,BCC Bear Trap,BCC Butler,BCC Cardiff,BCC Days Fork,BCC Lake Blanche,BCC Mill D,BCC Mineral Fork,BCC Silver Fork,BCC Willow Heights,...,LCC Our Lady,LCC Our Lady East,LCC Summer Road,LCC Summer Road 2,LCC White Pine,MCC Porter,MCC Road,month,dayOfWeek,season
0,2017-12-02,,,,,,,,,,...,,,,,,,,Dec,Saturday,2017-2018
1,2017-12-03,,48.0,103.0,,,7.0,,,,...,,,,,,,,Dec,Sunday,2017-2018
2,2017-12-04,,5.0,4.0,,,5.0,,,,...,,,,,,,,Dec,Monday,2017-2018
3,2017-12-05,,19.0,18.0,,,3.0,,,,...,32.0,,,,,,,Dec,Tuesday,2017-2018
4,2017-12-06,,32.0,7.0,,,18.0,,,,...,44.0,,,,,,,Dec,Wednesday,2017-2018


### Group data by month and day and determine daily average users

In [6]:
# Output table: one row per site / season / month / dayOfWeek with the number of
# non-missing daily observations (n) and the average daily users (adu).
adu_columns = ['site', 'season', 'month', 'dayOfWeek', 'n', 'adu']

# Get monthly averages
# numeric_only=True restricts the mean to the numeric counter columns. pandas >= 2.0
# no longer drops non-numeric columns (e.g. dayOfWeek) silently and raises instead.
ms_groups = trafx_df.groupby(['season', 'month'])
ms_adu_val = ms_groups.mean(numeric_only=True)
ms_adu_n = ms_groups.count()  # non-NaN observations per column and group

# Collect the rows in a list and build the frame once, rather than growing the
# DataFrame one row at a time with .loc[len(df)].
monthly_records = []
for (season, month), site_means in ms_adu_val.iterrows():
    site_counts = ms_adu_n.loc[(season, month)]
    for site in ms_adu_val.columns:
        # Skip sites that have no observations in this season/month
        if site_counts[site] > 0:
            # 'all' marks a month-level row (not split by day of week)
            monthly_records.append([site, season, month, 'all', site_counts[site], site_means[site]])

adu_df = pd.DataFrame(monthly_records, columns=adu_columns)
adu_df.head(5)

Unnamed: 0,site,season,month,dayOfWeek,n,adu
0,BCC Butler,2017-2018,Apr,all,26,46.230769
1,BCC Cardiff,2017-2018,Apr,all,28,421.857143
2,BCC Mill D,2017-2018,Apr,all,24,17.541667
3,LCC Summer Road,2017-2018,Apr,all,30,123.866667
4,BCC Butler,2017-2018,Dec,all,14,38.428571


In [7]:
# Get daily averages (per season, month and day of week)
# numeric_only=True restricts the mean to the numeric counter columns. pandas >= 2.0
# no longer drops non-numeric columns silently and raises instead.
msd_groups = trafx_df.groupby(['season', 'month', 'dayOfWeek'])
msd_adu_val = msd_groups.mean(numeric_only=True)
msd_adu_n = msd_groups.count()  # non-NaN observations per column and group

# Collect the rows in a list and append them in one step, rather than growing
# the DataFrame one row at a time.
daily_records = []
for (season, month, dow), site_means in msd_adu_val.iterrows():
    site_counts = msd_adu_n.loc[(season, month, dow)]
    # Iterate this table's own columns (previously borrowed from the monthly table)
    for site in msd_adu_val.columns:
        # Skip sites that have no observations for this season/month/weekday
        if site_counts[site] > 0:
            daily_records.append([site, season, month, dow, site_counts[site], site_means[site]])

# Keep only the month-level ('all') rows already in adu_df so that re-running this
# cell replaces the day-of-week rows instead of appending duplicates.
adu_df = adu_df[adu_df['dayOfWeek'] == 'all'].reset_index(drop=True)
if daily_records:
    adu_df = pd.concat(
        [adu_df, pd.DataFrame(daily_records, columns=adu_df.columns)],
        ignore_index=True,
    )

adu_df.head(5)

Unnamed: 0,site,season,month,dayOfWeek,n,adu
0,BCC Butler,2017-2018,Apr,all,26,46.230769
1,BCC Cardiff,2017-2018,Apr,all,28,421.857143
2,BCC Mill D,2017-2018,Apr,all,24,17.541667
3,LCC Summer Road,2017-2018,Apr,all,30,123.866667
4,BCC Butler,2017-2018,Dec,all,14,38.428571


### Save data to file

In [8]:
# This assumes we're using the cookiecutter directory structure with notebooks at the same directory
# level as data, with processed as a subdirectory of data
file_path = os.path.abspath(os.path.join(os.getcwd(), "..", "data", "processed", "daily_user_averages.csv"))

# Create the output directory if it is missing so to_csv does not fail
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# index=False: the row index is just a running number and is not written out
adu_df.to_csv(file_path, index=False)