# Datetime

In [None]:
from datetime import datetime, timedelta

# Timestamp at which this lesson plan was written (naive: no timezone attached)
lesson_date = datetime(
    year=2016, month=4, day=5,
    hour=23, minute=31, second=1, microsecond=844089,
)

Exercise 1: Write a function that returns the day of the week on which the lesson plan was written.

In [None]:
def day_of_week(date):
    """Return the lowercase English weekday name for ``date``.

    ``date`` is expected to expose ``weekday()`` with the ``datetime``
    convention: 0 for Monday through 6 for Sunday.
    """
    # Number the names in calendar order so the keys line up with weekday().
    names_by_index = dict(enumerate((
        'monday',
        'tuesday',
        'wednesday',
        'thursday',
        'friday',
        'saturday',
        'sunday',
    )))
    return names_by_index[date.weekday()]

# last expression in the cell, so the notebook echoes the returned weekday name
day_of_week(lesson_date)

Exercise 2: How do I get an object with the current time?

In [None]:
# datetime.now() builds a datetime for the current local time;
# strftime("%A") then formats it as the full weekday name
datetime.now().strftime("%A")

In [None]:
# keep the current-time object around so it can be reused after printing
now = datetime.now()
weekday_name = now.strftime("%A")  # full weekday name of `now`
print(weekday_name)

Exercise 3: What is the Gregorian ordinal representation of the date? Using this value, how many days ago was this lesson plan written?

In [None]:
# naive datetime for the current local time
# NOTE(review): the exercise asks for the Gregorian ordinal; this cell only shows
# the timestamp -- calling .toordinal() on it (or on lesson_date) gives the ordinal
datetime.now()

In [None]:
# toordinal() is the proleptic Gregorian day number (0001-01-01 is day 1), so
# subtracting two ordinals gives the whole calendar days between the dates;
# the time-of-day part of both values is ignored
datetime.now().toordinal() - lesson_date.toordinal()

Exercise 4: Try using `timedelta` to shift our lesson date object by the following intervals:
- 1 hour ahead
- 3 days ago
- 1 year, 3 days, 2 seconds ahead

In [None]:
# A notebook cell only echoes its final expression, so every shifted date is
# printed explicitly; otherwise the first three results would be discarded.

# 1 hour ahead
print(lesson_date + timedelta(hours=1))

# 3 days ago: subtract a positive delta ...
print(lesson_date - timedelta(days=3))
# ... OR, equivalently, add a negative one
print(lesson_date + timedelta(days=-3))

# 1 year, 3 days, 2 seconds ahead: timedelta has no `years` argument, so the
# year is expressed as 365 days (365 + 3 = 368). No Feb 29 falls between
# 2016-04-05 and 2017-04-05, so 365 days lands on the same calendar date.
print(lesson_date + timedelta(days=368, seconds=2))

# Timeseries Autocorrelation

In [None]:
# import packages and data
import pandas as pd, numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Rossmann CSV, downloaded over HTTPS every time this cell runs
rossmann_url = 'https://s3.amazonaws.com/gamma-datasets/P2/rossmann.csv'
data = pd.read_csv(
    rossmann_url,
    skipinitialspace=True,  # ignore whitespace that follows each delimiter
    low_memory=False,       # parse the file in one pass instead of in chunks
)

In [None]:
# The `Date` column holds the sales date per store: parse it into timestamps and
# promote it to the index so the time-series tools (resample, rolling, ...) apply.
data = data.assign(Date=pd.to_datetime(data['Date'])).set_index('Date')

# calendar features derived from the new DatetimeIndex
data = data.assign(Year=data.index.year, Month=data.index.month)

# oldest dates first
data = data.sort_index()

# keep only the rows where the store was open
open_mask = data['Open'] == 1
df = data.loc[open_mask]

# open-day sales for store 1 alone
store1 = df.loc[df['Store'] == 1]

In [None]:
# Box plots of store 1 sales for each month, split by whether a promotion ran.
# `store1` only holds open days, so faceting on `Open` yields a single panel.
# The trailing semicolon keeps the notebook from echoing the returned grid object.
sns.catplot(
    data=store1,
    kind='box',
    x='Month',
    y='Sales',
    hue='Promo',
    col='Open',
);

In [None]:
# Are sales more correlated with the prior day, day of week, last month, or last year?

# per-calendar-day mean of `Sales` and `Open` over the open-store rows in `df`
average_daily_sales = df[['Sales', 'Open']].resample('D').mean()

# each report pairs its message template with the lag (in days) it measures
sales_by_day = average_daily_sales['Sales']
lag_reports = (
    ('Correlation with last day: {}', 1),
    ('Correlation with last week: {}', 7),
    ('Correlation with last month: {}', 30),
    ('Correlation with last year: {}', 365),
)
for template, lag in lag_reports:
    print(template.format(sales_by_day.autocorr(lag=lag)))

In [None]:
# plot the 15-day rolling mean of the average daily `Sales` column
# NOTE(review): this comment previously said "customers", but the code plots
# Sales -- confirm which series was intended
average_daily_sales.Sales.rolling(window=15).mean().plot(figsize=(18,6));

In [None]:
# Find the dates with the steepest fall in total sales versus the previous day
# (a different `periods` value in diff() compares against other cycles, e.g. 7 for weekly).
total_daily = df[['Sales', 'Open']].resample('D').sum()
# change from the preceding calendar day; the first day has nothing to compare to (NaN)
total_daily['Diff'] = total_daily['Sales'].diff()

# ascending order puts the most negative changes, i.e. the largest drops, on top
total_daily.sort_values('Diff').head()

In [None]:
# Running total of sales up to each day, viewed at the start of Dec. 2014.
total_daily_sales = df['Sales'].resample('D').sum()
# expanding window: each entry is the sum of every day up to and including it
running_total = total_daily_sales.expanding().sum()
# partial-string selection on the DatetimeIndex picks out the December 2014 rows
running_total.loc['2014-12'].head()

In [None]:
# When were the largest differences between 15-day moving/rolling averages?
# First smooth total daily sales over a 15-day window ...
rolling_15d_mean = total_daily_sales.rolling(window=15).mean()
# ... then take the day-over-day change of that smoothed series
x = rolling_15d_mean.diff(periods=1)

In [None]:
# ten most negative day-over-day changes in the rolling mean, steepest drop first
# (sort_values orders ascending by default)
x.sort_values().head(10)

# Unsurprisingly, they occur at the beginning of every year after the holiday season