# Working With Dates in Pandas

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

## Create your own date

In [None]:
#import datetime


In [None]:
#calculate now


In [None]:
#display datatype


In [None]:
#manually set date with (year, month, day)


In [None]:
#display datatype


## Transform to date format

### One date

In [None]:
# a date written out as plain text (still just a Python str at this point)
date = "Jan 1 1970"

In [None]:
#display datatype


In [None]:
#use pd.to_datetime() to convert


In [None]:
#display datatype


### One date, but confuse pandas

In [None]:
# another text date, this time with ':' between the month, day, and year
date = "Jan:7:1970"

In [None]:
#display datatype


In [None]:
#use pd.to_datetime() to convert


We can fix this error using the `format` argument.

For info on formatting: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [None]:
#use format argument


### Now a whole column of dates

In [None]:
# Location of the coffee consumption CSV. The URL is written as two adjacent
# string literals, which Python joins into a single string.
url = (
    "https://gist.githubusercontent.com/ryanorsinger/"
    "b309f8db19e0ca71b213d4877d835e77/raw/f5841017310e2f4ca070b313529ceec2375336ba/coffee_consumption.csv"
)
# Load the CSV at that URL into a DataFrame
df = pd.read_csv(url)

In [None]:
#look at data


In [None]:
#look at datatypes


In [None]:
#use pd.to_datetime() to convert


In [None]:
#can also use .astype() to convert


In [None]:
#look at datatypes


## Now that they are in a date format, let's manipulate them

### use `.dt` to extract pieces of the date

the documentation: https://pandas.pydata.org/docs/reference/api/pandas.DatetimeIndex.html

### we can add them back to our initial dataframe

In [None]:
#add them all


### Reformat date using `dt.strftime()`

## Time to make it more complex!

In [None]:
# Google Sheets link in its "edit" form
sheet_url = "https://docs.google.com/spreadsheets/d/1kTrAFSrr-xP3REs0Lly0TdV4ekrHahBXLg9r5qKxmV8/edit#gid=0"
# Swap the '/edit#gid=' portion for the CSV export form of the same link
csv_export_url = sheet_url.replace("/edit#gid=", "/export?format=csv&gid=")

# Read the exported CSV into a DataFrame and display it
df = pd.read_csv(csv_export_url)
df

In [None]:
#lowercase columns


#### let's plot our close value

In [None]:
# plt.figure(figsize=(12,8))


# plt.title('the close values on a plot')
# plt.show()

> when we plot a single series using .plot(), the x-axis is the index value

## How do we make pandas time aware for time analysis?

1. Convert 'date/time' column to datetime object in Pandas (pd.to_datetime())
2. Set the datetime column as index
3. Sort the datetime index

### 1. Convert to datetime

> format argument allows us to tell pandas the makeup of our date, so it can be understood

### 2. Set the datetime column as Index

### 3. Sort the datetime index

#### Now let's plot it again!

In [None]:
# plt.figure(figsize=(12,8))



# plt.title('the close value over time')
# plt.show()

## Let's look at the mean close value on each day of the week

In [None]:
#pull out weekday name & save


#### let's plot it!

In [None]:
# plt.figure(figsize=(10,6))


# plt.title('the mean close value each day of the week')
# plt.show()

### But Misty, I don't like that the days aren't in order

In [None]:
#use dayofweek attribute


#### let's plot it better this time!

In [None]:
# plt.figure(figsize=(10,6))

# plt.title('the mean close value each day of the week')
# plt.show()

## How do we get a subset of the dataframe?

In [None]:
#one y'all already know


In [None]:
#single .loc value


> `.loc` lets you send an index value and then it will give you the corresponding rows

In [None]:
#range of .loc values


In [None]:
#using .loc for one year


## What if we want a different period of data?

In [None]:
#drop extra columns


### Downsampling: reduce frequency

use `asfreq` to change the period

In [None]:
#set frequency to daily


In [None]:
#set frequency to monthly


#### let's plot it by month!

In [None]:
# plt.figure(figsize=(10,8))

# plt.title('close resampled by month')
# plt.show()

#### What if I want the first day of the month?

[Pandas asfreq Offset Aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)

[Pandas asfreq Anchored Offsets](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#anchored-offsets)

In [None]:
#set to first day of the month


#### let's plot it!

In [None]:
# plt.figure(figsize=(14,10))

# plt.title('close resampled by month')
# plt.legend()
# plt.show()

### Upsampling: Increase frequency

In [None]:
#set frequency to minutes


> generates null for values we don't have, but we can fill them!

In [None]:
#use ffill method


In [None]:
# use bfill method


### Resampling - Aggregating over time

In [None]:
#get the daily mean


#### let's plot it!

In [None]:
# plt.figure(figsize=(14,10))


# plt.title('plotting close over time with various resampling techniques')
# plt.legend()
# plt.show()

### Rolling averages

- Generally used to smooth out short-term fluctuations in time series data and highlight long-term trends
- use `rolling()` to calculate

In [None]:
#define df resampled daily with the mean


In [None]:
#use the rolling function


> the rolling average is the mean of the most recent units in the window: the current one plus the ones just before it  
> in this example, with a 5-day window, the rolling average on 2017-07-05 is the average of 2017-07-01 through 2017-07-05

#### let's plot it!

In [None]:
# plt.figure(figsize=(12,8))

#original granularity

#resample by week and look at 4 weeks 

#resample by week and look at 12 weeks 

# plt.legend()
# plt.title('original vs rolling averages')
# plt.show()

### How about Lagging or Leading the data?

* `.shift`: move the data backwards and forwards by a given amount
* `.diff`: find the difference with the previous observation (or a specified further back observation)

In [None]:
#shift by one


In [None]:
#shift by negative 1


In [None]:
#difference by one


In [None]:
#subtract shift by one from close


## Timezones

#### use `tz_localize("UTC")` to set timezone

#### use `tz_convert` to change timezones 

In [None]:
#convert to 'America/Chicago'


#### extract timezone

#### use .tz_localize(tz=None) to remove timezone 

## Timedelta

#### use timedelta to alter a date

In [None]:
#set today's date using datetime


In [None]:
#subtract using pd.Timedelta()


In [None]:
#add


#### subtract two dates