# Pandas - part 4 

In [1]:
import pandas as pd
import numpy as np

Part 4 covers time series, which are essential when working with financial, trading, commodities, or resource data. More generally, it is important to know how to handle dates and times in any dataset.

# **Time Series**

- **time stamps** = a specific moment in time (e.g. July 1, 2020 at 7:20 am)
- **time intervals** (periods) = a span of time with a defined start and end
- **time deltas** = an exact length of time (a duration)

- https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [2]:
from datetime import datetime

In [42]:
# Build a datetime object field by field.
# The date itself is arbitrary -- e.g. a stamp for a personal or data log entry.
stamp_fields = {
    "year": 2231,
    "month": 7,
    "day": 1,
    "hour": 14,
    "minute": 35,
    "second": 33,
    "microsecond": 78,
}
d = datetime(**stamp_fields)
d

datetime.datetime(2231, 7, 1, 14, 35, 33, 78)

## strftime() and strptime() Format Codes

In [53]:
# Demonstrate the strftime() directives on the datetime object `d` created
# in an earlier cell. Each entry pairs the label template that is printed
# with the directive handed to d.strftime().
print("Format Codes:")

directive_demos = [
    # Weekday, full name.
    ("%A = {}", '%A'),
    # Weekday, abbreviated name.
    ("%a = {}", '%a'),
    # Weekday as a decimal number: 0 = Sunday ... 6 = Saturday.
    ("%w = {}", '%w'),
    # Day of the month, zero-padded.
    ("%d = {}", '%d'),
    # Month, full name.
    ("%B = {}", '%B'),
    # Month, abbreviated name.
    ("%b = {}", '%b'),
    # Month as a zero-padded decimal number.
    ("%m = {}", '%m'),
    # Year with century.
    ("%Y = {}", '%Y'),
    # Year without century, zero-padded.
    ("%y = {}", '%y'),
    # Hour on the 24-hour clock, zero-padded.
    ("%H = {}", '%H'),
    # Hour on the 12-hour clock, zero-padded.
    ("%I = {}", '%I'),
    # Locale's equivalent of AM or PM.
    ("%p = {}", '%p'),
    # Minute, zero-padded.
    ("%M (min)= {}", '%M'),
    # Second, zero-padded.
    ("%S (sec)= {}", '%S'),
    # Microsecond, zero-padded on the left.
    ("%f (microsec)= {}", '%f'),
    # Time zone name.
    ("%Z (timezone)= {}", '%Z'),
    # Day of the year, zero-padded -- i.e. "which day of the year is it?"
    ("%j (day of yr)= {}", '%j'),
    # Week number of the year with Sunday as the first day of the week,
    # zero-padded. Days before the first Sunday of the year are in week 0.
    ("%U (wk # of yr)= {}", '%U'),
    # Week number of the year with Monday as the first day of the week.
    # Days before the first Monday of the year are in week 0.
    ("%W (wk # of yr)= {}", '%W'),
    # Locale's appropriate date and time representation.
    ("%c (date time)= {}", '%c'),
    # Locale's appropriate date representation.
    ("%x (date time)= {}", '%x'),
    # Locale's appropriate time representation.
    ("%X (time)= {}", '%X'),
]

for template, directive in directive_demos:
    print(template.format(d.strftime(directive)))

Format Codes:
%A = Friday
%a = Fri
%w = 5
%d = 01
%B = July
%b = Jul
%m = 07
%Y = 2231
%y = 31
%H = 14
%I = 02
%p = PM
%M (min)= 35
%S (sec)= 33
%f (microsec)= 000078
%Z (timezone)= 
%j (day of yr)= 182
%U (wk # of yr)= 26
%W (wk # of yr)= 26
%c (date time)= Fri Jul  1 14:35:33 2231
%x (date time)= 07/01/31
%X (time)= 14:35:33


## Pandas **date & time**

In [55]:
# pd.to_datetime() parses a free-form date string into a pandas Timestamp,
# so no explicit format string has to be supplied here.
spoken_date = "3rd of July, 2231, 16:37:12"
panda_d = pd.to_datetime(spoken_date)

panda_d

Timestamp('2231-07-03 16:37:12')

In [56]:
# Ask the Timestamp for the full weekday name of July 3, 2231.
weekday_name = panda_d.strftime("%A")
weekday_name

'Sunday'

In [66]:
# Generate 12 timestamps starting from panda_d.
# np.arange(12) supplies the offsets 0..11 and the unit 'D' turns each one
# into a whole number of days, so only the date advances.
day_offsets = pd.to_timedelta(np.arange(12), unit='D')
panda_d + day_offsets
# -> DatetimeIndex of 12 consecutive dates; the time of day is unchanged

DatetimeIndex(['2231-07-03 16:37:12', '2231-07-04 16:37:12',
               '2231-07-05 16:37:12', '2231-07-06 16:37:12',
               '2231-07-07 16:37:12', '2231-07-08 16:37:12',
               '2231-07-09 16:37:12', '2231-07-10 16:37:12',
               '2231-07-11 16:37:12', '2231-07-12 16:37:12',
               '2231-07-13 16:37:12', '2231-07-14 16:37:12'],
              dtype='datetime64[ns]', freq=None)

In [67]:
# Same idea as the daily offsets, but stepping one hour at a time.
# The unit is spelled 'h': the uppercase 'H' alias is deprecated since
# pandas 2.2, while lowercase 'h' is also accepted by older releases.
hour_offsets = pd.to_timedelta(np.arange(12), unit='h')
panda_d + hour_offsets
# np.arange(12) yields the offsets 0..11, so the last stamp is 11 hours
# after the first: 4:37 PM on July 3 through 3:37 AM on July 4.

DatetimeIndex(['2231-07-03 16:37:12', '2231-07-03 17:37:12',
               '2231-07-03 18:37:12', '2231-07-03 19:37:12',
               '2231-07-03 20:37:12', '2231-07-03 21:37:12',
               '2231-07-03 22:37:12', '2231-07-03 23:37:12',
               '2231-07-04 00:37:12', '2231-07-04 01:37:12',
               '2231-07-04 02:37:12', '2231-07-04 03:37:12'],
              dtype='datetime64[ns]', freq=None)

index by timestamps

In [70]:
# Use timestamps as the index of a Series: first build the DatetimeIndex
# from date strings, then attach one value to each timestamp.
stamp_labels = ['2032-04-01', '2032-05-01', '2057-04-01', '2057-05-01']
datetime_index = pd.DatetimeIndex(stamp_labels)

index_data = pd.Series(data=[1, 2, 3, 4], index=datetime_index)
index_data

2032-04-01    1
2032-05-01    2
2057-04-01    3
2057-05-01    4
dtype: int64

In [71]:
# Integer slice of the timestamp-indexed Series: keeps the entries at
# positions 2 and 3 (the two 2057 dates).
last_two = index_data[2:4]
last_two

2057-04-01    3
2057-05-01    4
dtype: int64

### Pandas datetime data structure
- Not all Federation data records use a consistent date format. Some species enter dates "wrong" by Earth standards, but Pandas can handle a variety of inputs.

In [88]:
# Convert a mixed bag of dates into datetime values in one call.
# The list holds one datetime object plus strings written in several
# different layouts; pd.to_datetime() returns a single DatetimeIndex.
raw_dates = [
    datetime(2072, 3, 25),
    '20th of April, 2055, 4:57:13',
    '2031-May-8-19:46:09',
    '08-08-2059-17:22:03',
    '20410421-13:13:11',
]
dt = pd.to_datetime(raw_dates)

dt

DatetimeIndex(['2072-03-25 00:00:00', '2055-04-20 04:57:13',
               '2031-05-08 19:46:09', '2059-08-08 17:22:03',
               '2041-04-21 13:13:11'],
              dtype='datetime64[ns]', freq=None)

In [89]:
# Convert the timestamps to daily periods ("D"); the time of day is dropped.
daily_periods = dt.to_period(freq="D")
daily_periods

PeriodIndex(['2072-03-25', '2055-04-20', '2031-05-08', '2059-08-08',
             '2041-04-21'],
            dtype='period[D]', freq='D')

In [90]:
# Convert the timestamps to hourly periods: each one is cut down to its hour.
# Lowercase "h" is used because the uppercase "H" alias is deprecated since
# pandas 2.2; older releases accept the lowercase spelling as well.
hourly_periods = dt.to_period(freq="h")
hourly_periods

PeriodIndex(['2072-03-25 00:00', '2055-04-20 04:00', '2031-05-08 19:00',
             '2059-08-08 17:00', '2041-04-21 13:00'],
            dtype='period[H]', freq='H')

### TimeDelta

In [2]:
# Build the dates used for date arithmetic: subtracting one entry from
# another yields a Timedelta.
# NOTE: despite its name, `timedelta` holds timestamps (a DatetimeIndex).
import pandas as pd

date_strings = [
    '11-16-2020-15:25', # today
    'Dec-25-2020',
    'Jan-20-2021-12:00',
    'April-21-2041',
    'Mar-14-2039',
    'May-7-2040',
    '06-15-2050',
    'Jan-01-2262',
]

# Parse every string on its own. Since pandas 2.0, pd.to_datetime() on a
# list of strings infers ONE format from the first string and raises on
# entries written in a different layout, which is exactly what this list
# contains. Element-wise parsing works on old and new releases alike.
timedelta = pd.DatetimeIndex([pd.Timestamp(text) for text in date_strings])

timedelta

DatetimeIndex(['2020-11-16 15:25:00', '2020-12-25 00:00:00',
               '2021-01-20 12:00:00', '2041-04-21 00:00:00',
               '2039-03-14 00:00:00', '2040-05-07 00:00:00',
               '2050-06-15 00:00:00', '2262-01-01 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [5]:
# How long from "today" (entry 0) until Christmas and until Jan 20th 2021?
# Subtract the earlier date from the later one so the result is positive,
# e.g. 2021-01-20 12:00  -  2020-11-16 15:25
today = timedelta[0]
until_xmas = timedelta[1] - today
until_inauguration = timedelta[2] - today
print('Xmas is in: ', until_xmas)
print('New US President: ', until_inauguration)

Xmas is in:  38 days 08:35:00
New US President:  64 days 20:35:00


### Pandas **date range**
- <code>pd.date_range(start, end)</code> 

| Code   | Description         | Code   | Description          |
|--------|---------------------|--------|----------------------|
| ``D``  | Calendar day        | ``B``  | Business day         |
| ``W``  | Weekly              |        |                      |
| ``M``  | Month end           | ``BM`` | Business month end   |
| ``Q``  | Quarter end         | ``BQ`` | Business quarter end |
| ``A``  | Year end            | ``BA`` | Business year end    |
| ``H``  | Hours               | ``BH`` | Business hours       |
| ``T``  | Minutes             |        |                      |
| ``S``  | Seconds             |        |                      |
| ``L``  | Milliseconds        |        |                      |
| ``U``  | Microseconds        |        |                      |
| ``N``  | nanoseconds         |        |                      |

In [121]:
# Every calendar day from Jan 1 through Jan 11, 2103 (both ends included).
first_eleven_days = pd.date_range(start='2103-01-01', end='2103-01-11')
first_eleven_days

DatetimeIndex(['2103-01-01', '2103-01-02', '2103-01-03', '2103-01-04',
               '2103-01-05', '2103-01-06', '2103-01-07', '2103-01-08',
               '2103-01-09', '2103-01-10', '2103-01-11'],
              dtype='datetime64[ns]', freq='D')

In [120]:
# Instead of an end date, ask for a fixed number of dates:
# 8 consecutive days counted from the start date.
eight_days = pd.date_range(start='2103-01-03', periods=8)
eight_days

DatetimeIndex(['2103-01-03', '2103-01-04', '2103-01-05', '2103-01-06',
               '2103-01-07', '2103-01-08', '2103-01-09', '2103-01-10'],
              dtype='datetime64[ns]', freq='D')

In [122]:
# Nine hourly timestamps, from 13:30:02 (1:30 PM) through 21:30:02 (9:30 PM).
# The frequency is spelled 'h': the uppercase 'H' alias is deprecated since
# pandas 2.2, while lowercase 'h' is also accepted by older releases.
hourly_stamps = pd.date_range(start='2103-01-03-13:30:02', periods=9, freq='h')
hourly_stamps

DatetimeIndex(['2103-01-03 13:30:02', '2103-01-03 14:30:02',
               '2103-01-03 15:30:02', '2103-01-03 16:30:02',
               '2103-01-03 17:30:02', '2103-01-03 18:30:02',
               '2103-01-03 19:30:02', '2103-01-03 20:30:02',
               '2103-01-03 21:30:02'],
              dtype='datetime64[ns]', freq='H')

In [123]:
# One timestamp per week ('W') for 6 weeks, counted from January 2103.
weekly_stamps = pd.date_range(start='2103-01', periods=6, freq='W')
weekly_stamps

DatetimeIndex(['2103-01-07', '2103-01-14', '2103-01-21', '2103-01-28',
               '2103-02-04', '2103-02-11'],
              dtype='datetime64[ns]', freq='W-SUN')