# Time series - Pandas 

## In this tutorial, you will learn:
### 1. Pandas Date-time series class
### 2. Date-range
### 3. Period
### 4. Timestamp

In [2]:
import pandas as pd
import numpy as np

# ignore warnings from pandas
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Date- Time
print(pd.to_datetime('2018-01-15 3:45pm'))
print(pd.to_datetime('2018-01-15 3:45am'))

2018-01-15 15:45:00
2018-01-15 03:45:00


In [9]:
print(pd.to_datetime('22/2/2020 2:42:56.444333'))  #

2020-02-22 02:42:56.444333


In [8]:
# European style
pd.to_datetime(['04-22-2020', '13-04-2020'], dayfirst=True)

DatetimeIndex(['2020-04-22', '2020-04-13'], dtype='datetime64[ns]', freq=None)

In [19]:
# What's wrong?
print(pd.to_datetime(['1/15/2020'], format='%m/%d/%Y'))

DatetimeIndex(['2020-01-15'], dtype='datetime64[ns]', freq=None)


In [0]:
# Did you notice y is small? why?
pd.to_datetime(['2/25/10', '8/6/19', '12/30/20'], format='%m/%d/%y')

In [0]:
# What's wrong?
pd.to_datetime(['12/32/20'], format='%m/%d/%y')

In [20]:
# What's wrong? #b
# Nice website about formatting: https://devhints.io/datetime 
ts =  pd.to_datetime('16-NOV-10 12.00.00.000000000 AM', format='%d-%B-%y %H.%M.%S.%f %p')
ts

Timestamp('2010-11-16 12:00:00')

In [22]:
# Time stamps
# Timestamp is the pandas equivalent of python’s Datetime 
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html
pd.Timestamp('2020-04-22 11:22:25')

Timestamp('2020-04-22 11:22:25')

In [31]:
#  Convert Float representing a Unix epoch in units of seconds
# https://www.epochconverter.com/clock
# Unix epoch:  It is the number of seconds that have went by since the Unix epoch,
#... that is the time 00:00:00 UTC on 1 January 1970.
pd.Timestamp(1585790501, unit='s', tz='US/Eastern')

Timestamp('2020-04-01 21:21:41-0400', tz='US/Eastern')

In [31]:
pd.to_datetime([1349720105100, 1349720105200, 1349720105300], unit='ms')

DatetimeIndex(['2012-10-08 18:15:05.100000', '2012-10-08 18:15:05.200000',
               '2012-10-08 18:15:05.300000'],
              dtype='datetime64[ns]', freq=None)

In [33]:
dt = pd.Timestamp(year=2020, month=4, day=2, hour=6, )
dt

Timestamp('2020-04-02 06:00:00')

In [35]:
dt.day_name() , dt.month_name(), dt.is_leap_year

('Thursday', 'April', True)

In [36]:
# Work with time zones
dt = pd.Timestamp(1585601994, unit='s', tz='US/Eastern')
dt

Timestamp('2020-03-30 16:59:54-0400', tz='US/Eastern')

In [37]:
#To convert a time zone aware pandas object to another time zone, you can use the tz_convert method.
dt.tz_convert('Europe/London')

Timestamp('2020-03-30 21:59:54+0100', tz='Europe/London')

In [38]:
from pytz import common_timezones, all_timezones
print(len(common_timezones))
print(len(all_timezones))

440
592


In [39]:
print(common_timezones[:5])

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara']


In [40]:
print(all_timezones[-5:])

['UTC', 'Universal', 'W-SU', 'WET', 'Zulu']


In [47]:
# localize a = timestamp
ts = pd.Timestamp('2020-04-02 10')
ts

Timestamp('2020-04-02 10:00:00')

In [48]:
#ts = ts.tz_localize(tz = 'US/Eastern')
#ts.tz 

<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>

In [49]:
# What's wrong?
ts.tz_convert(tz = 'Asia/Kolkata') 

Timestamp('2020-04-02 19:30:00+0530', tz='Asia/Kolkata')

In [54]:
# Time Spans
# A Period represents a span of time (e.g., a day, a month, a quarter, etc). 
dti = pd.Period('2020-04-22','M') 
dti

Period('2020-04', 'M')

In [55]:
dti.day, dti.month, dti.year

(30, 4, 2020)

In [56]:
dti += 1
dti

Period('2020-05', 'M')

In [57]:
dti = pd.Period('2020-04-22 10:45')
dti

Period('2020-04-22 10:45', 'T')

In [58]:
dti += 1
dti 

Period('2020-04-22 10:46', 'T')

In [59]:
# Work with datetime
import datetime

In [62]:
# If our readings has an offset, we can fix it using pandas
p_ts = pd.Period('2020-04-02 06:00', freq='H')
p_ts + pd.offsets.Hour(4) 

Period('2020-04-02 10:00', 'H')

In [63]:
p_ts + datetime.timedelta(hours = 4 ) #minutes

Period('2020-04-02 10:00', 'H')

In [64]:
p_ts + np.timedelta64(60 * 60 * 4, 's' )

Period('2020-04-02 10:00', 'H')

In [52]:
# date range and periods
dti = pd.date_range(end='04-01-2020', periods=10, freq='H')
dti

DatetimeIndex(['2020-03-31 15:00:00', '2020-03-31 16:00:00',
               '2020-03-31 17:00:00', '2020-03-31 18:00:00',
               '2020-03-31 19:00:00', '2020-03-31 20:00:00',
               '2020-03-31 21:00:00', '2020-03-31 22:00:00',
               '2020-03-31 23:00:00', '2020-04-01 00:00:00'],
              dtype='datetime64[ns]', freq='H')

In [53]:
# create features
dti.hour

Int64Index([15, 16, 17, 18, 19, 20, 21, 22, 23, 0], dtype='int64')

In [66]:
#Daterange
print(pd.date_range(start='04-01-2020', end='08-10-2020', freq='10D'))

DatetimeIndex(['2020-04-01', '2020-04-11', '2020-04-21', '2020-05-01',
               '2020-05-11', '2020-05-21', '2020-05-31', '2020-06-10',
               '2020-06-20', '2020-06-30', '2020-07-10', '2020-07-20',
               '2020-07-30', '2020-08-09'],
              dtype='datetime64[ns]', freq='10D')


In [68]:
# Indexing with date range
rng = pd.date_range('2020 April 2', periods = 5, freq = 'M')
ts = pd.Series(range(len(rng)), index = rng)
ts

2020-04-30    0
2020-05-31    1
2020-06-30    2
2020-07-31    3
2020-08-31    4
Freq: M, dtype: int64

In [69]:
type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [74]:
ts_period = ts.to_period() #freq='d'
ts_period

2020-04    0
2020-05    1
2020-06    2
2020-07    3
2020-08    4
Freq: M, dtype: int64

In [58]:
type(ts_period.index)

pandas.core.indexes.period.PeriodIndex

# Practicle Example 


In [76]:
# To show the power of timedate with Pandas
df = pd.read_csv('time.csv', header=None)
df.columns = ['Time']
df.head(5)

Unnamed: 0,Time
0,20-SEP-14 12.00.00.000000000 AM
1,
2,
3,13-NOV-18 12.00.00.000000000 AM
4,19-AUG-18 12.00.00.000000000 AM


In [77]:
df.shape

(2646210, 1)

In [80]:
df.isnull().sum()

Time    118252
dtype: int64

In [81]:
df['Time'] =  pd.to_datetime(df['Time'], format='%d-%b-%y %H.%M.%S.%f %p', errors='coerce')
# to understand the errors, please refere to the link below
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html

In [84]:
df.sample(5)

Unnamed: 0,Time
2365140,1999-07-01 12:00:00
445369,2015-05-15 12:00:00
2502025,2016-10-19 12:00:00
1891354,2003-09-05 12:00:00
2131705,2019-06-26 12:00:00


In [85]:
df.isnull().sum()

Time    118252
dtype: int64

# End