In [1]:
import pandas as pd

In [2]:
import datetime as dt

## Review of Python's datetime module

In [3]:
dt.date(2016,4,12)

datetime.date(2016, 4, 12)

In [4]:
# A plain calendar date (no time component): 12 April 2016
someday = dt.date(year=2016, month=4, day=12)

In [5]:
someday.year

2016

In [6]:
someday.month

4

In [7]:
someday.day

12

In [8]:
dt.datetime(2018,12,3)

datetime.datetime(2018, 12, 3, 0, 0)

In [9]:
dt.datetime(2018,12,3,8,13,45) # positional args are: year, month, day, hour, minute, second

datetime.datetime(2018, 12, 3, 8, 13, 45)

In [10]:
# str() renders the datetime in a readable 'YYYY-MM-DD HH:MM:SS' form
str(dt.datetime(2018,12,3,8,13,45))

'2018-12-03 08:13:45'

In [11]:
# A full date-and-time value: 3 December 2018 at 08:13:45
sometime = dt.datetime(
    year=2018, month=12, day=3,
    hour=8, minute=13, second=45,
)

In [12]:
sometime.year

2018

In [13]:
sometime.month

12

In [14]:
sometime.day

3

In [15]:
sometime.hour

8

In [16]:
sometime.minute

13

In [17]:
sometime.second

45

## The pandas Timestamp object

In [18]:
pd.Timestamp('2015-03-31')

Timestamp('2015-03-31 00:00:00')

In [19]:
pd.Timestamp('2015/03/31')

Timestamp('2015-03-31 00:00:00')

In [20]:
pd.Timestamp('1/1/2015')

Timestamp('2015-01-01 00:00:00')

In [21]:
pd.Timestamp('4/3/2010')
# Slash-separated dates are read month-first here: '4/3/2010' becomes April 3rd, not 4 March.
# Check which day/month convention your own data uses before parsing it.

Timestamp('2010-04-03 00:00:00')

In [22]:
pd.Timestamp(dt.date(2016,4,12))

Timestamp('2016-04-12 00:00:00')

In [23]:
pd.Timestamp(dt.datetime(2018,12,3,8,13,45))

Timestamp('2018-12-03 08:13:45')

## The pandas DatetimeIndex object

In [24]:
dates = ["2016-12-3","2017-04-03","2018-05-16"]

In [25]:
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-12-03', '2017-04-03', '2018-05-16'], dtype='datetime64[ns]', freq=None)

In [26]:
dates = [dt.date(2016,4,12),dt.date(2016,4,2),dt.date(2016,4,1)]

In [27]:
dtIndex = pd.DatetimeIndex(dates)

In [28]:
values = [121,2434,65]

In [29]:
pd.Series(data=values,index=dtIndex)

2016-04-12     121
2016-04-02    2434
2016-04-01      65
dtype: int64

## The pd.to_datetime() Method

In [30]:
pd.to_datetime('2019-02-13')

Timestamp('2019-02-13 00:00:00')

In [31]:
pd.to_datetime('May 16th, 1997')

Timestamp('1997-05-16 00:00:00')

In [32]:
pd.to_datetime('May 16, 1997')

Timestamp('1997-05-16 00:00:00')

In [33]:
pd.to_datetime('May ,16, 1997')

Timestamp('1997-05-16 00:00:00')

In [34]:
pd.to_datetime('1997') # Month and day are set to default (January 1st)

Timestamp('1997-01-01 00:00:00')

In [35]:
pd.to_datetime(['2015-1-1','2014/2/8','2016','apr 3, 2017'])

DatetimeIndex(['2015-01-01', '2014-02-08', '2016-01-01', '2017-04-03'], dtype='datetime64[ns]', freq=None)

In [36]:
# Date strings written in several different formats, collected in one Series
times = pd.Series([
    '2015-1-1',
    '2014/2/8',
    '2016',
    'apr 3, 2017',
])
times

0       2015-1-1
1       2014/2/8
2           2016
3    apr 3, 2017
dtype: object

In [37]:
pd.to_datetime(times)

0   2015-01-01
1   2014-02-08
2   2016-01-01
3   2017-04-03
dtype: datetime64[ns]

In [38]:
dates = pd.Series(["July 4th 1995",'10/04/1991','Hello','2015-02-31'])

In [39]:
# errors='coerce' turns values that cannot be parsed ('Hello') or that are not
# real calendar dates ('2015-02-31') into NaT instead of raising an error
pd.to_datetime(dates,errors='coerce')

0   1995-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

In [40]:
# unit='s' tells pandas that the integers are counts of seconds (Unix epoch timestamps)
pd.to_datetime([1209391345,2145642456,1234578532,1246788654,1498765432],unit ='s')

DatetimeIndex(['2008-04-28 14:02:25', '2037-12-28 19:47:36',
               '2009-02-14 02:28:52', '2009-07-05 10:10:54',
               '2017-06-29 19:43:52'],
              dtype='datetime64[ns]', freq=None)

## Create a range of dates with the pd.date_range() method

In [41]:
# `freq` is the step between consecutive generated dates; 'D' means one calendar day
times = pd.date_range(
    start='2016-1-1',
    end='2016-1-10',
    freq='D',
)

In [42]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [43]:
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [44]:
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

In [45]:
pd.date_range(start='2016-1-1',end='2016-1-10',freq = '2D') #Incrementing by 2 days

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [46]:
pd.date_range(start='2016-1-1',end='2016-1-10',freq = 'B') #Gives business days

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [47]:
pd.date_range(start='2016-1-1',end='2016-1-10',freq = 'W') # One date per week; the default anchor day is Sunday (W-SUN)

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [48]:
pd.date_range(start='2016-1-1',end='2016-1-10',freq = 'W-FRI') # Weekly frequency anchored on Friday, i.e. every Friday within the range

DatetimeIndex(['2016-01-01', '2016-01-08'], dtype='datetime64[ns]', freq='W-FRI')

In [49]:
pd.date_range(start='2016-1-1',end='2016-1-10',freq = 'H') # Gives every hour

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-09 15:00:00', '2016-01-09 16:00:00',
               '2016-01-09 17:00:00', '2016-01-09 18:00:00',
               '2016-01-09 19:00:00', '2016-01-09 20:00:00',
               '2016-01-09 21:00:00', '2016-01-09 22:00:00',
               '2016-01-09 23:00:00', '2016-01-10 00:00:00'],
              dtype='datetime64[ns]', length=217, freq='H')

In [50]:
pd.date_range(start='2016-1-1',end='2016-12-31',freq = 'M') # Gives last day of each month

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [51]:
pd.date_range(start='2016-1-1',end='2016-12-31',freq = 'MS') # gives month start

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [52]:
pd.date_range(start='2016-1-1',end='2033-12-31',freq = 'A') # Gives last day of each year

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [53]:
pd.date_range(start="2012-09-09",periods=25, freq = "D") # Returns exactly 25 daily values because periods=25

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [54]:
pd.date_range(start="2012-09-09",periods=25, freq = "B") # Returns 25 business-day values because periods=25

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12'],
              dtype='datetime64[ns]', freq='B')

In [55]:
pd.date_range(start="2012-09-09",periods=25, freq = "6H") # 25 values spaced 6 hours apart

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 06:00:00',
               '2012-09-09 12:00:00', '2012-09-09 18:00:00',
               '2012-09-10 00:00:00', '2012-09-10 06:00:00',
               '2012-09-10 12:00:00', '2012-09-10 18:00:00',
               '2012-09-11 00:00:00', '2012-09-11 06:00:00',
               '2012-09-11 12:00:00', '2012-09-11 18:00:00',
               '2012-09-12 00:00:00', '2012-09-12 06:00:00',
               '2012-09-12 12:00:00', '2012-09-12 18:00:00',
               '2012-09-13 00:00:00', '2012-09-13 06:00:00',
               '2012-09-13 12:00:00', '2012-09-13 18:00:00',
               '2012-09-14 00:00:00', '2012-09-14 06:00:00',
               '2012-09-14 12:00:00', '2012-09-14 18:00:00',
               '2012-09-15 00:00:00'],
              dtype='datetime64[ns]', freq='6H')

In [56]:
# With end= and periods= (and no start), the range is the 20 days leading up to and including the end date
pd.date_range(end='1999-12-31',periods=20,freq = 'D')

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [57]:
pd.date_range(end='1999-12-31',periods=20,freq = 'B')

DatetimeIndex(['1999-12-06', '1999-12-07', '1999-12-08', '1999-12-09',
               '1999-12-10', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

## The .dt Accessor

In [58]:
# Dates from 2000 through 2010 taken every 24 days, used as sample data below
bunch_of_dates = pd.date_range(
    start='2000-1-1',
    end='2010-12-31',
    freq='24D',
)

In [59]:
s = pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [60]:
s.dt.day

0       1
1      25
2      18
3      13
4       6
5      30
6      24
7      17
8      11
9       4
10     28
11     21
12     15
13      8
14      2
15     26
16     19
17     12
18      8
19      1
20     25
21     19
22     12
23      6
24     30
25     23
26     16
27     10
28      3
29     27
       ..
138    25
139    18
140    14
141     7
142     1
143    25
144    18
145    12
146     5
147    29
148    22
149    16
150     9
151     3
152    27
153    20
154    13
155     9
156     2
157    26
158    20
159    13
160     7
161    31
162    24
163    17
164    11
165     4
166    28
167    22
Length: 168, dtype: int64

In [61]:
# Day-of-week name for every date in the Series.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0,
# so it raises AttributeError on current pandas; `day_name()` is the supported
# replacement and returns the same English day names.
s.dt.day_name()

0       Saturday
1        Tuesday
2         Friday
3         Monday
4       Thursday
5         Sunday
6      Wednesday
7       Saturday
8        Tuesday
9         Friday
10        Monday
11      Thursday
12        Sunday
13     Wednesday
14      Saturday
15       Tuesday
16        Friday
17        Monday
18      Thursday
19        Sunday
20     Wednesday
21      Saturday
22       Tuesday
23        Friday
24        Monday
25      Thursday
26        Sunday
27     Wednesday
28      Saturday
29       Tuesday
         ...    
138       Sunday
139    Wednesday
140     Saturday
141      Tuesday
142       Friday
143       Monday
144     Thursday
145       Sunday
146    Wednesday
147     Saturday
148      Tuesday
149       Friday
150       Monday
151     Thursday
152       Sunday
153    Wednesday
154     Saturday
155      Tuesday
156       Friday
157       Monday
158     Thursday
159       Sunday
160    Wednesday
161     Saturday
162      Tuesday
163       Friday
164       Monday
165     Thursday
166       Sunday
167    Wednesday
Length: 168, dtype: object

In [62]:
# Boolean mask that is True where the date is the first day of a quarter
mask = s.dt.is_quarter_start
s[mask]
# Only the dates that fall on a quarter start are shown

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]