In [1]:
##################### datetime, pandas, timedelta ###############

import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
my_year = 2020
my_month = 1
my_day = 2
my_hour = 13
my_minute = 30
my_second = 15

# creating datetime object
my_date = datetime(my_year, my_month, my_day, my_hour, my_minute, my_second)
print(my_date)

# we can also fetch year, month, dayofweek, day, hour, minute, second from this object
print(my_date.hour)

2020-01-02 13:30:15
13


# NumPy Datetime Arrays, Date Ranges
Just as <tt>np.arange(start,stop,step)</tt> can be used to produce an array of evenly-spaced integers, we can pass a <tt>dtype</tt> argument to obtain an array of dates. Remember that the stop date is <em>exclusive</em>.

In [3]:
# creating datetime series with NUMPY
series = np.array(['2002-03-15', '2020-03-16', '2020-02-17'], dtype='datetime64')
print(series)

# yearly
series = np.array(['2002-03-15', '2020-03-16', '2020-02-17'], dtype='datetime64[Y]')
print(series)

# daywise from a range
series = np.arange('2018-06-01', '2018-06-23', 7, dtype='datetime64[D]')
print(series)
# or by omitting the step value we can obtain every value based on the precision.
series = np.arange('2018-06-01', '2018-06-23', dtype='datetime64[D]') 


['2002-03-15' '2020-03-16' '2020-02-17']
['2002' '2020' '2020']
['2018-06-01' '2018-06-08' '2018-06-15' '2018-06-22']


NOTE: We see the dtype listed as 'datetime64[D]'. This tells us that NumPy applied a day-level date precision.
If we want we can pass in a different measurement, such as [h] for hour or [Y] for year.

# Pandas Datetime Index

In [4]:
# creating datetime series with PANDAS
# daywise
series = pd.date_range('2020-01-01', periods=7, freq='D')
print('daywise: ', series)
# or based on start and end date
myTseries = pd.date_range('2017-08-01', '2017-10-31')
print('size: ', myTseries.size) # print size of time series

# hour wise
myTseriesHrs = pd.date_range('2017-08-01', '2017-10-31', freq = 'H')
print('hourwise:', myTseriesHrs[:5])

# convert to datetime object from specified format to standard format
series = pd.to_datetime(['2020--7-1', '2018--8-6'], format='%Y--%d-%m')
print('conversion: ', series)

daywise:  DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07'],
              dtype='datetime64[ns]', freq='D')
size:  92
hourwise: DatetimeIndex(['2017-08-01 00:00:00', '2017-08-01 01:00:00',
               '2017-08-01 02:00:00', '2017-08-01 03:00:00',
               '2017-08-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')
conversion:  DatetimeIndex(['2020-01-07', '2018-06-08'], dtype='datetime64[ns]', freq=None)


here we can provide datetime in any format and pandas will take care of it based on the format you give in format parameter

In [5]:
# creating pandas series with timeseries index
myTseriesSeq = pd.Series(np.random.normal(150, 10, len(myTseries)), index = myTseries)
print(myTseriesSeq.head())

myTseriesSeqHrs = pd.Series(np.random.normal(150, 10, len(myTseriesHrs)), index = myTseriesHrs)
print(myTseriesSeqHrs.head())
#print(myTseriesSeqHrs.tail())
#print(myTseriesSeqHrs.size)

2017-08-01    153.017634
2017-08-02    155.530080
2017-08-03    152.568520
2017-08-04    141.725196
2017-08-05    149.541564
Freq: D, dtype: float64
2017-08-01 00:00:00    149.073139
2017-08-01 01:00:00    147.292039
2017-08-01 02:00:00    159.156975
2017-08-01 03:00:00    157.859529
2017-08-01 04:00:00    144.758682
Freq: H, dtype: float64


In [16]:
# create timeseries dataframe
# create data 3 rows, 2 columns
data = np.random.randn(3,2)
cols = ['A', 'B'] # column names
idx = pd.date_range('2020-01-01', periods=3, freq='D') # index values
df = pd.DataFrame(data, columns=cols, index=idx) # create dataframe
print(df, '\n')
print(
    # index sorting
    df.index, '\n\n', 
    df.index.max(), '\n\n', 
    df.index.argmax(), '\n\n', 
    df.index.min(), '\n\n', 
    df.index.argmin(), '\n\n',

    # sorting index based on column values
    df.idxmin(), '\n\n', 
    df.idxmax(), '\n\n', 
    df[['A']].idxmin()
    )

                   A         B
2020-01-01 -0.307562 -1.648806
2020-01-02  0.031396  0.128582
2020-01-03 -1.639578 -1.263568 

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='datetime64[ns]', freq='D') 

 2020-01-03 00:00:00 

 2 

 2020-01-01 00:00:00 

 0 

 A   2020-01-03
B   2020-01-01
dtype: datetime64[ns] 

 A   2020-01-02
B   2020-01-02
dtype: datetime64[ns] 

 A   2020-01-03
dtype: datetime64[ns]


In [13]:
# creating time deltas
a = pd.Timedelta('1 days 4 hours 15 min 2 s 8 ms')
print(a)

# another way of creating time deltas
a = pd.Timedelta(days=1, hours=4, minutes=15, seconds=8, milliseconds=8)
print(a)

# another way of creating time deltas
a = pd.to_timedelta(np.arange(4), unit='s')
print(a)

# adding timedeltas
ts1 = pd.Series(pd.date_range('2017-10-01', periods=5, freq='D'))
print(ts1)
ts2 = pd.Timedelta(hours=4, minutes=15)
print(ts2)
print(ts1 + ts2)

1 days 04:15:02.008000
1 days 04:15:08.008000
TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
                '0 days 00:00:03'],
               dtype='timedelta64[ns]', freq=None)
0   2017-10-01
1   2017-10-02
2   2017-10-03
3   2017-10-04
4   2017-10-05
dtype: datetime64[ns]
0 days 04:15:00
0   2017-10-01 04:15:00
1   2017-10-02 04:15:00
2   2017-10-03 04:15:00
3   2017-10-04 04:15:00
4   2017-10-05 04:15:00
dtype: datetime64[ns]
