In [1]:
import pandas as pd
import datetime as dt

# review of Python's datetime module

In [2]:
# Build a calendar date (no time-of-day) from explicit year/month/day parts.
dt.date(year=1984, month=12, day=21)

datetime.date(1984, 12, 21)

In [3]:
# str() on a date gives its YYYY-MM-DD text form.
str(dt.date(year=1984, month=12, day=21))

'1984-12-21'

In [4]:
# Keep the date in a variable so its individual components can be inspected.
birthday = dt.date(year=1984, month=12, day=21)
birthday.day

21

In [5]:
# Month number of the stored date (12, i.e. December).
birthday.month

12

In [6]:
# Four-digit year of the stored date.
birthday.year

1984

In [7]:
# A datetime carries a time-of-day in addition to the calendar date.
someday = dt.datetime(
    year=1984, month=12, day=21,
    hour=23, minute=49, second=32,
)

In [8]:
# datetime objects expose the same date components as date objects.
someday.day

21

In [9]:
# Hour of the day on a 24-hour clock (23 here).
someday.hour

23

In [10]:
# Seconds component of the time-of-day.
someday.second

32

# pandas Timestamp Object

In [11]:
# "YYYY-MM-DD" string; with no time given, the Timestamp falls at midnight.
pd.Timestamp("2015-09-12")

Timestamp('2015-09-12 00:00:00')

In [12]:
# Slash-separated string with a two-digit year: read month-first, giving 2015-09-12.
pd.Timestamp("09/12/15")

Timestamp('2015-09-12 00:00:00')

In [13]:
# Date and time in one string; again read month-first (12 February 2015, 12:34).
pd.Timestamp("2/12/2015 12:34")

Timestamp('2015-02-12 12:34:00')

In [14]:
# A Timestamp can also be built from a standard-library datetime object.
pd.Timestamp(
    dt.datetime(year=1984, month=12, day=21, hour=23, minute=49, second=32)
)

Timestamp('1984-12-21 23:49:32')

# pandas DatetimeIndex

In [15]:
# Plain strings, one per day; DatetimeIndex parses them on construction.
dates = [
    "2016/01/01",
    "2016/01/02",
    "2016/01/03",
]
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03'], dtype='datetime64[ns]', freq=None)

In [16]:
# NOTE: this rebinds `dates` from the list of strings to a DatetimeIndex, so
# every later cell that reads `dates` sees the index rather than the strings.
dates = pd.DatetimeIndex(dates)
series_value = [100, 200, 300]

# Use the parsed dates as the row labels of a Series.
pd.Series(data=series_value, index=dates)

2016-01-01    100
2016-01-02    200
2016-01-03    300
dtype: int64

# pd.to_datetime()

In [17]:
# Parse date strings into a DatetimeIndex.  The strings are written out here
# because `dates` was rebound to a DatetimeIndex in an earlier cell; passing
# it would only hand back an index that is already parsed.
pd.to_datetime(["2016/01/01", "2016/01/02", "2016/01/03"])

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03'], dtype='datetime64[ns]', freq=None)

In [18]:
# Wrap `dates` (a DatetimeIndex at this point) in a Series; the values keep
# their datetime64[ns] dtype.
pd.Series(dates)

0   2016-01-01
1   2016-01-02
2   2016-01-03
dtype: datetime64[ns]

In [19]:
# to_datetime also accepts a Series and returns a datetime64 Series.  The
# Series is built from strings here so that the call performs a real
# conversion; `dates` is already a DatetimeIndex at this point.
pd.to_datetime(pd.Series(["2016/01/01", "2016/01/02", "2016/01/03"]))

0   2016-01-01
1   2016-01-02
2   2016-01-03
dtype: datetime64[ns]

In [20]:
# Show what `dates` is currently bound to: the DatetimeIndex assigned in an
# earlier cell, not the original list of strings.
dates

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03'], dtype='datetime64[ns]', freq=None)

In [21]:
# The middle entry cannot be parsed as a date.
mixedData = [
    "2016/01/01",
    "Hero",
    "2016/01/03",
]

In [22]:
# errors="coerce" replaces entries that cannot be parsed with NaT instead of
# raising.
pd.to_datetime(mixedData, errors="coerce")

DatetimeIndex(['2016-01-01', 'NaT', '2016-01-03'], dtype='datetime64[ns]', freq=None)

# pd.date_range()

In [23]:
# Every second day starting 1 January.  10 January is not on the 2-day grid,
# so the range stops at 9 January.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="2D")

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [24]:
# Fix the start and the number of entries; the end date follows from them.
pd.date_range(start="2016-01-01", periods=20, freq="2D")

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09', '2016-01-11', '2016-01-13', '2016-01-15',
               '2016-01-17', '2016-01-19', '2016-01-21', '2016-01-23',
               '2016-01-25', '2016-01-27', '2016-01-29', '2016-01-31',
               '2016-02-02', '2016-02-04', '2016-02-06', '2016-02-08'],
              dtype='datetime64[ns]', freq='2D')

In [25]:
# Fix the end and the number of entries; the range is counted backwards from
# the end date.
pd.date_range(end="2016-01-01", periods=20, freq="2D")

DatetimeIndex(['2015-11-24', '2015-11-26', '2015-11-28', '2015-11-30',
               '2015-12-02', '2015-12-04', '2015-12-06', '2015-12-08',
               '2015-12-10', '2015-12-12', '2015-12-14', '2015-12-16',
               '2015-12-18', '2015-12-20', '2015-12-22', '2015-12-24',
               '2015-12-26', '2015-12-28', '2015-12-30', '2016-01-01'],
              dtype='datetime64[ns]', freq='2D')

# dt accessor

In [26]:
# Ten years of dates spaced 24 days apart.
pd.date_range(start="2016-01-01", end="2026-01-10", freq="24D")

DatetimeIndex(['2016-01-01', '2016-01-25', '2016-02-18', '2016-03-13',
               '2016-04-06', '2016-04-30', '2016-05-24', '2016-06-17',
               '2016-07-11', '2016-08-04',
               ...
               '2025-05-25', '2025-06-18', '2025-07-12', '2025-08-05',
               '2025-08-29', '2025-09-22', '2025-10-16', '2025-11-09',
               '2025-12-03', '2025-12-27'],
              dtype='datetime64[ns]', length=153, freq='24D')

In [27]:
# Same 24-day-spaced range as displayed above, kept for the cells that follow.
bunch_of_dates = pd.date_range(
    start="2016-01-01",
    end="2026-01-10",
    freq="24D",
)

In [28]:
# Put the dates in a Series; the cells below read its datetime components
# through the .dt accessor.
s = pd.Series(bunch_of_dates)

In [29]:
# Peek at the first three entries.
s.head(3)

0   2016-01-01
1   2016-01-25
2   2016-02-18
dtype: datetime64[ns]

In [30]:
# Month number (1-12) of every date in the Series.
s.dt.month

0       1
1       1
2       2
3       3
4       4
       ..
148     9
149    10
150    11
151    12
152    12
Length: 153, dtype: int64

In [31]:
# Day of the week as an integer, Monday=0 through Sunday=6
# (2016-01-01, a Friday, comes out as 4).
s.dt.weekday

0      4
1      0
2      3
3      6
4      2
      ..
148    0
149    3
150    6
151    2
152    5
Length: 153, dtype: int64

# pandas_datareader

In [32]:
from pandas_datareader import data

In [33]:
# Fetch MSFT price history for 2010-2020 and display it.
# NOTE(review): this downloads over the network and depends on the "yahoo"
# source being supported by the installed pandas_datareader -- confirm.
data.DataReader(
    name="MSFT",
    data_source="yahoo",
    start="2010-01-01",
    end="2020-12-31",
)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.100000,30.590000,30.620001,30.950001,38409100.0,23.683075
2010-01-05,31.100000,30.639999,30.850000,30.959999,49749600.0,23.690722
2010-01-06,31.080000,30.520000,30.879999,30.770000,58182400.0,23.545328
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.300468
2010-01-08,30.879999,30.240000,30.280001,30.660000,51197400.0,23.461159
...,...,...,...,...,...,...
2020-12-24,223.610001,221.199997,221.419998,222.750000,10550600.0,218.847702
2020-12-28,226.029999,223.020004,224.449997,224.960007,17933500.0,221.019012
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,220.223206
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,217.796448


In [34]:
# Same request as the preview cell above, this time kept in `stock` for the
# rest of the notebook.
stock = data.DataReader(
    name="MSFT",
    data_source="yahoo",
    start="2010-01-01",
    end="2020-12-31",
)

In [35]:
# Underlying data as a NumPy array.  The pandas documentation recommends
# DataFrame.to_numpy() over the .values attribute; the resulting array is
# the same.
stock.to_numpy()

array([[3.11000004e+01, 3.05900002e+01, 3.06200008e+01, 3.09500008e+01,
        3.84091000e+07, 2.36830750e+01],
       [3.11000004e+01, 3.06399994e+01, 3.08500004e+01, 3.09599991e+01,
        4.97496000e+07, 2.36907234e+01],
       [3.10799999e+01, 3.05200005e+01, 3.08799992e+01, 3.07700005e+01,
        5.81824000e+07, 2.35453339e+01],
       ...,
       [2.27179993e+02, 2.23580002e+02, 2.26309998e+02, 2.24149994e+02,
        1.74032000e+07, 2.20223206e+02],
       [2.25630005e+02, 2.21470001e+02, 2.25229996e+02, 2.21679993e+02,
        2.02723000e+07, 2.17796463e+02],
       [2.23000000e+02, 2.19679993e+02, 2.21699997e+02, 2.22419998e+02,
        2.09421000e+07, 2.18523514e+02]])

In [36]:
# Row labels: a DatetimeIndex named "Date" with one entry per row.
stock.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
               '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [37]:
# Column labels of the frame.
stock.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

In [38]:
# Both axes at once, as a list: [row index, column index].
stock.axes

[DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
                '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29',
                '2020-12-30', '2020-12-31'],
               dtype='datetime64[ns]', name='Date', length=2769, freq=None),
 Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')]

In [39]:
# Per-column dtypes; every column, Volume included, is float64.
stock.dtypes

High         float64
Low          float64
Open         float64
Close        float64
Volume       float64
Adj Close    float64
dtype: object

In [40]:
# Confirm that DataReader returned a DataFrame.
type(stock)

pandas.core.frame.DataFrame

In [41]:
# A date string works as a row label on a DatetimeIndex; the matching row
# comes back as a Series.
stock.loc["2010-01-04"]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.368307e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [45]:
# Same row lookup, with the label parsed explicitly by pd.to_datetime first.
stock.loc[pd.to_datetime("2010-01-04")]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.368307e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [46]:
# Same row lookup again, using an explicit Timestamp as the label.
stock.loc[pd.Timestamp("2010-01-04")]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.368307e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [48]:
# Re-display the index as the starting point for the date arithmetic below.
stock.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
               '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [50]:
# Shift every date in the index forward by five calendar days.
stock.index + pd.DateOffset(days=5)

DatetimeIndex(['2010-01-09', '2010-01-10', '2010-01-11', '2010-01-12',
               '2010-01-13', '2010-01-16', '2010-01-17', '2010-01-18',
               '2010-01-19', '2010-01-20',
               ...
               '2020-12-22', '2020-12-23', '2020-12-26', '2020-12-27',
               '2020-12-28', '2020-12-29', '2021-01-02', '2021-01-03',
               '2021-01-04', '2021-01-05'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

# Timeseries Offsets

In [51]:
# Roll every date forward to a month end.  A date that already sits on a
# month end moves to the NEXT month end (2020-12-31 becomes 2021-01-31).
stock.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2021-01-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [52]:
# Roll every date back to the previous month end.  A date already on a month
# end moves back a full month (2020-12-31 becomes 2020-11-30).
stock.index - pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31',
               ...
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [53]:
from pandas.tseries import offsets

In [54]:
# Roll every date back to the first day of its month
# (2010-01-04 becomes 2010-01-01).
stock.index - offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [55]:
# Business-month-begin variant: rolls back to the first business day of the
# month.  The rows visible in the output match MonthBegin because
# 2010-01-01 and 2020-12-01 are both weekdays.
stock.index - offsets.BMonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

# Timedelta Object

In [57]:
# Two consecutive days; subtracting Timestamps yields a Timedelta.
time_a, time_b = (
    pd.Timestamp(day_text) for day_text in ("2010-01-01", "2010-01-02")
)
time_b - time_a

Timedelta('1 days 00:00:00')

In [58]:
# Reversing the operands gives a negative Timedelta.
time_a - time_b

Timedelta('-1 days +00:00:00')

In [65]:
# A Timedelta can be built directly from its day/hour/minute components.
pd.Timedelta(days=2, hours=2, minutes=2)

Timedelta('2 days 02:02:00')