In [4]:
import datetime
import pandas as pd
import numpy as np

In [2]:
# Time Series information can be parsed from various sources and formates

In [5]:
dti = pd.to_datetime(
["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018,1,1)])

In [6]:
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

In [7]:
# Generating sequencees of fixed-frequebcy dates and time spans #

In [10]:
fixed_frq_dates = pd.date_range("2018-01-01", periods=6, freq="H")

In [11]:
fixed_frq_dates

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00'],
              dtype='datetime64[ns]', freq='H')

In [13]:
# Converting to UTC time #

In [12]:
fixed_frq_dates.tz_localize("UTC")

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00', '2018-01-01 03:00:00+00:00',
               '2018-01-01 04:00:00+00:00', '2018-01-01 05:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

In [14]:
# Converting to local time zone #

In [16]:
fixed_frq_dates.tz_localize("UTC").tz_convert("US/Pacific")

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00', '2017-12-31 19:00:00-08:00',
               '2017-12-31 20:00:00-08:00', '2017-12-31 21:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

In [17]:
# Resampling or converting a time series to a particular frequency

In [19]:
idx = pd.date_range(start="2018-01-01", periods=5, freq="H")

In [22]:
ts = pd.Series(range(len(idx)), index=idx)

In [49]:
# ts obj. is sampled with hourly frequency
# let's resampled/downsample it with 2 Hour freq, and calc. mean
# Resampling is akin to groupBy with indices on index
# ts.resample("2H").groups will provide group key

In [42]:
ts.resample("2H").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [47]:
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64

In [46]:
ts.resample("2H").groups

{Timestamp('2018-01-01 00:00:00', freq='2H'): 2,
 Timestamp('2018-01-01 02:00:00', freq='2H'): 4,
 Timestamp('2018-01-01 04:00:00', freq='2H'): 5}

In [37]:
ts.resample("2H").get_group(name='2018-01-01 00:00:00')

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
Freq: H, dtype: int64

In [39]:
ts.resample("2H").get_group(name='2018-01-01 02:00:00')

2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
Freq: H, dtype: int64

In [40]:
ts.resample("2H").get_group(name='2018-01-01 04:00:00')

2018-01-01 04:00:00    4
Freq: H, dtype: int64

In [51]:
# Doing Date and Time Arithemetic with abs or relative time increments #

In [64]:
friday = pd.Timestamp("2018-01-05")

In [65]:
friday.day_name()

'Friday'

In [66]:
saturday = friday + pd.Timedelta("1 day")

In [67]:
saturday.day_name()

'Saturday'

In [68]:
friday

Timestamp('2018-01-05 00:00:00')

In [69]:
monday = friday + pd.offsets.BDay()

In [70]:
monday.day_name()

'Monday'

## Overview

1. Date times: Similar to `datetime.datetime` from the standard library
2. Time delta: Similar to `datetime.timedelta`from python
3. Time spans: A span of time defined with req. frequency


In [73]:
# Series with time component in index #
pd.Series(range(4), index=pd.date_range(start="2000",
                                        freq="D", periods=4))

2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
Freq: D, dtype: int64

In [74]:
pd.Series(pd.date_range(start="2000",freq="D", periods=4))

0   2000-01-01
1   2000-01-02
2   2000-01-03
3   2000-01-04
dtype: datetime64[ns]

In [75]:
pd.Series(pd.period_range(start="2000",freq="D", periods=4))

0    2000-01-01
1    2000-01-02
2    2000-01-03
3    2000-01-04
dtype: period[D]

In [76]:
# Null date times, time deltas and time spans as NAT
# can be usefull representing missing or null date like 

In [78]:
pd.Timestamp(pd.NaT)

NaT

In [80]:
pd.Timedelta(pd.NaT)

NaT

In [81]:
pd.Period(pd.NaT)

NaT

In [82]:
pd.NaT == pd.NaT

False

In [83]:
np.nan == np.nan

False

## Timestamps vs. time spans

**Timestamp:**
When values are associated with time:
> Like a graph between: `Temperature vs Time`

**timespans:**
Change in variables can be associated with a time span

In [85]:
pd.Timestamp(datetime.datetime(2012, 5, 1))

Timestamp('2012-05-01 00:00:00')

In [86]:
pd.Timestamp("2012-05-01")

Timestamp('2012-05-01 00:00:00')

In [87]:
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [88]:
pd.Period("2011-01")

Period('2011-01', 'M')

In [89]:
pd.Period("2012-05", freq="D")

Period('2012-05-01', 'D')

In [91]:
# Both timestamp and period can serve as an index #
date = [pd.Timestamp("2012-05-01"),
        pd.Timestamp("2012-05-02"),
        pd.Timestamp("2012-05-03")]

In [93]:
ts = pd.Series(np.random.randn(3), date)

In [94]:
ts

2012-05-01   -0.350305
2012-05-02   -0.224016
2012-05-03   -1.233673
dtype: float64

In [95]:
type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [96]:
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [97]:
# Using Periods #
periods = [pd.Period("2012-01"),
           pd.Period("2012-02"),
           pd.Period("2012-03")]

In [98]:
ts_2 = pd.Series(np.random.randint(3), periods)

In [99]:
ts_2

2012-01    2
2012-02    2
2012-03    2
Freq: M, dtype: int64

In [100]:
type(ts_2.index)

pandas.core.indexes.period.PeriodIndex

In [101]:
ts_2.index

PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')

In [105]:
## Converting to timestamps ##
## Series obj gets converted to dateime with index unchanged
## list argument becomes datetimeIndex

In [103]:
pd.to_datetime(pd.Series(["Jul 31, 2009",
                          "2010-01-10", None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [106]:
# List of strings ->pd.to_datetime -> DateTimeIndex
pd.to_datetime(["2005/11/23", "2010.12.31"])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [107]:
# Str -> pd.to_datetime -> Timestamp
pd.to_datetime("2020/11/12")

Timestamp('2020-11-12 00:00:00')

In [111]:
# format argument can be passed to pd.to_datetime()
# ensures specific parsing
# can speed up the conservation considerably

In [112]:
pd.to_datetime("2010/11/12")

Timestamp('2010-11-12 00:00:00')

In [113]:
# Now providing format args #
pd.to_datetime("2010/11/12", format="%Y/%m/%d")

Timestamp('2010-11-12 00:00:00')

In [114]:
pd.to_datetime("12-11-2010", format="%d-%m-%Y")

Timestamp('2010-11-12 00:00:00')

In [108]:
pd.Timestamp("2020/11/12")

Timestamp('2020-11-12 00:00:00')

In [110]:
# DatetimeIndex constructor can be called directly #
pd.DatetimeIndex(["2018-01-01", 
                  "2018-01-03", 
                  "2018/01/05"])

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)

In [115]:
# Assembling datetime from multiple DataFrame Columns

In [116]:
df = pd.DataFrame(
{
    "year": [2015,2016],
    "month": [2,3],
    "day": [4,5],
    "hour":[2,3]
})

In [117]:
df

Unnamed: 0,year,month,day,hour
0,2015,2,4,2
1,2016,3,5,3


In [118]:
pd.to_datetime(df[["year","month","day"]])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

In [138]:
# Epoch timestamps #

In [140]:
pd.to_datetime([1349720105, 1349806505], unit="s")

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05'], dtype='datetime64[ns]', freq=None)

In [141]:
# Timestamps to epoch #
stamps = pd.date_range("2012-10-08 18:15:05",
                       periods=4, freq="D")

In [142]:
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq='D')

In [144]:
(stamps-pd.Timestamp("1970-01-01"))//pd.Timedelta("1s")

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

In [145]:
pd.to_datetime([1,2,3], unit="D")

DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)

In [146]:
# Generating ranges of timestamps #

In [147]:
dates = [datetime.datetime(2012,5,1),
         datetime.datetime(2012,5,2),
         datetime.datetime(2012,5,3)]

In [148]:
index= pd.DatetimeIndex(dates)

In [149]:
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [150]:
# Same as above #
index = pd.Index(dates)

In [151]:
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [152]:
# Creating timestamps on a regular frequency #

In [153]:
start = pd.Timestamp(2011,1,1)

In [154]:
end = pd.Timestamp(2012,1,1)

In [155]:
index = pd.date_range(start,end)

In [156]:
index

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [157]:
index = pd.bdate_range(start, end)
index

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [158]:
pd.date_range(start, periods=1000, freq="M")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [167]:
pd.date_range(start,freq="W", periods=10)

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06'],
              dtype='datetime64[ns]', freq='W-SUN')

In [168]:
pd.Timestamp.min

Timestamp('1677-09-21 00:12:43.145225')

In [169]:
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

## Indexing

In [170]:
rng = pd.date_range(start, end,freq="BM")

In [171]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [172]:
ts

2011-01-31    0.220608
2011-02-28   -0.872271
2011-03-31    0.073097
2011-04-29   -1.666693
2011-05-31   -0.094093
2011-06-30    1.395637
2011-07-29   -0.381317
2011-08-31   -0.408146
2011-09-30   -0.034366
2011-10-31    1.020184
2011-11-30    0.734959
2011-12-30    1.102772
Freq: BM, dtype: float64

In [173]:
## Partial String Indexing

In [180]:
ts.loc["1/31/2011"]

0.22060848122835192

In [179]:
ts.loc[datetime.datetime(2011,12,25):]

2011-12-30    1.102772
Freq: BM, dtype: float64

In [178]:
ts.loc["10/31/2011":"12/31/2011"]

2011-10-31    1.020184
2011-11-30    0.734959
2011-12-30    1.102772
Freq: BM, dtype: float64

In [181]:
ts.loc["2011"]

2011-01-31    0.220608
2011-02-28   -0.872271
2011-03-31    0.073097
2011-04-29   -1.666693
2011-05-31   -0.094093
2011-06-30    1.395637
2011-07-29   -0.381317
2011-08-31   -0.408146
2011-09-30   -0.034366
2011-10-31    1.020184
2011-11-30    0.734959
2011-12-30    1.102772
Freq: BM, dtype: float64

In [182]:
ts.loc["2011-6"]

2011-06-30    1.395637
Freq: BM, dtype: float64

In [184]:
dft = pd.DataFrame(np.random.randn(10000,1 ),
                   columns=["A"],
                   index=pd.date_range("20130101",
                                        periods=10000,
                                        freq="T"))

In [186]:
dft.loc["2013"]

Unnamed: 0,A
2013-01-01 00:00:00,1.347778
2013-01-01 00:01:00,1.192616
2013-01-01 00:02:00,-0.281894
2013-01-01 00:03:00,-1.086628
2013-01-01 00:04:00,-0.136054
...,...
2013-01-07 22:35:00,0.536998
2013-01-07 22:36:00,-1.328716
2013-01-07 22:37:00,-0.095318
2013-01-07 22:38:00,-0.643462


In [187]:
dft.loc["2013-1":"2013-2"]

Unnamed: 0,A
2013-01-01 00:00:00,1.347778
2013-01-01 00:01:00,1.192616
2013-01-01 00:02:00,-0.281894
2013-01-01 00:03:00,-1.086628
2013-01-01 00:04:00,-0.136054
...,...
2013-01-07 22:35:00,0.536998
2013-01-07 22:36:00,-1.328716
2013-01-07 22:37:00,-0.095318
2013-01-07 22:38:00,-0.643462


In [188]:
dft.loc["2013-1":"2013-2-28"]

Unnamed: 0,A
2013-01-01 00:00:00,1.347778
2013-01-01 00:01:00,1.192616
2013-01-01 00:02:00,-0.281894
2013-01-01 00:03:00,-1.086628
2013-01-01 00:04:00,-0.136054
...,...
2013-01-07 22:35:00,0.536998
2013-01-07 22:36:00,-1.328716
2013-01-07 22:37:00,-0.095318
2013-01-07 22:38:00,-0.643462


In [192]:
dft.loc["2013-1-15":"2013-1-15 12:30:00"]

Unnamed: 0,A


In [197]:
dft.loc["2013-1"]

Unnamed: 0,A
2013-01-01 00:00:00,1.347778
2013-01-01 00:01:00,1.192616
2013-01-01 00:02:00,-0.281894
2013-01-01 00:03:00,-1.086628
2013-01-01 00:04:00,-0.136054
...,...
2013-01-07 22:35:00,0.536998
2013-01-07 22:36:00,-1.328716
2013-01-07 22:37:00,-0.095318
2013-01-07 22:38:00,-0.643462


## With MultiIndex 

In [198]:
dft2 = pd.DataFrame(np.random.randn(20,1),
                    columns=["A"],
                    index=pd.MultiIndex.from_product(
                    [pd.date_range("20130101",
                                   periods=10,
                                   freq="12H"), ["a","b"]]))

In [200]:
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,0.651789
2013-01-01 00:00:00,b,-0.022415
2013-01-01 12:00:00,a,-1.841491
2013-01-01 12:00:00,b,-0.582807
2013-01-02 00:00:00,a,0.074447
2013-01-02 00:00:00,b,-0.773029
2013-01-02 12:00:00,a,-0.859182
2013-01-02 12:00:00,b,-1.717053
2013-01-03 00:00:00,a,1.467197
2013-01-03 00:00:00,b,-0.462165


In [201]:
dft2.loc["2013-01-05"]

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,-1.371595
2013-01-05 00:00:00,b,-0.071852
2013-01-05 12:00:00,a,0.554301
2013-01-05 12:00:00,b,-0.852896


In [202]:
idx = pd.IndexSlice

In [203]:
idx

<pandas.core.indexing._IndexSlice at 0x7faa924b77f0>

In [211]:
dft2 = dft2.swaplevel(0,1).sort_index()

In [215]:
dft2.loc[idx["2013-01-05","a"], :]

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,-1.371595
2013-01-05 12:00:00,a,0.554301


In [212]:
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,0.651789
2013-01-01 00:00:00,b,-0.022415
2013-01-01 12:00:00,a,-1.841491
2013-01-01 12:00:00,b,-0.582807
2013-01-02 00:00:00,a,0.074447
2013-01-02 00:00:00,b,-0.773029
2013-01-02 12:00:00,a,-0.859182
2013-01-02 12:00:00,b,-1.717053
2013-01-03 00:00:00,a,1.467197
2013-01-03 00:00:00,b,-0.462165


In [216]:
## Slice vs. Exact Match

In [227]:
series_minute = pd.Series([1,2,3],
                          pd.DatetimeIndex(
                              ["2011-12-31 23:59:00", 
                               "2012-01-01 00:00:00", 
                               "2012-01-01 00:02:00"]
                          ))

In [223]:
series_minute.index.resolution


'second'

In [224]:
series_minute.loc["2011-12-31 23"]

2011-12-31 23:59:33    1
dtype: int64

In [225]:
series_minute.loc["2011-12-31 23:59"]

2011-12-31 23:59:33    1
dtype: int64

In [226]:
series_minute.loc["2011-12-31 23:59:33"]

1

In [228]:
dft_minute = pd.DataFrame(
                 {"a": [1,2,3], "b": [4,5,6]},
                 index=series_minute.index)

In [230]:
dft_minute.loc["2011-12-31 23"]

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


In [231]:
dft_minute.loc["2011-12-31 23:59"]

a    1
b    4
Name: 2011-12-31 23:59:00, dtype: int64

In [232]:
series  = pd.Series([1,2],
                    index=[pd.Timestamp("2011-12-30"),
                           pd.Timestamp("2011-12-31")])

In [235]:
series.loc['2011-12']

2011-12-30    1
2011-12-31    2
dtype: int64

In [236]:
rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")
ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)

In [237]:
ts2

2011-01-02    1.116167
2011-01-09   -1.418032
2011-01-16    0.068921
2011-01-23    2.189581
2011-01-30   -1.502494
2011-02-06   -0.163274
2011-02-13   -1.599377
2011-02-20    0.091595
2011-02-27   -0.588517
2011-03-06   -1.581058
2011-03-13   -0.039902
2011-03-20   -0.388749
2011-03-27   -0.011646
2011-04-03   -0.003046
2011-04-10   -0.239177
2011-04-17   -0.507566
2011-04-24   -0.645304
2011-05-01    2.279336
2011-05-08   -1.379765
2011-05-15    0.261132
2011-05-22   -0.525877
2011-05-29    0.399367
2011-06-05    0.145498
2011-06-12    0.570071
2011-06-19    2.137592
2011-06-26    0.181878
2011-07-03   -0.452068
2011-07-10    1.427895
2011-07-17    2.691462
2011-07-24   -1.489073
2011-07-31    0.385484
2011-08-07   -0.153604
2011-08-14    0.694771
2011-08-21    1.732471
2011-08-28   -0.462827
2011-09-04   -0.632085
2011-09-11   -0.231121
2011-09-18   -0.895613
2011-09-25    0.997458
2011-10-02    0.308814
2011-10-09    0.475744
2011-10-16    0.515165
2011-10-23    0.165192
2011-10-30 

In [238]:
ts2.truncate(before="2011-11", after="2011-12")

2011-11-06    0.598173
2011-11-13   -1.739870
2011-11-20   -0.536498
2011-11-27   -0.916020
Freq: W-SUN, dtype: float64

In [239]:
ts2.loc["2011-11"]

2011-11-06    0.598173
2011-11-13   -1.739870
2011-11-20   -0.536498
2011-11-27   -0.916020
Freq: W-SUN, dtype: float64

In [243]:
ts2.iloc[[0,2,6]].index

DatetimeIndex(['2011-01-02', '2011-01-16', '2011-02-13'], dtype='datetime64[ns]', freq=None)

## Time Series-related instance Methods

In [245]:
data.index = data['Date']

In [270]:
data['T'].replace({-200:np.nan}, inplace=True)

In [271]:
temp_data = data[['T']]

In [272]:
temp_data['lead_1'] = temp_data.shift(1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [273]:
temp_data['diff'] = temp_data['lead_1'] - temp_data['T']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [274]:
import plotly.express as px

In [275]:
px.line(temp_data, y=['T'])