In [1]:
from datetime import datetime
datetime(year=2015, month=7, day=4)

datetime.datetime(2015, 7, 4, 0, 0)

In [2]:
from dateutil import parser
date = parser.parse("4th of July, 2015")
date

datetime.datetime(2015, 7, 4, 0, 0)

In [4]:
# let's print the day of the week
date.strftime('%A')

'Saturday'

In [5]:
import numpy as np
date = np.array('2015-07-04', dtype=np.datetime64)
date

array('2015-07-04', dtype='datetime64[D]')

In [8]:
# let us vectorize the operation
date + np.arange(12)

array(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
       '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
       '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
      dtype='datetime64[D]')

In [9]:
# # Constructing a DatetimeIndex that can be used in a Series or DataFrame;
import pandas as pd
date = pd.to_datetime("4th of July, 2015")
date

Timestamp('2015-07-04 00:00:00')

In [11]:
date.strftime('%A')

'Saturday'

In [12]:
# we can do NumPy-style vectorized operations directly on this same object
date + pd.to_timedelta(np.arange(12), 'D')

DatetimeIndex(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
               '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
               '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
              dtype='datetime64[ns]', freq=None)

Pandas Time Series:Indexing by Time
We can construct a Series object that has time indexed data

In [13]:
index = pd.DatetimeIndex(['2014-07-04', '2014-08-04',
                         '2015-07-04', '2015-08-04'])
data = pd.Series([0, 1, 2, 3], index=index)
data

2014-07-04    0
2014-08-04    1
2015-07-04    2
2015-08-04    3
dtype: int64

In [14]:
# passing values that can be coerced into dates
data['2014-07-04':'2015-07-04']

2014-07-04    0
2014-08-04    1
2015-07-04    2
dtype: int64

In [15]:
data['2015']

2015-07-04    2
2015-08-04    3
dtype: int64

Pandas Time Series Data Structures
Time stamps associated structure is DatetimeIndex
Time periods - PeriodIndex
Time deltas or durations - TimedeltaIndex
we use the pd.to.datetime() which can parse a wide variety of formats.
Passing a single date to pd.to_datetime() yields a Timestamp;passing a series of dates by default yields a DatetimeIndex


In [17]:
dates = pd.to_datetime([datetime(2015, 7, 3), '4th of July, 2015',
                      '2015-Jul-6', '07-07-2015', '20150708'])
dates

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
               '2015-07-08'],
              dtype='datetime64[ns]', freq=None)

In [18]:
# lets convert the DatatimeIndex can be converted to a PeriodIndex 
dates.to_period('D')

PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
             '2015-07-08'],
            dtype='period[D]', freq='D')

In [19]:
# A TimedeltaIndex is created when on date is subtracted from another
dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

Regular sequences:pd.date_range()
To make the creation of regular date sequences more convinient, we use:
1. pd.date_range() for timestamps
2. pd.period_range() for periods
3. pd.timedelta_range() for time deltas
This functions accepts a start date, and end date, and an optional frequency code to create a regular sequence of dates



In [20]:
pd.date_range('2015-07-03', '2015-07-10')

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

In [21]:
pd.date_range('2015-07-03', periods=8)

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

In [25]:
pd.date_range('2015-07-03', periods=8, freq='H')


DatetimeIndex(['2015-07-03 00:00:00', '2015-07-03 01:00:00',
               '2015-07-03 02:00:00', '2015-07-03 03:00:00',
               '2015-07-03 04:00:00', '2015-07-03 05:00:00',
               '2015-07-03 06:00:00', '2015-07-03 07:00:00'],
              dtype='datetime64[ns]', freq='H')

In [26]:
pd.period_range('2015-07', periods=8, freq='M')
pd.timedelta_range(0, periods=10, freq='H')

TimedeltaIndex(['00:00:00', '01:00:00', '02:00:00', '03:00:00', '04:00:00',
                '05:00:00', '06:00:00', '07:00:00', '08:00:00', '09:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [27]:
from pandas_datareader import data
goog = data.DataReader('GOOG', start='2004', end='2016',
                      data_source='google')
goog.head()

ModuleNotFoundError: No module named 'pandas_datareader'

In [5]:
import numpy as np
import pandas as pd

N = 20

ts = pd.Series(
    np.arrange(N),
    index=pd.period_range(start='2010', freq='A', periods = N, name='Year')
    dtype=pd IntDtype,
)
# Lag features
X = pd.DataFrame({
    'y_lag_2': ts.shift(2),
    'y_lag_3': ts.shift(3),
    'y_lag_4': ts.shift(4),
    'y_lag_5': ts.shift(5),
    'y_lag_6': ts.shift(6),
})

# Multistep targets
Y = pd.DataFrame({
    'y_step_3': ts.shift(-2),
    'y_step_2': ts.shift(-1),
    'y_step_1': ts,
})

data = pd.concat({'Targets': X, 'Features': X}, axis=1)
data.head(10).style.set_properties(['Targets'], **{'background-color': "LavenderBlush"})\
                   .set_properties(['Features'], **{'background-color': 'Lavender'})

SyntaxError: invalid syntax (<ipython-input-5-137c0f9e4bf3>, line 9)