# Pandas Time Series

## Read Time Series Data

In [1]:
import pandas as pd

# Baseline load with no date handling: both columns come back with object dtype.
ts_1 = pd.read_csv('temp_all.csv')
print(ts_1.head(), type(ts_1), sep='\n')

                Date temp
0  20150101 17:00:00   24
1  20150101 17:30:00   24
2  20150101 18:00:00   23
3  20150101 18:30:00   23
4  20150101 19:00:00   23
<class 'pandas.core.frame.DataFrame'>


In [2]:
# Summarise index range, per-column non-null counts and dtypes of the plain read.
ts_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79048 entries, 0 to 79047
Data columns (total 2 columns):
Date    79048 non-null object
temp    79010 non-null object
dtypes: object(2)
memory usage: 1.2+ MB


In [3]:
# Same file, but have read_csv convert the 'Date' column to datetimes on load.
ts_2 = pd.read_csv('temp_all.csv', parse_dates=['Date'])
print(ts_2.head(), type(ts_2), sep='\n')

                 Date temp
0 2015-01-01 17:00:00   24
1 2015-01-01 17:30:00   24
2 2015-01-01 18:00:00   23
3 2015-01-01 18:30:00   23
4 2015-01-01 19:00:00   23
<class 'pandas.core.frame.DataFrame'>


In [4]:
# Re-check dtypes after parsing: 'Date' should now be datetime64 rather than object.
ts_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79048 entries, 0 to 79047
Data columns (total 2 columns):
Date    79048 non-null datetime64[ns]
temp    79010 non-null object
dtypes: datetime64[ns](1), object(1)
memory usage: 1.2+ MB


In [5]:
# Use 'Date' as the row index and parse it, giving a datetime-indexed frame
# that supports the label-based time slicing used further down.
ts_3 = pd.read_csv('temp_all.csv', parse_dates=True, index_col='Date')
print(ts_3.head(), type(ts_3), sep='\n')

                    temp
Date                    
2015-01-01 17:00:00   24
2015-01-01 17:30:00   24
2015-01-01 18:00:00   23
2015-01-01 18:30:00   23
2015-01-01 19:00:00   23
<class 'pandas.core.frame.DataFrame'>


## Extracting Date-Time Data

In [6]:
# Day-of-month component of the first few timestamps (via the .dt accessor).
ts_2['Date'].head().dt.day

0    1
1    1
2    1
3    1
4    1
Name: Date, dtype: int64

In [7]:
# Hour-of-day component of the first few timestamps.
ts_2['Date'].head().dt.hour

0    17
1    17
2    18
3    18
4    19
Name: Date, dtype: int64

In [8]:
# Minute component of the first few timestamps.
ts_2['Date'].head().dt.minute

0     0
1    30
2     0
3    30
4     0
Name: Date, dtype: int64

## Slicing Times

In [9]:
# Partial-string indexing: a date-only label selects the rows that fall on that day.
ts_3.loc['2015-Jan-02']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2015-01-02 00:00:00,23
2015-01-02 00:30:00,24
2015-01-02 01:00:00,24
2015-01-02 01:30:00,24
2015-01-02 02:00:00,25
2015-01-02 02:30:00,26
2015-01-02 03:00:00,27
2015-01-02 03:30:00,27
2015-01-02 04:00:00,28
2015-01-02 04:30:00,27


In [10]:
# A full date-and-time label narrows the lookup to that exact timestamp.
ts_3.loc['2015-Jan-02 06:00:00']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2015-01-02 06:00:00,28


In [11]:
# A year-month label selects the rows of that whole month.
ts_3.loc['2015-01']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2015-01-01 17:00:00,24
2015-01-01 17:30:00,24
2015-01-01 18:00:00,23
2015-01-01 18:30:00,23
2015-01-01 19:00:00,23
2015-01-01 19:30:00,23
2015-01-01 20:00:00,23
2015-01-01 20:30:00,23
2015-01-01 21:00:00,23
2015-01-01 22:00:00,23


In [12]:
# A year-only label selects the rows of that whole year.
ts_3.loc['2016']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2016-01-01 00:00:00,26
2016-01-01 00:30:00,26
2016-01-01 01:00:00,27
2016-01-01 01:30:00,28
2016-01-01 02:00:00,28
2016-01-01 02:30:00,30
2016-01-01 03:00:00,30
2016-01-01 03:30:00,30
2016-01-01 04:00:00,31
2016-01-01 04:30:00,30


In [13]:
# Label slice between two timestamps.
# NOTE(review): pandas label slices normally include the end label; the output
# captured here is truncated, so confirm the last row if that matters.
ts_3.loc['2015-Jan-02 06:00:00' : '2015-Jan-03 06:00:00']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2015-01-02 06:00:00,28
2015-01-02 06:30:00,28
2015-01-02 07:00:00,28
2015-01-02 07:30:00,27
2015-01-02 08:00:00,26
2015-01-02 08:30:00,26
2015-01-02 09:00:00,25
2015-01-02 09:30:00,25
2015-01-02 10:00:00,25
2015-01-02 10:30:00,25


In [14]:
# Label slice using year-only endpoints, starting from the first 2015 row.
ts_3.loc['2015' : '2016']

Unnamed: 0_level_0,temp
Date,Unnamed: 1_level_1
2015-01-01 17:00:00,24
2015-01-01 17:30:00,24
2015-01-01 18:00:00,23
2015-01-01 18:30:00,23
2015-01-01 19:00:00,23
2015-01-01 19:30:00,23
2015-01-01 20:00:00,23
2015-01-01 20:30:00,23
2015-01-01 21:00:00,23
2015-01-01 22:00:00,23


## Convert a List to a Pandas Series

In [15]:
# Raw timestamps as compact 'YYYYMMDD HH:MM:SS' strings, one per hour.
date_list = [
    '20150101 00:00:00',
    '20150101 01:00:00',
    '20150101 02:00:00',
    '20150101 03:00:00',
    '20150101 04:00:00',
    '20150101 05:00:00',
    '20150101 06:00:00',
]
print(date_list)

['20150101 00:00:00', '20150101 01:00:00', '20150101 02:00:00', '20150101 03:00:00', '20150101 04:00:00', '20150101 05:00:00', '20150101 06:00:00']


In [17]:
# Seven temperature readings, later paired one-to-one with the timestamps.
temp_list = [
    23, 25, 28, 23, 23, 25, 21,
]
print(temp_list)

[23, 25, 28, 23, 23, 25, 21]


In [18]:
# The strings in date_list are compact 'YYYYMMDD HH:MM:SS' (no dashes), so the
# format must be '%Y%m%d ...'. A dashed '%Y-%m-%d' format does not describe them
# and is rejected by pandas versions that match the format strictly.
date_time = pd.to_datetime(date_list, format = '%Y%m%d %H:%M:%S')
print(date_time)
print(type(date_time))

DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 01:00:00',
               '2015-01-01 02:00:00', '2015-01-01 03:00:00',
               '2015-01-01 04:00:00', '2015-01-01 05:00:00',
               '2015-01-01 06:00:00'],
              dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [19]:
# Build the series: values come from temp_list, row labels from date_time.
time_series = pd.Series(data=temp_list, index=date_time)
print(time_series.head())
type(time_series)

2015-01-01 00:00:00    23
2015-01-01 01:00:00    25
2015-01-01 02:00:00    28
2015-01-01 03:00:00    23
2015-01-01 04:00:00    23
dtype: int64


pandas.core.series.Series

## Reindexing the Time Series Index

In [20]:
# Hourly series: seven readings from 00:00 to 06:00 on 2015-01-01.
date_1 = ['20150101 00:00:00',
          '20150101 01:00:00',
          '20150101 02:00:00',
          '20150101 03:00:00',
          '20150101 04:00:00',
          '20150101 05:00:00',
          '20150101 06:00:00']
temp_1 = [23, 25, 28, 23, 23, 25, 21]
# The date part has no dashes, so the format is '%Y%m%d' (not '%Y-%m-%d');
# a mismatched format fails on pandas versions that match it strictly.
dt_1 = pd.to_datetime(date_1, format = '%Y%m%d %H:%M:%S')
ts_1 = pd.Series(temp_1, index = dt_1)
print(ts_1)

2015-01-01 00:00:00    23
2015-01-01 01:00:00    25
2015-01-01 02:00:00    28
2015-01-01 03:00:00    23
2015-01-01 04:00:00    23
2015-01-01 05:00:00    25
2015-01-01 06:00:00    21
dtype: int64


In [21]:
# Second series on a (mostly) half-hourly grid that only partly overlaps the
# hourly one. Assign to date_2 only: the earlier chained 'date_2 = date_1 = ...'
# silently overwrote date_1 with these timestamps.
date_2 = ['20150101 00:30:00',
          '20150101 01:00:00',
          '20150101 01:30:00',
          '20150101 02:00:00',
          '20150101 02:30:00',
          '20150101 03:00:00',
          '20150101 04:30:00',
          '20150101 05:00:00',
          '20150101 05:30:00',
          '20150101 06:00:00']
temp_2 = [23, 25, 28, 23, 23, 25, 21, 22, 24, 22]
# The date part has no dashes, so the format is '%Y%m%d' (not '%Y-%m-%d').
dt_2 = pd.to_datetime(date_2, format = '%Y%m%d %H:%M:%S')
ts_2 = pd.Series(temp_2, index = dt_2)
print(ts_2)

2015-01-01 00:30:00    23
2015-01-01 01:00:00    25
2015-01-01 01:30:00    28
2015-01-01 02:00:00    23
2015-01-01 02:30:00    23
2015-01-01 03:00:00    25
2015-01-01 04:30:00    21
2015-01-01 05:00:00    22
2015-01-01 05:30:00    24
2015-01-01 06:00:00    22
dtype: int64


In [22]:
# Re-align the hourly series onto ts_2's timestamps; labels that ts_1 lacks
# become NaN (which is why the printed dtype turns to float64).
ts_3 = ts_1.reindex(index=ts_2.index)
print(ts_3)

2015-01-01 00:30:00     NaN
2015-01-01 01:00:00    25.0
2015-01-01 01:30:00     NaN
2015-01-01 02:00:00    28.0
2015-01-01 02:30:00     NaN
2015-01-01 03:00:00    23.0
2015-01-01 04:30:00     NaN
2015-01-01 05:00:00    25.0
2015-01-01 05:30:00     NaN
2015-01-01 06:00:00    21.0
dtype: float64


In [23]:
# Same re-alignment, but fill each new label with the last earlier observation.
ts_4 = ts_1.reindex(index=ts_2.index, method='ffill')
print(ts_4)

2015-01-01 00:30:00    23
2015-01-01 01:00:00    25
2015-01-01 01:30:00    25
2015-01-01 02:00:00    28
2015-01-01 02:30:00    28
2015-01-01 03:00:00    23
2015-01-01 04:30:00    23
2015-01-01 05:00:00    25
2015-01-01 05:30:00    25
2015-01-01 06:00:00    21
dtype: int64


In [24]:
# Same re-alignment, but fill each new label with the next later observation.
ts_5 = ts_1.reindex(index=ts_2.index, method='bfill')
print(ts_5)

2015-01-01 00:30:00    25
2015-01-01 01:00:00    25
2015-01-01 01:30:00    28
2015-01-01 02:00:00    28
2015-01-01 02:30:00    23
2015-01-01 03:00:00    23
2015-01-01 04:30:00    25
2015-01-01 05:00:00    25
2015-01-01 05:30:00    21
2015-01-01 06:00:00    21
dtype: int64


In [25]:
# Element-wise sum aligned on the union of both indexes; timestamps present in
# only one of the series come out as NaN.
ts_sum1 = ts_1.add(ts_2)
print(ts_sum1)

2015-01-01 00:00:00     NaN
2015-01-01 00:30:00     NaN
2015-01-01 01:00:00    50.0
2015-01-01 01:30:00     NaN
2015-01-01 02:00:00    51.0
2015-01-01 02:30:00     NaN
2015-01-01 03:00:00    48.0
2015-01-01 04:00:00     NaN
2015-01-01 04:30:00     NaN
2015-01-01 05:00:00    47.0
2015-01-01 05:30:00     NaN
2015-01-01 06:00:00    43.0
dtype: float64


In [26]:
# Sum the forward-filled series with the original hourly one; alignment is
# still by label, so non-shared timestamps remain NaN.
ts_sum2 = ts_4.add(ts_1)
print(ts_sum2)

2015-01-01 00:00:00     NaN
2015-01-01 00:30:00     NaN
2015-01-01 01:00:00    50.0
2015-01-01 01:30:00     NaN
2015-01-01 02:00:00    56.0
2015-01-01 02:30:00     NaN
2015-01-01 03:00:00    46.0
2015-01-01 04:00:00     NaN
2015-01-01 04:30:00     NaN
2015-01-01 05:00:00    50.0
2015-01-01 05:30:00     NaN
2015-01-01 06:00:00    42.0
dtype: float64


## Resampling

In [27]:
# Rebuild the hourly series (00:00-06:00 on 2015-01-01) used by the resampling
# examples below.
date_1 = ['20150101 00:00:00',
          '20150101 01:00:00',
          '20150101 02:00:00',
          '20150101 03:00:00',
          '20150101 04:00:00',
          '20150101 05:00:00',
          '20150101 06:00:00']
temp_1 = [23, 25, 28, 23, 23, 25, 21]
# The date part has no dashes, so the format is '%Y%m%d' (not '%Y-%m-%d');
# a mismatched format fails on pandas versions that match it strictly.
dt_1 = pd.to_datetime(date_1, format = '%Y%m%d %H:%M:%S')
ts_1 = pd.Series(temp_1, index = dt_1)
print(ts_1)

2015-01-01 00:00:00    23
2015-01-01 01:00:00    25
2015-01-01 02:00:00    28
2015-01-01 03:00:00    23
2015-01-01 04:00:00    23
2015-01-01 05:00:00    25
2015-01-01 06:00:00    21
dtype: int64


In [28]:
# Downsample to one bucket per calendar day and total the readings in it.
ts_1.resample(rule='D').sum()

2015-01-01    168
Freq: D, dtype: int64

In [29]:
# Downsample to 2-hour buckets and average the readings in each.
# Lowercase 'h' is the current hour alias; uppercase 'H' is deprecated in
# recent pandas releases and emits a FutureWarning there.
ts_1.resample('2h').mean()

2015-01-01 00:00:00    24.0
2015-01-01 02:00:00    25.5
2015-01-01 04:00:00    24.0
2015-01-01 06:00:00    21.0
Freq: 2H, dtype: float64

In [30]:
# Upsample to a 15-minute grid; slots with no original reading are NaN.
ts_1.resample(rule='15min').first()

2015-01-01 00:00:00    23.0
2015-01-01 00:15:00     NaN
2015-01-01 00:30:00     NaN
2015-01-01 00:45:00     NaN
2015-01-01 01:00:00    25.0
2015-01-01 01:15:00     NaN
2015-01-01 01:30:00     NaN
2015-01-01 01:45:00     NaN
2015-01-01 02:00:00    28.0
2015-01-01 02:15:00     NaN
2015-01-01 02:30:00     NaN
2015-01-01 02:45:00     NaN
2015-01-01 03:00:00    23.0
2015-01-01 03:15:00     NaN
2015-01-01 03:30:00     NaN
2015-01-01 03:45:00     NaN
2015-01-01 04:00:00    23.0
2015-01-01 04:15:00     NaN
2015-01-01 04:30:00     NaN
2015-01-01 04:45:00     NaN
2015-01-01 05:00:00    25.0
2015-01-01 05:15:00     NaN
2015-01-01 05:30:00     NaN
2015-01-01 05:45:00     NaN
2015-01-01 06:00:00    21.0
Freq: 15T, dtype: float64

In [31]:
# Upsample to a 15-minute grid, carrying each reading forward until the next one.
ts_1.resample(rule='15min').ffill()

2015-01-01 00:00:00    23
2015-01-01 00:15:00    23
2015-01-01 00:30:00    23
2015-01-01 00:45:00    23
2015-01-01 01:00:00    25
2015-01-01 01:15:00    25
2015-01-01 01:30:00    25
2015-01-01 01:45:00    25
2015-01-01 02:00:00    28
2015-01-01 02:15:00    28
2015-01-01 02:30:00    28
2015-01-01 02:45:00    28
2015-01-01 03:00:00    23
2015-01-01 03:15:00    23
2015-01-01 03:30:00    23
2015-01-01 03:45:00    23
2015-01-01 04:00:00    23
2015-01-01 04:15:00    23
2015-01-01 04:30:00    23
2015-01-01 04:45:00    23
2015-01-01 05:00:00    25
2015-01-01 05:15:00    25
2015-01-01 05:30:00    25
2015-01-01 05:45:00    25
2015-01-01 06:00:00    21
Freq: 15T, dtype: int64

In [32]:
# Upsample to a 15-minute grid, then fill the empty slots by straight-line
# interpolation between neighbouring readings.
ts_1.resample(rule='15min').first().interpolate(method='linear')

2015-01-01 00:00:00    23.00
2015-01-01 00:15:00    23.50
2015-01-01 00:30:00    24.00
2015-01-01 00:45:00    24.50
2015-01-01 01:00:00    25.00
2015-01-01 01:15:00    25.75
2015-01-01 01:30:00    26.50
2015-01-01 01:45:00    27.25
2015-01-01 02:00:00    28.00
2015-01-01 02:15:00    26.75
2015-01-01 02:30:00    25.50
2015-01-01 02:45:00    24.25
2015-01-01 03:00:00    23.00
2015-01-01 03:15:00    23.00
2015-01-01 03:30:00    23.00
2015-01-01 03:45:00    23.00
2015-01-01 04:00:00    23.00
2015-01-01 04:15:00    23.50
2015-01-01 04:30:00    24.00
2015-01-01 04:45:00    24.50
2015-01-01 05:00:00    25.00
2015-01-01 05:15:00    24.00
2015-01-01 05:30:00    23.00
2015-01-01 05:45:00    22.00
2015-01-01 06:00:00    21.00
Freq: 15T, dtype: float64