# Time Series Simulation

***

In [1]:
import pandas as pd
import numpy as np

In [3]:
# https://pandas.pydata.org/docs/reference/api/pandas.date_range.html

In [4]:
# 72 consecutive hourly timestamps, starting at midnight on 1 Jan 2011.
# 'h' is the hourly frequency alias; the uppercase 'H' spelling is deprecated
# as of pandas 2.2 and emits a FutureWarning there.
rng = pd.date_range('1/1/2011', periods=72, freq='h')

In [5]:
# Display the hourly DatetimeIndex built above.
rng

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00',
               '2011-01-01 10:00:00', '2011-01-01 11:00:00',
               '2011-01-01 12:00:00', '2011-01-01 13:00:00',
               '2011-01-01 14:00:00', '2011-01-01 15:00:00',
               '2011-01-01 16:00:00', '2011-01-01 17:00:00',
               '2011-01-01 18:00:00', '2011-01-01 19:00:00',
               '2011-01-01 20:00:00', '2011-01-01 21:00:00',
               '2011-01-01 22:00:00', '2011-01-01 23:00:00',
               '2011-01-02 00:00:00', '2011-01-02 01:00:00',
               '2011-01-02 02:00:00', '2011-01-02 03:00:00',
               '2011-01-02 04:00:00', '2011-01-02 05:00:00',
               '2011-01-02 06:00:00', '2011-01-02 07:00:00',
               '2011-01-

In [8]:
# One-column frame of standard-normal draws, one value per timestamp in rng.
ts = pd.DataFrame(data=np.random.randn(rng.size), index=rng)
ts

Unnamed: 0,0
2011-01-01 00:00:00,0.503099
2011-01-01 01:00:00,1.212656
2011-01-01 02:00:00,0.214408
2011-01-01 03:00:00,-0.038409
2011-01-01 04:00:00,0.914820
...,...
2011-01-03 19:00:00,-0.608650
2011-01-03 20:00:00,-0.502850
2011-01-03 21:00:00,0.270773
2011-01-03 22:00:00,-0.248827


**Notes**

- With `.loc`, select rows by index label.
- With `.iloc`, select rows by integer position.

In [10]:
# Standard-normal draws indexed by rng, with the single column named "admissions".
ts = pd.DataFrame(
    data=np.random.randn(rng.size),
    index=rng,
    columns=["admissions"],
)
ts

Unnamed: 0,admissions
2011-01-01 00:00:00,0.898744
2011-01-01 01:00:00,-0.040871
2011-01-01 02:00:00,0.516314
2011-01-01 03:00:00,0.699439
2011-01-01 04:00:00,0.217514
...,...
2011-01-03 19:00:00,-1.352861
2011-01-03 20:00:00,0.086058
2011-01-03 21:00:00,0.023656
2011-01-03 22:00:00,0.932952


In [16]:
# Draw 10 independent samples (size) from a Poisson distribution
# whose expected number of events per draw (lam) is 5.
np.random.poisson(lam=5, size=10)

array([8, 9, 6, 8, 5, 1, 3, 7, 9, 6])

In [18]:
# Simulate some hospital admissions data using numpy, using our datetime index:
# one Poisson-distributed count (mean 10) per timestamp in rng.
# The counts come from a locally seeded RandomState, so re-running this cell
# always produces the same frame, and NumPy's global random state is not touched.
ADMISSIONS_SEED = 42  # arbitrary; change it to obtain a different simulated sample
admissions_sim = np.random.RandomState(ADMISSIONS_SEED).poisson(lam=10, size=len(rng))
ts = pd.DataFrame(admissions_sim, index=rng, columns=["admissions"])
ts

Unnamed: 0,admissions
2011-01-01 00:00:00,5
2011-01-01 01:00:00,17
2011-01-01 02:00:00,13
2011-01-01 03:00:00,6
2011-01-01 04:00:00,7
...,...
2011-01-03 19:00:00,13
2011-01-03 20:00:00,3
2011-01-03 21:00:00,8
2011-01-03 22:00:00,11


<br>

### Accessing and grouping

***

In [24]:
# Calculate the mean number of hospital admissions per hour over the whole index.
# Should be around 10, because the counts were drawn with np.random.poisson(10, ...).
ts.mean()

admissions    9.791667
dtype: float64

In [26]:
# Look at part of a specific day using the index: 03:00 through 23:00 on 1 Jan 2011.
# .loc slices by label, so both the start and the end timestamp are included.
ts.loc['2011-01-01 03:00:00':'2011-01-01 23:00:00']

Unnamed: 0,admissions
2011-01-01 03:00:00,6
2011-01-01 04:00:00,7
2011-01-01 05:00:00,12
2011-01-01 06:00:00,12
2011-01-01 07:00:00,3
2011-01-01 08:00:00,16
2011-01-01 09:00:00,13
2011-01-01 10:00:00,14
2011-01-01 11:00:00,8
2011-01-01 12:00:00,14


In [27]:
# Mean admissions over every hourly reading on 1 Jan 2011 (00:00 to 23:00 inclusive).
first_day = ts.loc['2011-01-01 00:00:00':'2011-01-01 23:00:00']
first_day.mean()

admissions    9.916667
dtype: float64

In [22]:
# Bucket the hourly readings by calendar day ('D'), then average each bucket.
daily_buckets = ts.resample('D')
daily_buckets.mean()

Unnamed: 0,admissions
2011-01-01,9.916667
2011-01-02,10.166667
2011-01-03,9.291667
