# Time Series Basics

## Imports

In [56]:
import pandas as pd
import matplotlib.pyplot as plt

## Load Datetime Direct From CSV

In [13]:
# Parse the 'datetime' column while reading and use it as the index,
# so the frame comes back with a DatetimeIndex straight from the CSV.
df = pd.read_csv(
    "data/temp.csv",
    index_col="datetime",
    parse_dates=["datetime"],
)

In [15]:
# Preview the first rows to confirm 'datetime' became the index rather than a column.
df.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [16]:
# Verify the index type and column dtypes; the non-null counts also reveal missing readings.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [17]:
# Spot-check one cell (first row, first column) to see the scalar type stored in the frame.
type(df.iloc[0,0])

numpy.float64

In [18]:
# Inspect the index itself; freq=None in the repr means no frequency is attached to it.
df.index

DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 01:00:00',
               '2013-01-01 02:00:00', '2013-01-01 03:00:00',
               '2013-01-01 04:00:00', '2013-01-01 05:00:00',
               '2013-01-01 06:00:00', '2013-01-01 07:00:00',
               '2013-01-01 08:00:00', '2013-01-01 09:00:00',
               ...
               '2016-12-31 14:00:00', '2016-12-31 15:00:00',
               '2016-12-31 16:00:00', '2016-12-31 17:00:00',
               '2016-12-31 18:00:00', '2016-12-31 19:00:00',
               '2016-12-31 20:00:00', '2016-12-31 21:00:00',
               '2016-12-31 22:00:00', '2016-12-31 23:00:00'],
              dtype='datetime64[ns]', name='datetime', length=35064, freq=None)

## Converting Strings to Datetime Objects

In [25]:
# Reload without parse_dates/index_col to show the default behaviour: 'datetime' is read
# as a plain string column. NOTE: this rebinds `df`, replacing the datetime-indexed frame
# loaded in the previous section.
df = pd.read_csv('data/temp.csv')

In [26]:
# Preview the raw frame: 'datetime' is now an ordinary column beside a default integer index.
df.head()

Unnamed: 0,datetime,LA,NY
0,2013-01-01 00:00:00,11.7,-1.1
1,2013-01-01 01:00:00,10.7,-1.7
2,2013-01-01 02:00:00,9.9,-2.0
3,2013-01-01 03:00:00,9.3,-2.1
4,2013-01-01 04:00:00,8.8,-2.3


In [28]:
# Confirm the 'datetime' column has dtype object, i.e. it was not parsed as dates.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35064 entries, 0 to 35063
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   datetime  35064 non-null  object 
 1   LA        35062 non-null  float64
 2   NY        35064 non-null  float64
dtypes: float64(2), object(1)
memory usage: 821.9+ KB


In [31]:
# Look at the first 'datetime' entry to confirm the values are Python strings.
type(df["datetime"].iloc[0])

str

In [33]:
# Convert the string column to datetime64[ns]; the result is only displayed, df is not modified.
pd.to_datetime(df.datetime)

0       2013-01-01 00:00:00
1       2013-01-01 01:00:00
2       2013-01-01 02:00:00
3       2013-01-01 03:00:00
4       2013-01-01 04:00:00
                ...        
35059   2016-12-31 19:00:00
35060   2016-12-31 20:00:00
35061   2016-12-31 21:00:00
35062   2016-12-31 22:00:00
35063   2016-12-31 23:00:00
Name: datetime, Length: 35064, dtype: datetime64[ns]

In [35]:
# Parse the 'datetime' strings and promote the column to the index in one chain.
# set_index("datetime") removes the column itself, so no separate drop() is needed.
# This rebinds `df`: afterwards 'datetime' is no longer a column, so re-running this
# cell requires re-running the plain read_csv cell first.
df = (
    df
    .assign(datetime=pd.to_datetime(df["datetime"]))
    .set_index("datetime")
)

In [37]:
# Re-check the structure: the index should now be a DatetimeIndex with only LA and NY as columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [38]:
# ISO-style "YYYY-MM-DD HH:MM:SS" string -> a single Timestamp.
pd.to_datetime("2015-05-20 10:30:20")

Timestamp('2015-05-20 10:30:20')

In [39]:
# Compact "YYYYMMDD" with no separators; with no time given, the result is at midnight.
pd.to_datetime("20150520")

Timestamp('2015-05-20 00:00:00')

In [40]:
# Slash-separated year/month/day.
pd.to_datetime("2015/05/20")

Timestamp('2015-05-20 00:00:00')

In [41]:
# Space-separated year, month and day.
pd.to_datetime("2015 05 20")

Timestamp('2015-05-20 00:00:00')

In [42]:
# Month written as an English name between the year and the day.
pd.to_datetime("2015 May 20")

Timestamp('2015-05-20 00:00:00')

In [43]:
# Month name first, then year, then day; the output shows 20 is still taken as the day.
pd.to_datetime("May 2015 20")

Timestamp('2015-05-20 00:00:00')

In [44]:
# An ordinal day ("20th") and a lowercase month name are parsed as well.
pd.to_datetime("2015 20th may")

Timestamp('2015-05-20 00:00:00')

In [46]:
# The two strings use different layouts; format="mixed" has the format
# inferred for each element individually.
pd.to_datetime(
    ["2015-05-20", "Feb 20 2015"],
    format="mixed",
)

DatetimeIndex(['2015-05-20', '2015-02-20'], dtype='datetime64[ns]', freq=None)

In [49]:
# "Elephant" is not a date: errors="coerce" turns the unparseable entry
# into NaT instead of raising.
pd.to_datetime(
    ["2015-05-20", "Feb 20 2015", "Elephant"],
    format="mixed",
    errors="coerce",
)

DatetimeIndex(['2015-05-20', '2015-02-20', 'NaT'], dtype='datetime64[ns]', freq=None)

## Initial Analysis and Visualization of Time Series

In [50]:
# Start of the series: check the first timestamps and the spacing between readings.
df.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [51]:
# End of the series: the last timestamps show where the recorded period stops.
df.tail()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-31 19:00:00,13.5,4.6
2016-12-31 20:00:00,13.2,5.7
2016-12-31 21:00:00,12.8,5.8
2016-12-31 22:00:00,12.3,5.7
2016-12-31 23:00:00,11.9,5.5


In [52]:
# Structural summary: number of entries, index range, dtypes and non-null counts per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [53]:
# Summary statistics per column; 'count' covers non-null values only, so it can differ between columns.
df.describe()

Unnamed: 0,LA,NY
count,35062.0,35064.0
mean,17.486016,12.068269
std,6.640666,10.466832
min,-6.6,-22.4
25%,12.9,3.9
50%,17.2,12.5
75%,21.9,20.6
max,42.3,37.1


In [55]:
# How often each distinct LA value occurs, most frequent first.
df["LA"].value_counts()

LA
 16.2    238
 16.7    237
 15.2    234
 18.2    231
 16.6    228
        ... 
 40.7      1
 39.5      1
 37.0      1
 37.6      1
-3.3       1
Name: count, Length: 442, dtype: int64