# Part 4: Managing Time Series Data With Pandas

## Time Series Basics

### Importing Time Series Data from csv-Files

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates=["datetime"], index_col= "datetime")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
type(temp.iloc[0, 0])

In [None]:
temp.index

In [None]:
temp.index[0]

### Converting strings to datetime objects with pd.to_datetime()

In [1]:
import pandas as pd

In [2]:
temp = pd.read_csv("temp.csv")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
temp.datetime[0]

In [3]:
pd.to_datetime(temp.datetime)

0       2013-01-01 00:00:00
1       2013-01-01 01:00:00
2       2013-01-01 02:00:00
3       2013-01-01 03:00:00
4       2013-01-01 04:00:00
                ...        
35059   2016-12-31 19:00:00
35060   2016-12-31 20:00:00
35061   2016-12-31 21:00:00
35062   2016-12-31 22:00:00
35063   2016-12-31 23:00:00
Name: datetime, Length: 35064, dtype: datetime64[ns]

In [4]:
temp = temp.set_index(pd.to_datetime(temp.datetime)).drop("datetime", axis = 1)

In [5]:
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [6]:
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [7]:
temp.index[0]

Timestamp('2013-01-01 00:00:00')

In [8]:
pd.to_datetime("2015-05-20 10:30:20")

Timestamp('2015-05-20 10:30:20')

In [9]:
pd.to_datetime("20150520")

Timestamp('2015-05-20 00:00:00')

In [10]:
pd.to_datetime("2015/05/20")

Timestamp('2015-05-20 00:00:00')

In [11]:
pd.to_datetime("2015 05 20")

Timestamp('2015-05-20 00:00:00')

In [None]:
#pd.to_datetime("2015-20-05")

In [12]:
pd.to_datetime("2015 May 20")

Timestamp('2015-05-20 00:00:00')

In [13]:
pd.to_datetime("May 2015 20")

Timestamp('2015-05-20 00:00:00')

In [None]:
pd.to_datetime("2015 20th may")

In [15]:
pd.to_datetime(["2015-05-20", "Feb 20 2015"])

ValueError: time data "Feb 20 2015" doesn't match format "%Y-%m-%d", at position 1. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [14]:
pd.to_datetime(["2015-05-20", "Feb 20 2015", "Elephant"], errors="coerce")

DatetimeIndex(['2015-05-20', 'NaT', 'NaT'], dtype='datetime64[ns]', freq=None)

### Initial Analysis / Visual Inspection of Time Series

In [None]:
temp.head()

In [None]:
temp.tail()

In [None]:
temp.info()

In [None]:
temp.describe()

In [None]:
temp.LA.value_counts()

In [None]:
import matplotlib.pyplot as plt

In [None]:
temp.plot(figsize = (15, 7), subplots=True, layout=(1, 2), sharey=True)
plt.show()

### Indexing and Slicing Time Series

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col= "datetime")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
temp.loc["2013-01-01 01:00:00"]

In [None]:
temp.loc["2015"]

In [None]:
temp.loc["2015-05"]

In [None]:
temp.loc["2015-05-20"].shape

In [None]:
temp.loc["2015-05-20 10:00:00"]

In [None]:
#temp.loc["2015-05-20 10:30:00"]

In [None]:
temp.loc["2015-01-01" : "2015-12-31"]

In [None]:
temp.loc["2015-01-01" : "2015-12-31"].equals(temp.loc["2015"])

In [None]:
temp.loc["2015-04-15" : "2016-02-23"]

In [None]:
temp.loc["2015-05-20":]

In [None]:
temp.loc[:"2015-05-20"]

In [None]:
temp.loc["20FEBRUARY2015"]

In [None]:
#temp.loc[["2015-05-20 10:00:00", "2015-05-20 12:00:00"]]

In [None]:
two_timestamps = pd.to_datetime(["2015-05-20 10:00:00", "2015-05-20 12:00:00"])
two_timestamps

In [None]:
temp.loc[two_timestamps]

### Creating a customized DatetimeIndex with pd.date_range()

In [None]:
import pandas as pd

In [None]:
pd.to_datetime(["2015-05-20", "Feb 20 2015"])

In [None]:
pd.date_range(start = "2015-07-01", end = "2015-07-31", freq= "D")

In [None]:
pd.date_range(start = "2015-07-01", periods = 31, freq = "D")

In [None]:
pd.date_range(end = "2015-07-31", periods = 31, freq = "D")

In [None]:
pd.date_range(start = "2015-07-01", end = "2015-07-31", freq = "B")

In [None]:
pd.date_range(start = "2015-07-31", periods = 10, freq = "H")

In [None]:
pd.date_range(start = "2015-07-01", periods = 6,  freq = "W")

In [None]:
pd.date_range(start = "2015-07-01", periods = 6,  freq = "W-Wed")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "M")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "MS")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = pd.DateOffset(months = 1))

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "Q")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "QS")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "QS-May")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "A")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "AS")

In [None]:
pd.date_range(start = "2015-07-14", periods = 6,  freq = "AS-Jul")

In [None]:
pd.date_range(end = "2018-11-24", periods = 10,  freq = pd.DateOffset(years = 1))

### More on pd.date_range()

In [None]:
import pandas as pd

In [None]:
pd.date_range(start = "2015-07-01", periods = 10, freq = "3D8H")

### Downsampling Time Series with resample() (Part 1)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("seaborn")

In [None]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col = "datetime")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
list(temp.resample("D"))[1][1]

In [None]:
temp.head(25)

In [None]:
temp.resample("D").sum()

In [None]:
temp.resample("2H").first()

In [None]:
temp.resample("W").mean()

In [None]:
temp.resample("W-Wed").mean()

In [None]:
temp.resample("M").mean()

In [None]:
temp.resample("MS").mean()

In [None]:
temp.resample("MS", loffset="14D").mean()

In [None]:
temp.resample("Q").mean()

In [None]:
temp.resample("Q-Feb").mean()

In [None]:
temp.resample("Y").mean()

In [None]:
temp.resample("YS").mean()

### Downsampling Time Series with resample (Part 2)

In [None]:
temp.resample("M", kind = "period").mean()

In [None]:
temp.resample("W", kind = "period").mean()

In [None]:
temp.resample("Q", kind = "period").mean()

In [None]:
temp.resample("A", kind = "period").mean()

In [None]:
temp_m = temp.resample("M", kind = "period").mean()

In [None]:
temp_m

In [None]:
temp_m.info()

In [None]:
temp_m.index[0]

In [None]:
temp_m.plot(figsize = (15, 8), fontsize = 15)
plt.show()

In [None]:
temp.plot(figsize = (15, 8), fontsize = 15)
plt.show()

### The PeriodIndex Object

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col = "datetime")

In [None]:
temp.head()

In [None]:
temp.tail()

In [None]:
temp.info()

In [None]:
temp_m = temp.resample("M", kind = "period").mean()
temp_m.head(12)

In [None]:
temp_m.info()

In [None]:
temp_m.index

In [None]:
temp_m.loc["2013-01"]

In [None]:
temp_m.loc["2013-05":"2013-08"]

In [None]:
temp_m.loc["2013"]

In [None]:
temp_m.to_timestamp(how = "start")

### Advanced Indexing with reindex()

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col = "datetime")

In [None]:
temp.head()

In [None]:
temp.tail()

In [None]:
temp_d = temp.resample("D").mean()
temp_d

In [None]:
birthd = pd.date_range(end = "2018-12-24", periods = 10,  freq = pd.DateOffset(years = 1))
birthd

In [None]:
#temp_d.loc[birthd]

In [None]:
temp_d.reindex(birthd)

In [None]:
temp_d.head()

In [None]:
temp_d.tail()