# Time Series Basics

## Importing Time Series Data from CSV Files

In [29]:
from dotenv import load_dotenv
import os

# Load environment variables from the .env file (if one is present)
load_dotenv()

# Directory that holds the data files, taken from the PROJECT_FILES_PT2 variable.
# Fail right here with a clear message instead of letting a None value surface
# later as a TypeError inside os.path.join.
project_files = os.getenv("PROJECT_FILES_PT2")
if project_files is None:
    raise RuntimeError(
        "Environment variable PROJECT_FILES_PT2 is not set; "
        "define it in the environment or in the .env file."
    )

In [30]:
import pandas as pd

In [40]:
# Read temp.csv from the project data directory, parsing the "datetime"
# column as dates and using it as the row index.
temp = pd.read_csv(
    os.path.join(project_files, "temp.csv"),
    index_col="datetime",
    parse_dates=["datetime"],
)

In [41]:
# Preview the first five rows (hourly readings for the LA and NY columns)
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [42]:
# Summarise the index range, column dtypes and non-null counts
# (a non-null count below the number of entries means missing values in that column)
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [39]:
# The datetime information lives in the index (the data cells are floats),
# so inspect the type of the first index label.
type(temp.index[0])

pandas._libs.tslibs.timestamps.Timestamp

In [43]:
# Show the row index: a DatetimeIndex named "datetime"
temp.index

DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 01:00:00',
               '2013-01-01 02:00:00', '2013-01-01 03:00:00',
               '2013-01-01 04:00:00', '2013-01-01 05:00:00',
               '2013-01-01 06:00:00', '2013-01-01 07:00:00',
               '2013-01-01 08:00:00', '2013-01-01 09:00:00',
               ...
               '2016-12-31 14:00:00', '2016-12-31 15:00:00',
               '2016-12-31 16:00:00', '2016-12-31 17:00:00',
               '2016-12-31 18:00:00', '2016-12-31 19:00:00',
               '2016-12-31 20:00:00', '2016-12-31 21:00:00',
               '2016-12-31 22:00:00', '2016-12-31 23:00:00'],
              dtype='datetime64[ns]', name='datetime', length=35064, freq=None)

In [44]:
# A single index label is a pandas Timestamp
temp.index[0]

Timestamp('2013-01-01 00:00:00')

## Converting strings to datetime objects with pd.to_datetime()

In [None]:
import pandas as pd

In [None]:
# Re-read the file WITHOUT date parsing so the "datetime" column arrives as raw
# strings. The path is built from project_files (set in the first cell), the
# same way as the initial load, so the cell does not depend on the working directory.
temp = pd.read_csv(os.path.join(project_files, "temp.csv"))

In [None]:
# Preview the raw frame; read without parse_dates/index_col, "datetime" is an ordinary column
temp.head()

In [None]:
# Check the dtypes: "datetime" is expected to show up as a plain (string) column here
temp.info()

In [None]:
# First raw value of the "datetime" column, before any conversion
temp.datetime[0]

In [None]:
# Convert the column to datetime values; the result is only displayed, temp is not modified
pd.to_datetime(temp.datetime)

In [None]:
# Use the parsed "datetime" values as the row index, then remove the
# now-redundant string column.
temp = (
    temp
    .set_index(pd.to_datetime(temp["datetime"]))
    .drop(columns="datetime")
)

In [None]:
# After set_index/drop the timestamps live in the index instead of a column
temp.head()

In [None]:
# info() should now report a DatetimeIndex
temp.info()

In [None]:
# First index label after the conversion
temp.index[0]

In [None]:
# Full date-and-time string
pd.to_datetime("2015-05-20 10:30:20")

In [None]:
# Compact form without separators
pd.to_datetime("20150520")

In [None]:
# Slash-separated date
pd.to_datetime("2015/05/20")

In [None]:
# Space-separated date
pd.to_datetime("2015 05 20")

In [None]:
# Middle field is 20, which cannot be a month; left commented out,
# presumably because the call raises — TODO confirm
#pd.to_datetime("2015-20-05")

In [None]:
# Month given by name
pd.to_datetime("2015 May 20")

In [None]:
# Month name first, day last
pd.to_datetime("May 2015 20")

In [None]:
# Ordinal day ("20th") and lower-case month name
pd.to_datetime("2015 20th may")

In [None]:
# Two differently formatted strings in one list, no format argument (kept for reference)
#pd.to_datetime(["2015-05-20", "Feb 20 2015"]) # old

In [None]:
# format="mixed" lets pandas infer the format of each element individually
pd.to_datetime(["2015-05-20", "Feb 20 2015"], format = "mixed") # new

In [None]:
# errors="coerce" turns unparseable entries such as "Elephant" into NaT instead of raising.
# NOTE(review): without format="mixed", recent pandas infers one format from the first
# element, so "Feb 20 2015" presumably becomes NaT here as well — confirm.
pd.to_datetime(["2015-05-20", "Feb 20 2015", "Elephant"], errors="coerce") # old

In [None]:
# Per-element format inference combined with coercion of unparseable entries
pd.to_datetime(["2015-05-20", "Feb 20 2015", "Elephant"], format = "mixed", errors="coerce") # new

## Indexing and Slicing Time Series

In [None]:
import pandas as pd

In [None]:
# Load the data with a parsed DatetimeIndex. The path is built from
# project_files (set in the first cell), the same way as the initial load,
# so the cell does not depend on the working directory.
temp = pd.read_csv(
    os.path.join(project_files, "temp.csv"),
    parse_dates=["datetime"],
    index_col="datetime",
)

In [None]:
# Preview the first rows
temp.head()

In [None]:
# Confirm the index type and column dtypes
temp.info()

In [None]:
# Exact timestamp label: selects that single hour
temp.loc["2013-01-01 01:00:00"]

In [None]:
# Partial string indexing: all rows of the year 2015
temp.loc["2015"]

In [None]:
# All rows of May 2015
temp.loc["2015-05"]

In [None]:
# All rows of one day; .shape shows how many rows that day contains
temp.loc["2015-05-20"].shape

In [None]:
# One specific hour
temp.loc["2015-05-20 10:00:00"]

In [None]:
# 10:30 does not appear to be a label in the hourly index; left commented out,
# presumably because the lookup raises KeyError — TODO confirm
#temp.loc["2015-05-20 10:30:00"]

In [None]:
# Label slice between two dates (label-based slices include the end label)
temp.loc["2015-01-01" : "2015-12-31"]

In [None]:
# Check whether the explicit slice and the year shortcut select the same data
temp.loc["2015-01-01" : "2015-12-31"].equals(temp.loc["2015"])

In [None]:
# Slice spanning parts of two years
temp.loc["2015-04-15" : "2016-02-23"]

In [None]:
# Open-ended slice: from 2015-05-20 to the end of the data
temp.loc["2015-05-20":]

In [None]:
# Open-ended slice: from the start of the data up to 2015-05-20
temp.loc[:"2015-05-20"]

In [None]:
# A differently spelled date string used as the label
temp.loc["20FEBRUARY2015"]

In [None]:
# A list of timestamp strings selects exactly those rows
temp.loc[["2015-05-20 10:00:00", "2015-05-20 12:00:00"]] # now works

In [None]:
# Build a DatetimeIndex holding the two timestamps and display it
two_timestamps = pd.to_datetime(
    [
        "2015-05-20 10:00:00",
        "2015-05-20 12:00:00",
    ]
)
two_timestamps

In [None]:
# Select the rows whose index labels match the two timestamps in two_timestamps
temp.loc[two_timestamps]

## Downsampling Time Series with resample()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Apply the "seaborn-v0_8" matplotlib style sheet to subsequent plots
# NOTE(review): this style name requires a matplotlib version that ships it (3.6+) — confirm environment
plt.style.use("seaborn-v0_8")

In [None]:
# Load the data with a parsed DatetimeIndex. The path is built from
# project_files (set in the first cell), the same way as the initial load,
# so the cell does not depend on the working directory.
temp = pd.read_csv(
    os.path.join(project_files, "temp.csv"),
    parse_dates=["datetime"],
    index_col="datetime",
)

In [None]:
# Preview the first rows
temp.head()

In [None]:
# Confirm the index type and column dtypes
temp.info()

In [None]:
# resample() on its own only builds a resampler object; no aggregation is applied in this cell
temp.resample("D")

In [None]:
# Iterating the daily resampler yields (bin label, sub-frame) pairs;
# keep only the sub-frames and show the one for the second day.
[day_frame for _bin_label, day_frame in temp.resample("D")][1]

In [None]:
# First 25 rows: with hourly data this covers one full day plus the first hour of the next
temp.head(25)

In [None]:
# Daily bins, summing the readings in each bin
temp.resample("D").sum()

In [None]:
# Two-hour bins, keeping the last reading of each bin
temp.resample("2h").last() # use h instead of H

In [None]:
# Weekly means
temp.resample("W").mean()

In [None]:
# Weekly means with the week anchored on Wednesday
temp.resample("W-Wed").mean()

In [None]:
# Monthly means, month-end alias
temp.resample("ME").mean() # use ME instead of M

In [None]:
# Monthly means, month-start alias
temp.resample("MS").mean()

In [None]:
# Former way of shifting the bin labels by 14 days via loffset (kept for reference only);
# the two following cells show the replacements
# temp.resample("MS", loffset="14D").mean() # old

In [None]:
# new (Alt 1): monthly means on month-start labels, then move the labels 14 days forward
(
    temp
    .resample("MS")
    .mean()
    .shift(periods=14, freq="D")
)

In [None]:
# new (Alt 2): compute the monthly means first, then add a 14-day offset to the index labels
from pandas.tseries.frequencies import to_offset
df2 = temp.resample("MS").mean()
df2 = df2.set_axis(df2.index + to_offset("14D"), axis="index")
df2

In [None]:
# Quarterly means, quarter-end alias
temp.resample("QE").mean() # use QE instead of Q

In [None]:
# Quarterly means with the quarter cycle anchored on February
temp.resample("QE-FEB").mean() # use QE-FEB instead of Q-Feb / QE-Feb

In [None]:
# Yearly means, year-end alias
temp.resample("YE").mean() # use YE instead of A

In [None]:
# Yearly means, year-start alias
temp.resample("YS").mean() # use YS instead of AS