In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load starbucks.csv with the 'Date' column as the index; parse_dates=True asks
# pandas to parse that index as dates.
starbucks_stocks = pd.read_csv(
    'starbucks.csv',
    parse_dates=True,
    index_col='Date',
)

In [None]:
# Preview the first five rows of the Starbucks data.
starbucks_stocks.head(n=5)

In [None]:
# Inspect the row index that was built from the 'Date' column.
starbucks_stocks.index

In [None]:
# Spacing between the third and fourth index entries.
print(
    starbucks_stocks.index[3]
    - starbucks_stocks.index[2]
)

In [None]:
# Time plot of the Starbucks closing price.
# A title is set so the figure can be understood without reading the code.
starbucks_stocks['Close'].plot(figsize=(20, 8), title='Starbucks closing price');

In [None]:
# Load airline_passengers.csv with the 'Month' column as the index;
# parse_dates=True asks pandas to parse that index as dates.
airline_passengers = pd.read_csv(
    'airline_passengers.csv',
    parse_dates=True,
    index_col='Month',
)

In [None]:
# Preview the first five rows of the airline passenger data.
airline_passengers.head(n=5)

In [None]:
# Time plot of the 'Passengers' column.
# A title is set so the figure can be understood without reading the code.
airline_passengers['Passengers'].plot(figsize=(20, 8), title='Airline passengers');

In [None]:
# In the following data set some rows are deleted, i.e. they are not set to NA;
# the entries are simply absent from the file.
airline_passengers2 = pd.read_csv('airline_passengers_missingrows.csv', index_col='Month', parse_dates=True)
# Series.plot() simply draws a line between subsequent observations, whether or
# not there are missing months between them.
# -> Be careful when examining a yet unknown time series via .plot().
airline_passengers2['Passengers'].plot(figsize=(20, 8), title='Airline passengers (file with missing rows)');

In [None]:
# List the index entries before 1952 to see which months have data.
airline_passengers2.index[
    airline_passengers2.index.year < 1952
]

In [None]:
# Insert rows for the missing months manually.
# Start and end date of the series. min()/max() are used instead of the first
# and last position so the range is still correct if the file is not sorted
# chronologically.
start = airline_passengers2.index.min().date()
end = airline_passengers2.index.max().date()

# Create a series of months from the start date of the series to the end date.
# freq='MS' means start of the month; the month-end alias is 'M' ('ME' from pandas 2.2 on).
# See here for a list of frequency aliases in pandas: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
new_dates = pd.date_range(start=start, end=end, freq='MS')
print(new_dates)

In [None]:
# Reindex onto the full month range so that dates without any information are
# included as well (reindex fills them with NaN by default).
airline_passengers2 = airline_passengers2.reindex(index=new_dates)
print(
    airline_passengers2['Passengers'][
        airline_passengers2.index.year < 1952
    ]
)

In [None]:
# Plot the 'Passengers' column of the missing-rows data set.
# A title is set so the figure can be understood without reading the code.
airline_passengers2['Passengers'].plot(figsize=(20, 8), title='Airline passengers (missing-rows data set)');

In [None]:
# Load sp500_close.csv with its first column as the index; parse_dates=True
# asks pandas to parse that index as dates. Then preview the first five rows.
sp500_close = pd.read_csv(
    'sp500_close.csv',
    parse_dates=True,
    index_col=0,
)
sp500_close.head(n=5)

In [None]:
# As the stock market is closed on Saturday, Sunday and holidays, there are no
# closing prices for these days. Here it doesn't make much sense to plot the
# rows with missing values.
# A title is set so the figure can be understood without reading the code.
sp500_close['AAL'].plot(figsize=(20, 8), title='AAL closing price');

In [None]:
# Keep only the AAL column and drop the rows without a value.
# dropna() already returns a new DataFrame, so no explicit .copy() is needed.
aal = sp500_close[['AAL']].dropna()
aal.plot(figsize=(20, 8), title='AAL closing price (rows with missing values dropped)');

In [None]:
# --- Changing the granularity of a series ---
# Show the first five rows again as a reference before resampling.
starbucks_stocks.head(n=5)

In [None]:
# Annual mean of every column.
# NOTE: 'A' is the year-end alias; pandas 2.2 and later spell it 'YE' and warn about 'A'.
starbucks_stocks.resample('A').mean()

In [None]:
# Mean of every column per weekly bin.
starbucks_stocks.resample('W').mean()

In [None]:
# Define a new function to take the first observation of each period
def first_observation(entry):
    """Return the first observation of one resampled period.

    Parameters
    ----------
    entry
        The observations that fall into a single period. It must support
        ``len()`` and ``.iloc`` (e.g. a pandas Series).

    Returns
    -------
    ``entry.iloc[0]`` when the period has at least one observation,
    otherwise ``np.nan``.
    """
    if len(entry) == 0:
        # Empty period (missing data): return an explicit missing value
        # instead of falling off the end of the function with an implicit None.
        return np.nan
    return entry.iloc[0]
# First observation of each weekly bin.
starbucks_stocks.resample('W').apply(first_observation)

In [None]:
# Load temperatures.csv with the 'time' column as the index; parse_dates=True
# asks pandas to parse that index as dates. Then preview the first five rows.
temperature = pd.read_csv(
    'temperatures.csv',
    parse_dates=True,
    index_col='time',
)
temperature.head(n=5)

In [None]:
# Spacing between the third and fourth index entries of the temperature data.
temperature.index[3] - temperature.index[2]

In [None]:
# First observation of each hourly bin.
temperature.resample('h').apply(first_observation)