# Decompose #2

In [None]:
import pandas as pd

In [None]:
# Load the CSV (file name suggests monthly in-situ CO2 readings); the path is
# relative to the notebook's working directory.
df = pd.read_csv('monthly_in_situ_co2_mlo.csv')

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Show the 'Date' column, which later cells use to filter and as the x-axis.
df['Date']

In [None]:
# Keep only the rows labelled 2 through 774; .loc slices by label and includes both ends.
# NOTE(review): the bounds are hard-coded for this particular CSV — confirm if the file changes.
df = df.loc[2:774]

In [None]:
# Plot the 'fit' column against 'Date' for the rows whose Date is greater than 1989.
df[df['Date'] > 1989].plot(x='Date', y='fit')

In [None]:
# Subset used by the decomposition cells: rows whose Date is greater than 1989.
a = df.loc[df['Date'].gt(1989)]

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the 'fit' series using a seasonal period of 12 samples.
ss_decomposition = seasonal_decompose(a['fit'], model='additive', period=12)

# Pull the three components out of the result for the plotting cells.
estimated_trend, estimated_seasonal, estimated_residual = (
    ss_decomposition.trend,
    ss_decomposition.seasonal,
    ss_decomposition.resid,
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Four stacked panels sharing the Date axis: the original series followed by
# its trend, seasonal and residual components.
fig, axes = plt.subplots(4, 1, sharex=True, sharey=False)
fig.set_size_inches(15, 10)

for panel_ax, panel_series, panel_label in zip(
    axes,
    (a['fit'], estimated_trend, estimated_seasonal, estimated_residual),
    ('Original', 'Trend', 'Seasonality', 'Residuals'),
):
    panel_ax.plot(a['Date'], panel_series, label=panel_label)
    panel_ax.legend(loc='upper left')

In [None]:
# Plot the seasonal component on its own against Date.
plt.plot(a['Date'], estimated_seasonal, label='Seasonality')

In [None]:
# Display the seasonal component values.
estimated_seasonal

In [None]:
# Zoom in on the seasonal component for the rows labelled 714 through 774.
# Both arguments use the same label bounds so x and y always have the same
# length (the original sliced y to 775, which only matched because label 775
# does not exist after the earlier df.loc[2:774] cut).
plt.plot(a.loc[714:774, 'Date'], estimated_seasonal.loc[714:774], label='Seasonality')

In [None]:
from statsmodels.tsa.seasonal import STL

# Fit an STL decomposition to the same 'fit' series (period of 12 samples),
# then draw the fitted result's built-in plot.
res = STL(a['fit'], period=12).fit()
res.plot()
plt.show()

## Let's try this out with stock data we saw before

In [None]:
import yahoo_fin.stock_info as si
import requests
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets

In [None]:
def get_symbol(symbol):
    """Return the company name for a stock ticker symbol.

    Looks up the quote data for ``symbol`` through ``yahoo_fin`` and returns
    its ``'shortName'`` entry.
    """
    quote = si.get_quote_data(symbol)
    return quote['shortName']

In [None]:
# Look up the company name for the AAPL ticker.
get_symbol('AAPL')

In [None]:
# Look up the company name for the GOOG ticker.
get_symbol('GOOG')

In [None]:
# Fetch AAPL price data between the two dates (given as MM/DD/YYYY strings).
few_days = si.get_data('aapl', start_date = '01/01/2020', end_date = '10/30/2022')

In [None]:
# Stack the AAPL rows on top of the GOOG rows for the same date window.
# The AAPL data was already downloaded into few_days with identical arguments,
# so it is reused here instead of issuing the same request a second time;
# pd.concat builds a new frame and leaves few_days untouched.
goog_days = pd.concat([few_days,
                       si.get_data('goog', start_date = '01/01/2020', end_date = '10/30/2022')])

In [None]:
# Display the AAPL frame.
few_days

In [None]:
# Sort the combined frame by its index, so rows from the two concatenated
# downloads are ordered together rather than one block after the other.
goog_days = goog_days.sort_index()

In [None]:
# The Matplotlib way: build the figure explicitly and draw AAPL's 'high'
# column against the frame's index.

fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(few_days.index, few_days['high'])
ax.set_title(get_symbol('AAPL'))
# Tilt the date tick labels so they do not overlap.
fig.autofmt_xdate()

In [None]:
# The Matplotlib way, applied to the combined frame: a single line through the
# 'high' column of every row in goog_days.
# NOTE(review): goog_days holds rows for both tickers while the title names
# only AAPL — confirm this is the intended illustration.

fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(goog_days.index, goog_days['high'])
ax.set_title(get_symbol('AAPL'))
# Tilt the date tick labels so they do not overlap.
fig.autofmt_xdate()

In [None]:
# The Matplotlib way: one line per ticker, selected from the combined frame by
# its 'ticker' column.

fig, ax = plt.subplots(1, 1, figsize=(7, 5))
for ticker in ('AAPL', 'GOOG'):
    # Filter once per ticker and reuse the result for both x and y.
    ticker_rows = goog_days.loc[goog_days['ticker'] == ticker]
    ax.plot(ticker_rows.index, ticker_rows.high, label=ticker)
# The chart shows both companies, so name both in the title and add a legend
# to tell the two lines apart.
ax.set_title(f"{get_symbol('AAPL')} vs {get_symbol('GOOG')}")
ax.legend(loc='upper left')
fig.autofmt_xdate()

In [None]:
# The Pandas way: let DataFrame.plot draw the 'high' column against the index.
# The trailing semicolon suppresses the notebook's echo of the return value.

few_days.plot(y='high');

In [None]:
# The Pandas way, applied to the combined frame (rows from both downloads).

goog_days.plot(y='high');

In [None]:
# Same pandas plot, now sized and titled like the matplotlib version and with
# the legend turned off.
few_days.plot(y='high',
              figsize=(7,5),
              title=get_symbol('AAPL'),
              legend=False);

In [None]:
# Display the first ten rows.
few_days[:10]

In [None]:
# Plot 'high' for the first ten rows only.
few_days[:10].plot(y='high',
              figsize=(7,5),
              title=get_symbol('AAPL'),
              legend=False);

In [None]:
# Show the dtype of each column.
few_days[:10].dtypes

In [None]:
# Print the frame summary (index type, columns, non-null counts) for the first ten rows.
few_days[:10].info()

In [None]:
# Print the frame's index.
print(few_days.index)

In [None]:
# Day-of-month component of each index entry.
few_days.index.day

In [None]:
# Week number of each index entry. DatetimeIndex.week was deprecated in
# pandas 1.1 and removed in pandas 2.0, so the value is read through
# isocalendar() and wrapped in an Index to keep an index-like result.
pd.Index(few_days.index.isocalendar().week)

In [None]:
# ISO week number of each index entry, taken from the isocalendar() table.
few_days.index.isocalendar().week

In [None]:
# Compare the span covered by the index with the number of distinct dates in
# it; a smaller count means some calendar days have no row.
date_range = few_days.index.max() - few_days.index.min()

print(date_range.days)
print(few_days.index.unique().size)

In [None]:
# Index running from the first to the last date present in few_days, at
# pd.date_range's default frequency.
new_index = pd.date_range(start=few_days.index.min(), end=few_days.index.max())
new_index

In [None]:
# Alternatives for the entries of new_index that are missing from few_days
# (left commented out for comparison):
#few_days_filled = few_days.reindex(new_index, fill_value = 0)
#few_days_filled = few_days.reindex(new_index, fill_value = None)
# Chosen here: method='ffill' carries the last available row forward into each
# missing entry.
few_days_filled = few_days.reindex(new_index, method = 'ffill')

In [None]:
# Display the reindexed, forward-filled frame.
few_days_filled

In [None]:
# Plot 'high' for the first ten rows of the filled frame.
few_days_filled[:10].plot(y='high',
              figsize=(7,5),
              title=get_symbol('AAPL'),
              legend=False);

In [None]:
# Mean of 'open' per weekly bin.
few_days_filled['open'].resample('W').mean()

In [None]:
# Mean of 'open' per monthly bin.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' — confirm against the installed version.
few_days_filled['open'].resample('M').mean()

In [None]:
# Mean of 'open' per quarterly bin.
# NOTE(review): newer pandas releases deprecate the 'Q' alias in favour of
# 'QE' — confirm against the installed version.
few_days_filled['open'].resample('Q').mean()

In [None]:
# Mean of 'open' per yearly bin.
# NOTE(review): newer pandas releases deprecate the 'Y' alias in favour of
# 'YE' — confirm against the installed version.
few_days_filled['open'].resample('Y').mean()

In [None]:
# Weekly mean of 'open', kept for the comparison plots below.
open_weekly = few_days_filled['open'].resample('W').mean()

In [None]:
# Display the weekly mean of 'open'.
open_weekly

In [None]:
# Weekly maximum of 'high' (max, not mean, so each bin keeps its peak value).
high_weekly = few_days_filled['high'].resample('W').max()

In [None]:
# Display the weekly maximum of 'high'.
high_weekly

In [None]:
# Draw the weekly high and the weekly mean open in the same cell for comparison.
high_weekly.plot()
open_weekly.plot()

In [None]:
# Weekly minimum of 'low' (min, so each bin keeps its lowest value).
low_weekly = few_days_filled['low'].resample('W').min()

In [None]:
# Display the weekly minimum of 'low'.
low_weekly

In [None]:
# Draw the weekly high, mean open and low in the same cell for comparison.
high_weekly.plot()
open_weekly.plot()
low_weekly.plot()

In [None]:
# Remove the 'ticker' column before the rolling-window cells.
# NOTE(review): presumably because it holds ticker strings, which the numeric
# rolling statistics cannot aggregate — confirm.
few_days_filled = few_days_filled.drop(columns='ticker')

In [None]:
# Rolling window over 7 consecutive rows of the filled frame.
windowsize = 7
rolling = few_days_filled.rolling(windowsize)

In [None]:
# Display the rolling-window object itself (no statistic has been computed yet).
rolling

In [None]:
# Rolling mean of every column, with the rows that contain missing values dropped.
rolling.mean().dropna()

In [None]:
# Compare rolling means of 'open' over three window lengths: 7, 70 and 140 rows.
windowsize = 7
rolling1 = few_days_filled.rolling(window=windowsize)
rolling2 = few_days_filled.rolling(window=windowsize * 10)
rolling3 = few_days_filled.rolling(window=windowsize * 20)

# Rows with missing values are dropped from each averaged frame before the
# 'open' column is selected and plotted.
rolling1.mean().dropna().loc[:, 'open'].plot()
rolling2.mean().dropna().loc[:, 'open'].plot()
rolling3.mean().dropna().loc[:, 'open'].plot()

In [None]:
# Same three rolling windows (7, 70 and 140 rows), but each plot is limited to
# a positional row range of the averaged frame.
windowsize = 7
rolling1 = few_days_filled.rolling(windowsize*1)
rolling2 = few_days_filled.rolling(windowsize*10)
rolling3 = few_days_filled.rolling(windowsize*20)

# .iloc selects by position: the given row range and the first column.
# NOTE(review): column 0 is presumably 'open' — confirm against the frame's column order.
rolling1.mean().dropna().iloc[1:140,0].plot()
rolling2.mean().dropna().iloc[1:70,0].plot()
rolling3.mean().dropna().iloc[1:10,0].plot()

In [None]:
# First rows of three 'volume' statistics: 7-row rolling mean, 7-row rolling
# standard deviation, and the running total over the whole frame.
windowsize = 7
rolling = few_days_filled.rolling(windowsize)

print(rolling.mean().dropna()['volume'].head())
print(rolling.std().dropna()['volume'].head())
# cumsum is taken on the frame itself, not on the rolling window.
print(few_days_filled.cumsum().dropna()['volume'].head())

# Exercise

Try analyzing the following COVID dataset.

https://data.chhs.ca.gov/dataset/covid-19-time-series-metrics-by-county-and-state/resource/046cdd2b-31e5-4d34-9ed3-b48cdbc4be7a

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the exercise CSV; the path is relative to the notebook's working
# directory. Note that this rebinds df, replacing the frame loaded earlier.
df = pd.read_csv('covid-deaths-ca-102422.csv')