Import Python libraries

In [None]:
%pip install pandas
%pip install numpy
%pip install matplotlib
%pip install statsmodels

In [None]:
import pandas as pd
import numpy as np

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot as plt

# ETL framework

## Extract

Download the daily weather observations for Santa Barbara Airport (station `USW00023190`) from NOAA's GHCN-Daily archive.

In [None]:
# Fetch the gzip-compressed per-station CSV from NOAA straight into a DataFrame.
# Column names are supplied explicitly (presumably the file ships without a
# header row -- confirm). U1-U4 are placeholder names for the trailing columns.
# low_memory=False has pandas parse the file in a single pass instead of in
# chunks, which avoids mixed-type inference within a column.
df = pd.read_csv(
    "https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/by_station/USW00023190.csv.gz",
    names=["station", "date", "datatype", "value", "U1", "U2", "U3", "U4"],
    compression="gzip",
    low_memory=False,
)

## Transform

Reshape the raw `df` DataFrame into a usable daily table: one row per date with high, low, and average temperature columns.

In [None]:
# Reshape the raw long-format records (one row per station/date/datatype) into a
# daily table with one row per date and high / low / average temperature columns.
#
# NOTE: this cell rebinds `df` to the reshaped table, so the Extract cell must be
# re-run before this cell can be run again (the raw "date", "datatype" and
# "value" columns no longer exist afterwards).
# NOTE(review): the trailing columns U1-U4 are discarded without being inspected;
# confirm whether any of them marks observations that should be excluded.
df = (
    # Keep only the daily minimum / maximum temperature observations.
    df.loc[df["datatype"].isin(["TMIN", "TMAX"]), ["date", "datatype", "value"]]
    .assign(
        # Dates arrive as compact YYYYMMDD values; parse them with an explicit
        # format rather than relying on format inference.
        date=lambda obs: pd.to_datetime(obs["date"].astype(str), format="%Y%m%d"),
        # Raw values are divided by 10, converted with the Celsius-to-Fahrenheit
        # formula (x * 1.8 + 32) and rounded to whole degrees.
        value=lambda obs: (obs["value"] / 10 * 1.8 + 32).round(0),
    )
    .loc[lambda obs: obs["date"] >= "1970-01-01"]
    # Long -> wide: one column per datatype (TMAX, TMIN), indexed by date.
    .pivot(index="date", columns="datatype", values="value")
    # Drop days that are missing either the minimum or the maximum.
    .dropna()
    .rename_axis(None, axis=1)
    .reset_index()
    # Daily average is the midpoint of the (rounded) low and high.
    .assign(TAVG=lambda daily: (daily["TMIN"] + daily["TMAX"]) / 2)
    .rename(
        columns={
            "date": "Date",
            "TMAX": "High Temperature",
            "TMIN": "Low Temperature",
            "TAVG": "Average Temperature",
        }
    )
)

Create the monthly aggregate: the mean of the daily average temperature per month, limited to the most recent 120 months.

In [None]:
# Aggregate the daily table to calendar-month means of the average temperature
# and keep the most recent 120 monthly rows for the monthly decomposition.
# NOTE(review): the last bin may cover an incomplete month if the data ends
# mid-month -- confirm whether it should be excluded.
monthly = (
    df[["Date", "Average Temperature"]]
    .dropna()
    # `axis` is intentionally not passed to pd.Grouper: 0 is already the default
    # and the keyword is deprecated in recent pandas releases.
    .groupby(pd.Grouper(key="Date", freq="ME"))  # "ME" = month-end bins
    .mean()
    .reset_index()
    .tail(120)
)

In [None]:
# Preview the first rows of the monthly aggregate as a quick sanity check.
monthly.head()

# Time series decomposition models

Daily average temperatures

In [None]:
# Additive decomposition of the most recent 1000 rows of the daily average
# temperature, using a seasonal period of 365 rows.
# NOTE(review): the period counts rows, not calendar days; if the daily table has
# gaps (days dropped during the transform), 365 rows will not span exactly one
# year -- confirm this is acceptable.
series = df["Average Temperature"].iloc[-1000:]
result = seasonal_decompose(x=series, period=365, model="additive")
result.plot()  # draw the decomposition panels
plt.show()

Monthly average temperatures

In [None]:
# Additive decomposition of the monthly mean temperatures, using a seasonal
# period of 12 rows (one row per month in `monthly`).
series = monthly.loc[:, "Average Temperature"]
result = seasonal_decompose(x=series, period=12, model="additive")
result.plot()  # draw the decomposition panels
plt.show()