# **Practical Time Series Forecasting with R**
## Chapter 8: Forecasting Binary Outcomes

In [28]:
## ESTABLISH ENVIRONMENT
# Built-in libraries
from datetime import datetime
from pathlib import Path

# Third-party libraries for data handling and scientific computation
import numpy as np
import pandas as pd

# Libraries for preprocessing and visualization
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

### IMPORTING DATA ###
# Folder containing the data files. The value below is a placeholder and
# must be edited to point at the local copy of the data.
Data_Repo = Path("/Users/[Insert Path to Data]/")

# Read the rainfall workbook and give the verbose rainfall column the short
# name "Rainfall" that the rest of the notebook refers to.
rainfall_columns = {"Rainfall amount (millimetres)": "Rainfall"}
rain_df = pd.read_excel(Data_Repo / "MelbourneRainfall.xls").rename(columns=rainfall_columns)

In [29]:
## Table 8.2: Summary of logistic regression model output
## Table 8.3: Summary of logistic regression's predictive performance in training and validation periods

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# --- Feature engineering ---------------------------------------------------
# Parse the dates and derive the binary target: 1 when any rainfall was
# recorded for the row, 0 otherwise.
rain_df['Date'] = pd.to_datetime(rain_df['Date'], format="%m/%d/%Y")
rain_df['Rainy'] = np.where(rain_df['Rainfall'] > 0, 1, 0)
nPeriods = len(rain_df['Rainy'])

# Rainfall amount of the previous row. The first row has no predecessor, so
# its Lag1 is NaN (that row is dropped from the training set below).
rain_df['Lag1'] = rain_df['Rainfall'].shift(1)

# Running time index 1..nPeriods and a sine/cosine pair with a period of
# 365.25 rows (one year, assuming one row per day).
rain_df['t'] = np.arange(1, nPeriods + 1, 1)
rain_df['Seasonal_sine'] = np.sin(2 * np.pi * rain_df['t'] / 365.25)
rain_df['Seasonal_cosine'] = np.cos(2 * np.pi * rain_df['t'] / 365.25)

# Predictors are selected by name everywhere (fit and predict), so the code
# does not depend on the column order or column count of the source workbook.
feature_cols = ['Lag1', 'Seasonal_sine', 'Seasonal_cosine']

# --- Train / validation partition -------------------------------------------
# Rows dated up to and including the split date are used for training, the
# later rows for validation.
split_date = pd.to_datetime("12/31/2009", format="%m/%d/%Y")
# .iloc[1:] discards the first training row, whose Lag1 is NaN.
train_df = rain_df[rain_df['Date'] <= split_date].iloc[1:]
valid_df = rain_df[rain_df['Date'] > split_date]
xvalid = valid_df[feature_cols]

# --- Model -------------------------------------------------------------------
# NOTE: scikit-learn's LogisticRegression applies L2 regularization by default,
# so the coefficients can differ slightly from an unpenalized
# maximum-likelihood fit.
rainy_lr = LogisticRegression()
rainy_lr.fit(train_df[feature_cols], train_df['Rainy'])

# Table 8.2
print("Intercept:", rainy_lr.intercept_)
print("Coefficients:", rainy_lr.coef_)

# Predicted probability of the positive class (Rainy == 1) for each period.
rainy_lr_pred = rainy_lr.predict_proba(xvalid)[:, 1]
train_prob = rainy_lr.predict_proba(train_df[feature_cols])[:, 1]

# Table 8.3
# A row is classified as rainy when its predicted probability exceeds the
# cutoff. In the matrices, rows are actual classes and columns are predicted
# classes (scikit-learn convention).
cutoff = 0.5
train_cm = confusion_matrix(train_df['Rainy'], np.where(train_prob > cutoff, 1, 0))
valid_cm = confusion_matrix(valid_df['Rainy'], np.where(rainy_lr_pred > cutoff, 1, 0))

print ( 'Confusion Matrix: train')
print(train_cm)
print ( 'Confusion Matrix: valid')
print(valid_cm)




Intercept: [-0.76877475]
Coefficients: [[ 0.11185534 -0.26814291 -0.37038982]]
Confusion Matrix: train
[[2251  104]
 [1115  182]]
Confusion Matrix: valid
[[373  21]
 [220  55]]
