In [None]:
!pip install -q yfinance

In [None]:
import pandas as pd
import yfinance as yf
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Yahoo Finance ticker analysed throughout this notebook
# (the ".AX" suffix denotes an ASX listing).
ticker_symbol = 'WES.AX'

# Fetch daily price data. Yahoo treats `end` as exclusive.
# `auto_adjust=False` is passed explicitly so that the raw OHLC columns *and*
# 'Adj Close' are returned regardless of the installed yfinance version
# (newer releases adjust prices by default and omit 'Adj Close').
westfarmers_data = yf.download(
    ticker_symbol,
    start='2020-01-01',
    end='2023-12-31',
    auto_adjust=False,
)

# Newer yfinance releases return two-level (price field, ticker) columns even
# for a single ticker. Flatten to the price-field level so that later cells can
# use plain column names such as 'Close'.
if westfarmers_data.columns.nlevels > 1:
    westfarmers_data.columns = westfarmers_data.columns.get_level_values(0)

# yfinance reports download failures by returning an empty frame rather than
# raising, so stop here instead of failing obscurely in a later cell.
if westfarmers_data.empty:
    raise ValueError(f"No price data was downloaded for {ticker_symbol!r}")

# Display the first few rows of the dataset
westfarmers_data.head()

In [None]:
# Count the missing entries in every column of the raw download.
# The bare expression on the last line renders the per-column counts.
missing_values = westfarmers_data.isna().sum()
missing_values

In [None]:
# Columns that are not used by the rest of the analysis.
columns_to_drop = ['Open', 'High', 'Low', 'Adj Close']

# `errors='ignore'` keeps this cell working when one of the columns is absent
# (e.g. 'Adj Close' is not returned when yfinance auto-adjusts prices).
# `drop` returns a new frame, so `westfarmers_data` itself is left untouched.
westfarmers_data_cleaned = westfarmers_data.drop(columns=columns_to_drop, errors='ignore')

# Display the first few rows of the cleaned dataset
westfarmers_data_cleaned.head()

In [None]:
import matplotlib.pyplot as plt

# Line chart of the closing price over the whole downloaded period,
# drawn on an explicitly created axes object.
fig, ax = plt.subplots(figsize=(14, 7))
westfarmers_data_cleaned['Close'].plot(ax=ax)
ax.set_title('Westfarmers Closing Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.grid(True)
plt.show()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the closing-price levels.
result = adfuller(westfarmers_data_cleaned['Close'])

# The first two entries of the result are the test statistic and its p-value;
# the entry at position 4 holds the critical values.
adf_statistic, p_value = result[:2]
critical_values = result[4]

(adf_statistic, p_value, critical_values)

In [None]:
# First difference of the closing price; the leading NaN created by `diff`
# is removed before testing.
westfarmers_data_diff = (
    westfarmers_data_cleaned['Close']
    .diff()
    .dropna()
)

# Augmented Dickey-Fuller test on the differenced series.
result_diff = adfuller(westfarmers_data_diff)

# Test statistic and p-value are the first two entries; critical values sit
# at position 4.
adf_statistic_diff, p_value_diff = result_diff[:2]
critical_values_diff = result_diff[4]

(adf_statistic_diff, p_value_diff, critical_values_diff)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Side-by-side correlograms of the differenced series:
# ACF on the left panel, PACF on the right, both up to lag 40.
fig, ax = plt.subplots(1, 2, figsize=(14, 4))

for correlogram, panel in zip((plot_acf, plot_pacf), ax):
    correlogram(westfarmers_data_diff, ax=panel, lags=40)

plt.tight_layout()
plt.show()

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# ARIMA(p=1, d=1, q=1) fitted on the closing-price levels
# (the d=1 differencing is applied by the model itself).
model = ARIMA(
    westfarmers_data_cleaned['Close'],
    order=(1, 1, 1),
)
results = model.fit()

# Render the fitted-model summary table.
results.summary()

In [None]:
# Forecast the next 30 steps. The model was fitted on one observation per
# trading day, so each step is one trading day, not one calendar day.
forecast_steps = 30
forecast = results.get_forecast(steps=forecast_steps)
mean_forecast = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

# Relabel the forecast with the business days that follow the last observed
# date. Consecutive calendar days would place forecasts on weekends and
# compress the horizon. Exchange holidays are NOT excluded here.
# NOTE(review): the relabelling is kept from the original cell, which implies
# statsmodels does not return a usable date index for this series — confirm.
forecast_start_date = westfarmers_data_cleaned.index[-1] + pd.offsets.BDay(1)
forecast_dates = pd.bdate_range(start=forecast_start_date, periods=forecast_steps)
forecast_end_date = forecast_dates[-1]
mean_forecast.index = forecast_dates
confidence_intervals.index = forecast_dates

# Plot the history, the point forecast and its confidence band
# (first column of conf_int = lower bound, second = upper bound).
plt.figure(figsize=(14, 7))
westfarmers_data_cleaned['Close'].plot(label='Past Closing Prices')
mean_forecast.plot(label='Forecast', color='red')
plt.fill_between(confidence_intervals.index,
                 confidence_intervals.iloc[:, 0],
                 confidence_intervals.iloc[:, 1], color='pink', alpha=0.3)
plt.title('Westfarmers Closing Price Forecast')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Baseline seasonal ARIMA: (1, 1, 1) non-seasonal terms and (1, 1, 1)
# seasonal terms with a period of 12 observations. Stationarity and
# invertibility constraints on the estimated parameters are switched off.
basic_sarima_options = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
basic_sarima_model = SARIMAX(westfarmers_data_cleaned['Close'], **basic_sarima_options)
basic_sarima_results = basic_sarima_model.fit()

In [None]:
# Produce the 30-step SARIMA forecast. These objects were used below without
# ever being created, which raised NameError on a fresh kernel.
basic_forecast_steps = 30
basic_forecast = basic_sarima_results.get_forecast(steps=basic_forecast_steps)
basic_mean_forecast = basic_forecast.predicted_mean
basic_confidence_intervals = basic_forecast.conf_int()

# Label the forecast with the business days following the last observation.
# One model step is one trading day, so consecutive calendar days (weekends
# included) would misplace the forecast. Exchange holidays are not excluded.
forecast_start_date = westfarmers_data_cleaned.index[-1] + pd.offsets.BDay(1)
forecast_dates = pd.bdate_range(start=forecast_start_date, periods=basic_forecast_steps)
forecast_end_date = forecast_dates[-1]
basic_mean_forecast.index = forecast_dates
basic_confidence_intervals.index = forecast_dates

# Plot the history, the point forecast and its confidence band
# (first column of conf_int = lower bound, second = upper bound).
plt.figure(figsize=(14, 7))
westfarmers_data_cleaned['Close'].plot(label='Past Closing Prices')
basic_mean_forecast.plot(label='Forecast', color='red')
plt.fill_between(basic_confidence_intervals.index,
                 basic_confidence_intervals.iloc[:, 0],
                 basic_confidence_intervals.iloc[:, 1], color='pink', alpha=0.3)
plt.title('Adjusted Westfarmers Closing Price Basic SARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Same SARIMA forecast as before, but the history is restricted to the
# 180 calendar days preceding the last observation.
zoom_start_date = westfarmers_data_cleaned.index[-1] - pd.Timedelta(days=180)
recent_close = westfarmers_data_cleaned['Close'].loc[zoom_start_date:]

fig, ax = plt.subplots(figsize=(14, 7))
recent_close.plot(ax=ax, label='Past 6 Months Closing Prices')
basic_mean_forecast.plot(ax=ax, label='Forecast', color='red')

# Shade the area between the two columns of the confidence-interval frame.
ax.fill_between(
    basic_confidence_intervals.index,
    basic_confidence_intervals.iloc[:, 0],
    basic_confidence_intervals.iloc[:, 1],
    color='pink',
    alpha=0.3,
)
ax.set_title('Zoomed-in Westfarmers Closing Price Basic SARIMA Forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Multiplicative decomposition of the closing price with a cycle length
# of 12 observations.
decomposition = seasonal_decompose(
    westfarmers_data_cleaned['Close'],
    model='multiplicative',
    period=12,
)

# `decomposition.plot()` builds its own figure; resize and title that figure.
fig = decomposition.plot()
fig.set_size_inches(14, 7)
fig.suptitle('Time Series Decomposition of Westfarmers Closing Prices')
plt.show()

In [None]:
# Lagged closes: Lag_k holds the closing price from k rows earlier.
for lag_rows in (1, 2, 3):
    westfarmers_data_cleaned[f'Lag_{lag_rows}'] = westfarmers_data_cleaned['Close'].shift(lag_rows)

# 7-row rolling mean and standard deviation of the close.
# Note: each window ends at (and therefore includes) the current row.
close_window_7 = westfarmers_data_cleaned['Close'].rolling(window=7)
westfarmers_data_cleaned['Rolling_Mean_7'] = close_window_7.mean()
westfarmers_data_cleaned['Rolling_Std_7'] = close_window_7.std()

# Overlay the rolling statistics on the closing price.
fig, ax = plt.subplots(figsize=(14, 7))
westfarmers_data_cleaned['Close'].plot(ax=ax, label='Closing Prices')
westfarmers_data_cleaned['Rolling_Mean_7'].plot(ax=ax, label='7-Day Rolling Mean', linestyle='--')
westfarmers_data_cleaned['Rolling_Std_7'].plot(ax=ax, label='7-Day Rolling Std. Dev.', linestyle=':')
ax.set_title('Westfarmers Closing Prices with Engineered Features')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Predictors and the subset of them built from rolling windows.
features = ['Lag_1', 'Lag_2', 'Lag_3', 'Rolling_Mean_7', 'Rolling_Std_7']
rolling_features = ['Rolling_Mean_7', 'Rolling_Std_7']

# The rolling statistics are computed over windows that END on the current
# row, so they contain the very 'Close' value being predicted (target
# leakage). Shift them by one row so every predictor only uses information
# available before the target day. Work on a copy so the shared
# `westfarmers_data_cleaned` frame is not modified by this cell.
model_frame = westfarmers_data_cleaned.copy()
model_frame[rolling_features] = model_frame[rolling_features].shift(1)

# Dropping NA values (due to lag, rolling and shifted features)
data_with_features = model_frame.dropna()

# Defining features and target variable
X = data_with_features[features]
y = data_with_features['Close']

# Chronological 80% / 20% split; shuffle=False keeps the test set strictly
# after the training set in time.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Training the Linear Regression model
# (note: this rebinds `model`, previously the ARIMA model object).
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting on the test set
y_pred = model.predict(X_test)

# Mean Squared Error of the out-of-sample predictions
mse = mean_squared_error(y_test, y_pred)
mse

In [None]:
# Compare the held-out closing prices with the regression predictions
# over the test period.
fig, ax = plt.subplots(figsize=(14, 7))
y_test.plot(ax=ax, label='Actual Closing Prices')
ax.plot(
    y_test.index,
    y_pred,
    label='Predicted Closing Prices',
    color='red',
    linestyle='--',
)
ax.set_title('Actual vs. Predicted Westfarmers Closing Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Chronological 80% / 20% split. An explicit split position is used instead
# of negative slicing: with a test size of 0, `iloc[:-0]` would yield an
# EMPTY training set and put every row into the test set.
close_prices = westfarmers_data_cleaned['Close']
test_size = int(0.2 * len(westfarmers_data_cleaned))
split_position = len(close_prices) - test_size
train_data = close_prices.iloc[:split_position]
test_data = close_prices.iloc[split_position:]

# Holt-Winters exponential smoothing with additive trend and additive
# seasonality over a 12-observation cycle.
model_ets = ExponentialSmoothing(train_data, trend='add', seasonal='add', seasonal_periods=12)
fit_ets = model_ets.fit()

# Forecast the 30 observations that follow the training data (capped at the
# number of available test observations).
forecast_horizon = min(30, len(test_data))
forecast_ets = fit_ets.forecast(steps=forecast_horizon)

# Those steps correspond one-to-one to the first test observations, so label
# the forecast with their dates. Otherwise it is drawn with whatever index
# statsmodels returned, which does not match the date axis used below.
forecast_ets.index = test_data.index[:forecast_horizon]

# Plotting the training data, test data, and forecast
plt.figure(figsize=(14, 7))
train_data.plot(label='Training Data')
test_data.plot(label='Test Data')
forecast_ets.plot(label='ETS Forecast', color='red', linestyle='--')
plt.title('Holt-Winters Exponential Smoothing Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Label the forecast with the dates of the test observations it predicts.
# The model was trained on one row per trading day, so forecast step k is the
# k-th row of the test set. Consecutive calendar days would put forecasts on
# weekends and shift them against the actual prices.
if len(test_data) < len(forecast_ets):
    raise ValueError("test_data has fewer observations than the forecast horizon")
forecast_dates = test_data.index[:len(forecast_ets)]
forecast_start_date = forecast_dates[0]
forecast_end_date = forecast_dates[-1]
forecast_ets.index = forecast_dates

# Plotting the training data, test data, and corrected forecast
plt.figure(figsize=(14, 7))
train_data.plot(label='Training Data')
test_data.plot(label='Test Data')
forecast_ets.plot(label='ETS Forecast', color='red', linestyle='--')
plt.title('Holt-Winters Exponential Smoothing Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Extracting the portion of the test data that corresponds to the forecast period
test_data_for_evaluation = test_data.head(len(forecast_ets))

# Compare actuals and forecasts strictly by POSITION (step k vs. k-th test
# row). Arithmetic between two pandas Series aligns on index labels, so if the
# forecast index differs from the test index the MAPE would be computed on
# mismatched pairs, with NaN wherever a label has no counterpart.
actual_values = np.asarray(test_data_for_evaluation, dtype=float)
forecast_values = np.asarray(forecast_ets, dtype=float)[:len(actual_values)]

# Calculating the error metrics
mae = mean_absolute_error(actual_values, forecast_values)
mse = mean_squared_error(actual_values, forecast_values)
rmse = np.sqrt(mse)
# Mean absolute percentage error, expressed in percent.
mape = 100 * np.mean(np.abs((actual_values - forecast_values) / actual_values))

mae, mse, rmse, mape

In [None]:
# Simple day-over-day percentage change of the close; the first row has no
# predecessor and is dropped.
westfarmers_returns = (
    westfarmers_data_cleaned['Close']
    .pct_change()
    .dropna()
)

# Time-series chart of the daily returns.
fig, ax = plt.subplots(figsize=(14, 7))
westfarmers_returns.plot(ax=ax)
ax.set_title('Westfarmers Daily Returns')
ax.set_xlabel('Date')
ax.set_ylabel('Daily Return')
ax.grid(True)
plt.show()

In [None]:
# Volatility proxy: standard deviation of daily returns over a trailing
# window of 21 observations.
returns_window_21 = westfarmers_returns.rolling(window=21)
rolling_volatility = returns_window_21.std()

# Chart of the rolling volatility through time.
fig, ax = plt.subplots(figsize=(14, 7))
rolling_volatility.plot(ax=ax, color='blue', label='21-Day Rolling Volatility')
ax.set_title('21-Day Rolling Volatility of Westfarmers Stock Returns')
ax.set_xlabel('Date')
ax.set_ylabel('Volatility')
ax.legend()
ax.grid(True)
plt.show()