In [None]:
# Time-series analysis - March 2024 - Amtrak.csv dataset

In [None]:
# import needed common libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt 

In [None]:
import os
# Move into the "Documents" folder, which is where the relative path
# 'Amtrak.csv' used by the next cell gets resolved.
# The guard keeps this cell safe to re-run: a second unconditional
# os.chdir("Documents") would look for Documents/Documents and either raise
# FileNotFoundError or silently descend one level too deep.
if os.path.basename(os.getcwd()) != "Documents":
    os.chdir("Documents")
os.getcwd()

In [None]:
# Load Amtrak.csv (resolved against the current working directory) into a
# DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer='Amtrak.csv')
df.head(n=5)

In [None]:
# Parse the day/month/year strings of the Month column into datetimes and
# keep them in a new Date column; the updated frame is then displayed
df["Date"] = pd.to_datetime(df.Month, format='%d/%m/%Y')
df


In [None]:
# Build the time series: Ridership values indexed by the parsed Date column
rider_ts = pd.Series(
    data=df["Ridership"].values,
    index=df["Date"],
    name='Ridership',
)

In [None]:
# Line plot of the whole ridership series, with the y-axis fixed to 1300-2300
# Do you observe any trend and seasonality in the plot?
ax = rider_ts.plot()
ax.set(xlabel='Time', ylabel='Ridership (in 000s)', ylim=(1300, 2300))
plt.show()

In [None]:
# Train/validation split of the series by position:
# the final 36 observations (the last three years) are held out for
# validation and everything before them is used for training
nValid = 36
nTrain = rider_ts.shape[0] - nValid
train_ts, valid_ts = rider_ts.iloc[:nTrain], rider_ts.iloc[nTrain:]

In [None]:
# Model 1: A linear trend time series model
import statsmodels.formula.api as sm
from statsmodels.tsa import tsatools, stattools
# trend='ct' makes add_trend append both a constant and a trend column;
# the calendar month of every index entry is then attached as Month
ts_df = tsatools.add_trend(rider_ts, trend='ct').assign(
    Month=lambda frame: frame.index.month
)

# display updated ts_df
ts_df

In [None]:
# Split the regression frame at the same position as the series:
# the last 36 rows become the validation frame, the rest the training frame
nValid = 36
nTrain = rider_ts.shape[0] - nValid
train_df, valid_df = ts_df.iloc[:nTrain], ts_df.iloc[nTrain:]


In [None]:
# Model 1 fit: ordinary least squares of Ridership on the trend column,
# estimated on the training rows only
rider_lm = sm.ols('Ridership ~ trend', train_df).fit()

rider_lm.summary()

In [None]:
# Evaluate Model1 performance
# ! pip install dmba
# Comment the above install code after dmba library has been installed
from dmba import regressionSummary
# Predict the validation rows with Model 1, then summarize those predictions
# against the held-out series with dmba's regressionSummary
predict_lm = rider_lm.predict(exog=valid_df)
regressionSummary(valid_ts, predict_lm)

In [None]:
# Model 2: A polynomial trend time series model
import numpy as np
# Cubic trend: Ridership regressed on trend, trend squared and trend cubed,
# fitted on the training rows
rider_lm_poly = sm.ols(
    'Ridership ~ trend + np.square(trend) + np.power(trend,3)',
    train_df,
).fit()
rider_lm_poly.summary()

In [None]:
# Score Model 2: predict the validation rows and summarize the predictions
# against the held-out series
predict_lm_poly = rider_lm_poly.predict(exog=valid_df)
regressionSummary(valid_ts, predict_lm_poly)

In [None]:
# Model 3: additive seasonality only.
# Wrapping Month in C(...) tells the formula to treat it as a categorical
# variable rather than a number.
rider_lm_season = sm.ols('Ridership~C(Month)', train_df).fit()
rider_lm_season.summary()

In [None]:
# Score Model 3: predict the validation rows and summarize the predictions
# against the held-out series
predict_lm_season = rider_lm_season.predict(exog=valid_df)
regressionSummary(valid_ts, predict_lm_season)

In [None]:
# Model 4: linear trend combined with categorical Month seasonality,
# fitted on the training rows
modelfomula = 'Ridership ~ trend + C(Month)'
rider_lm_trendseason = sm.ols(modelfomula, train_df).fit()
rider_lm_trendseason.summary()

In [None]:
# Score Model 4: predict the validation rows and summarize the predictions
# against the held-out series
predict_lm_trendseason = rider_lm_trendseason.predict(exog=valid_df)
regressionSummary(valid_ts, predict_lm_trendseason)

In [None]:
# Visualize Model 4
# Blue (C0): the original time series - solid for training rows, dashed for validation rows
# Orange (C1): the model's predictions - solid for training rows, dashed for validation rows
fig, ax = plt.subplots(figsize=(9, 3.75))
observed_style = dict(y='Ridership', ax=ax, color='C0', linewidth=2)
train_df.plot(**observed_style)
valid_df.plot(linestyle='dashed', **observed_style)
# The prediction Series are plotted without ax=..., so they are drawn on the
# current axes, i.e. the one plt.subplots just created
train_fit = rider_lm_trendseason.predict(train_df)
valid_forecast = rider_lm_trendseason.predict(valid_df)
train_fit.plot(color='C1')
valid_forecast.plot(color='C1', linestyle='dashed')
ax.get_legend().remove()
plt.show()

In [None]:
# Model 5: quadratic trend (trend and trend squared) combined with
# categorical Month seasonality, fitted on the training rows.
# NOTE: modelfomula and rider_lm_trendseason are the same names the Model 4
# cell uses, so running this cell replaces the Model 4 formula and fit.
modelfomula = 'Ridership ~ trend + np.square(trend) + C(Month)'
rider_lm_trendseason = sm.ols(modelfomula, train_df).fit()
rider_lm_trendseason.summary()

In [None]:
# Score Model 5: predict the validation rows and summarize the predictions
# against the held-out series
predict_lm_trendseason = rider_lm_trendseason.predict(exog=valid_df)
regressionSummary(valid_ts, predict_lm_trendseason)

In [None]:
# Visualize Model 5
# Blue (C0): the original time series - solid for training rows, dashed for validation rows
# Orange (C1): the model's predictions - solid for training rows, dashed for validation rows
fig, ax = plt.subplots(figsize=(9, 3.75))
observed_style = dict(y='Ridership', ax=ax, color='C0', linewidth=2)
train_df.plot(**observed_style)
valid_df.plot(linestyle='dashed', **observed_style)
# The prediction Series are plotted without ax=..., so they are drawn on the
# current axes, i.e. the one plt.subplots just created
train_fit = rider_lm_trendseason.predict(train_df)
valid_forecast = rider_lm_trendseason.predict(valid_df)
train_fit.plot(color='C1')
valid_forecast.plot(color='C1', linestyle='dashed')
ax.get_legend().remove()
plt.show()

In [None]:
# Simple Moving Average (SMA): mean over a trailing window of 6 rows
# (6 months), stored as SMA_6
df['SMA_6'] = df.Ridership.rolling(6).mean()

# Exponential Smoothing (ES) through pandas' ewm, with smoothing factor
# alpha = 0.2; adjust=False selects the recursive form of the weights
df['ES_0.2'] = df.Ridership.ewm(alpha=0.2, adjust=False).mean()

# Show the last 5 rows of the original series next to the two smoothed columns
df[['Ridership', 'SMA_6', 'ES_0.2']].tail(5)

In [None]:
# Visualize the two smoothed series against the original ridership
x = df["SMA_6"].dropna()  # rows where the 6-row rolling mean is NaN are dropped
y = df["ES_0.2"]

# Blue solid line (C0): original Ridership
# Orange dotted line (C1): x, the 6-month simple moving average
# Green dashed line (C2): y, the exponential smoothing with alpha = 0.2
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 3.75))
df.plot(y='Ridership', ax=ax, color='C0', linewidth=1)
# Series.plot has no y argument (the y='...' passed previously was ignored);
# the legend text is supplied through label instead.
x.plot(ax=ax, color='C1', linewidth=1, linestyle='dotted', label='SMA_6')
y.plot(ax=ax, color='C2', linewidth=1, linestyle='dashed', label='ES_0.2')
# df.plot built the legend while only Ridership was on the axes; rebuild it
# so that all three lines are listed.
ax.legend()
plt.show()