In [None]:
# Reference link kept for context; presumably the tutorial this analysis follows (TODO confirm).
"https://builtin.com/data-science/time-series-python"

In [None]:
import pandas as pd

# Relative path of the order-count export that the rest of the notebook works on.
ORDERS_CSV_PATH = "../csv/Ming25Jun67_total_order.csv"

# Read the CSV into the working frame and preview its first rows.
df = pd.read_csv(ORDERS_CSV_PATH)
df.head()

In [None]:
# Show the last rows of the loaded frame (complements the head() preview).
df.tail()

In [None]:
# Parse the completion timestamps into datetime values; the column is replaced in the same position.
df = df.assign(order_completed_at=pd.to_datetime(df['order_completed_at']))

In [None]:
# Print column dtypes and non-null counts for the frame.
df.info()

In [None]:
# Promote the completion timestamp to the index so the frame behaves as a time series.
# The guard makes the cell safe to re-run: after the first run the column has been
# moved into the index, and an unguarded lookup would raise KeyError.
if 'order_completed_at' in df.columns:
    df = df.set_index('order_completed_at')

# Bare expression instead of print() so the notebook renders the frame as a table.
df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns 

In [None]:
# Overview plot of the whole order series.
fig, ax = plt.subplots(figsize=(20, 10))

# Pass the frame by keyword: `data` is only the first positional parameter in
# recent seaborn releases, so the keyword form is the portable spelling.
sns.lineplot(data=df, ax=ax)
ax.set_title('Total Order By Date from 2022-2024')
ax.set_xlabel('Date')
ax.set_ylabel("Number Of Orders.")

# Render explicitly so the cell does not echo the repr of the last call.
plt.show()

### Stationarity test

In [None]:
# Rolling window over 7 consecutive rows; both statistics share the same window.
ROLLING_WINDOW = 7
windowed = df.rolling(window=ROLLING_WINDOW)

rolling_mean = windowed.mean()
rolling_std = windowed.std()

In [None]:
# Raw order series on its own.
plt.figure(figsize=(10, 5))
plt.plot(df, color="blue", label="Original Orders Data")

# Without a legend the `label` above is never drawn.
plt.legend(loc="best")
plt.show()

In [None]:
# Rolling mean on its own.
plt.figure(figsize=(10, 5))
plt.plot(rolling_mean, color="red", label="Rolling Mean Orders Number")

# Without a legend the `label` above is never drawn.
plt.legend(loc="best")
plt.show()

In [None]:
# Rolling standard deviation on its own.
plt.figure(figsize=(10, 5))
plt.plot(rolling_std, color="black", label = "Rolling Standard Deviation in Orders Number")

# Without a legend the `label` above is never drawn.
plt.legend(loc="best")
plt.show()

In [None]:
# Overlay the raw series with its rolling mean and rolling standard deviation
# on a single axis so level and spread can be compared visually.
fig, ax = plt.subplots(figsize=(12, 6))

# (data, line colour, legend label) for each curve, drawn in this order.
overlays = (
    (df, "blue", "Original Orders Data"),
    (rolling_mean, "red", "Rolling Mean Orders Number"),
    (rolling_std, "black", "Rolling Standard Deviation in Orders Number"),
)
for curve, line_color, line_label in overlays:
    ax.plot(curve, color=line_color, label=line_label)

ax.set_title("Orders Time Series, Rolling Mean, Standard Deviation")
ax.legend(loc="best")

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the order counts; the lag length is chosen by AIC.
# The series is selected by name so the test always receives one-dimensional input,
# even if other columns are present on `df`.
adft = adfuller(df['unique_order_count'], autolag="AIC")

# First five entries of the result tuple, in order: test statistic, p-value,
# lags used, observations used, and a dict of critical values keyed by level.
test_statistic, p_value, lags_used, n_observations, critical_values = adft[:5]

# Tabulate the results, one row per metric.
output_df = pd.DataFrame({
    "Values": [
        test_statistic, p_value, lags_used, n_observations,
        critical_values['1%'], critical_values['5%'], critical_values['10%'],
    ],
    "Metric": [
        "Test Statistics", "p-value", "No. of lags used", "Number of observations used",
        "critical value (1%)", "critical value (5%)", "critical value (10%)",
    ],
})
print(output_df)


In [None]:
# Correlation of the order counts with themselves shifted by one row.
order_counts = df['unique_order_count']
autocorrelation_lag1 = order_counts.autocorr(lag=1)
print("One Day Lag: ", autocorrelation_lag1)

In [None]:
# Print the autocorrelation for every lag from 1 through 33, one per paragraph.
order_counts = df['unique_order_count']
for lag in range(1, 34):
    autocorrelation_lag = order_counts.autocorr(lag=lag)
    print(f"{lag} Day Lag: ", autocorrelation_lag)
    print()

In [None]:
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the order counts with a period of 7 observations.
decompose = seasonal_decompose(df['unique_order_count'], model='additive', period=7)

# One stacked panel per component, top to bottom.
fig, axes = plt.subplots(4, 1, figsize=(15, 10))
panels = (
    ('Observed', decompose.observed),
    ('Trend', decompose.trend),
    ('Seasonal', decompose.seasonal),
    ('Residual', decompose.resid),
)
for ax, (panel_title, component) in zip(axes, panels):
    component.plot(ax=ax, title=panel_title)

# Keep titles and tick labels from overlapping, then render.
plt.tight_layout()
plt.show()


In [None]:
# Chronological hold-out: rows before the split date train the model, rows from
# that date onward are kept for evaluation.
SPLIT_DATE = pd.Timestamp("2024-05-01")

# Slice on the datetime index directly. This leaves `df` untouched (no helper
# 'Date' column left behind for later cells) and builds fresh frames instead of
# assigning into filtered slices, which triggers SettingWithCopyWarning.
# NOTE(review): assumes 'unique_order_count' is the only data column on `df` — confirm.
orders = df['unique_order_count']
train = orders[orders.index < SPLIT_DATE].to_frame(name='train')
test = orders[orders.index >= SPLIT_DATE].to_frame(name='test')

plt.figure(figsize=(15, 10))
plt.plot(train, color="black")
plt.plot(test, color="red")
plt.title("Train/Test split for Orders Data")
plt.ylabel("Number Of Orders")
plt.xlabel('Date')
# sns.set() updates matplotlib's rcParams, so it mainly styles figures created after this call.
sns.set()
plt.show()

In [None]:
from pmdarima.arima import auto_arima

# Search for ARIMA orders on the training window. auto_arima hands back the
# selected model already fitted, so no separate fit() call is needed.
model = auto_arima(train, trace=True, error_action='ignore', suppress_warnings=True)

# Forecast one value per held-out row.
predicted = model.predict(n_periods=len(test))

# Keep only the forecast values and attach the test dates explicitly. Building
# the frame from raw values avoids pandas re-aligning on whatever index or name
# the prediction object may carry, which would otherwise produce NaN rows.
forecast = pd.DataFrame(
    {'Prediction': pd.Series(predicted).to_numpy()},
    index=test.index,
)

In [None]:
# Training data, held-out data and the model forecast on one axis.
plt.figure(figsize=(15, 10))

# Label every curve: with three same-style lines the plot is unreadable without a legend.
plt.plot(train, label="Train")
plt.plot(test, label="Test")
plt.plot(forecast, label="Prediction")
plt.title("Train/Test split for Orders Data")
plt.ylabel("Number Of Orders")
plt.xlabel('Date')
plt.legend(loc="best")
# sns.set() updates matplotlib's rcParams, so it mainly styles figures created after this call.
sns.set()
plt.show()

In [None]:
from math import sqrt
from sklearn.metrics import mean_squared_error

# Mean squared error of the forecast against the held-out values, and its square root.
mse = mean_squared_error(y_true=test, y_pred=forecast)
rms = sqrt(mse)

# Report RMSE first, then MSE.
for metric_label, metric_value in (("RMSE: ", rms), ("MSE: ", mse)):
    print(metric_label, metric_value)

In [None]:
from pmdarima.arima import auto_arima

# Number of future periods to forecast.
FORECAST_DAYS = 7

# Refit on the full history. The order-count column is selected explicitly so
# that any extra columns on `df` (for example a helper date column) never reach
# the model, which expects a single univariate series.
full_history = df['unique_order_count']

# auto_arima returns the selected model already fitted; no extra fit() call is needed.
model = auto_arima(full_history, trace=True, error_action='ignore', suppress_warnings=True)

predicted = model.predict(n_periods=FORECAST_DAYS)

# Label each forecast with its calendar date.
# NOTE(review): assumes one observation per day on a DatetimeIndex — confirm.
future_dates = pd.date_range(
    start=full_history.index.max() + pd.Timedelta(days=1),
    periods=FORECAST_DAYS,
    freq="D",
)

# Build the frame from raw values so pandas does not re-align on whatever index
# or name the prediction object may carry (which would produce NaN rows).
forecast = pd.DataFrame(
    {'Prediction': pd.Series(predicted).to_numpy()},
    index=future_dates,
)

In [None]:
# Print the forecast frame built in the previous cell.
print(forecast)