In [None]:
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.graphics.api import qqplot
%matplotlib inline

In [None]:
# Load the raw sales export into a DataFrame and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer="Cryogenic Storage Tank Sales data.xlsx - Cryogenic sales data.csv"
)
df.head(n=5)

In [None]:
# Give the two columns used below short names, in a single in-place rename.
df.rename(
    columns={"Sales in Thousands": "sales", "Ship Date": "Date"},
    inplace=True,
)

In [None]:
# Keep only the sales and Date columns. Take an explicit copy so that later
# column assignments on df1 act on df1 itself, not on a slice of df.
df1 = df[["sales", "Date"]].copy()
df1.info()

In [None]:
# Parse the Date column into pandas datetimes.
df1["Date"] = pd.to_datetime(df1["Date"])

In [None]:
# Print df1's column dtypes and non-null counts.
df1.info()

In [None]:
# Display df1.
df1

In [None]:
# Move Date from a column to the index so the frame is indexed by date.
df1 = df1.set_index("Date")

In [None]:
# Display df1 again.
df1

In [None]:
import matplotlib.pyplot as plt
# Quick line plot of the sales column against df1's index.
plt.plot(df1.loc[:, 'sales'])

In [None]:
# Sum sales over all rows that share the same Date value, then show the
# number of resulting groups.
# NOTE(review): the variable is named "by_month" but the grouping key is Date
# itself, so the aggregation is per distinct date — confirm the intended
# granularity.
sales_by_month = df1.groupby('Date')['sales'].sum()
sales_by_month.shape

In [None]:
# Turn the aggregated Series back into a two-column DataFrame and label the
# columns explicitly.
sales_by_month_df = sales_by_month.reset_index().set_axis(
    ['Date', 'sales'], axis="columns"
)

print(type(sales_by_month_df))

In [None]:
# Use Date as the index of the aggregated frame.
sales_by_month_df = sales_by_month_df.set_index("Date")

In [None]:
# Line plot of the aggregated sales series.
plt.plot(sales_by_month_df.loc[:, 'sales'])

In [None]:
# Presentation-quality line plot of the aggregated sales series.
# Switches the global matplotlib style to 'ggplot'; the style change persists
# for every figure drawn after this cell.
plt.style.use('ggplot')
plt.figure(figsize=(18,8)) 
plt.grid(True) 
plt.xlabel('month', fontsize = 20) 
plt.xticks(fontsize = 15)
plt.ylabel('sales', fontsize = 20)
plt.yticks(fontsize = 15) 
plt.plot(sales_by_month_df['sales'], linewidth = 3, color = 'blue')
plt.title('Sales by Year and Month', fontsize = 30)
plt.show()

In [None]:
import seaborn as sns
# Histogram of the sales values with a KDE curve on a density scale.
# `sns.distplot` is deprecated in seaborn; `histplot(kde=True, stat="density")`
# is the documented replacement for the same histogram-plus-density view.
sns.histplot(sales_by_month_df['sales'], kde=True, stat="density", kde_kws=dict(cut=3))

In [None]:
# Kernel density estimate of the aggregated sales values.
df_sales = sales_by_month_df['sales']
df_sales.plot(kind='kde', figsize=(18, 8), linewidth=3)
plt.xticks(fontsize=15)
# The first argument of plt.grid is the on/off flag, not the axis name, so
# pass True explicitly ('both' is already the default axis).
plt.grid(True)
plt.ylabel('Density', fontsize=20)
plt.yticks(fontsize=15)
plt.show()

In [None]:
# Average of the first ten aggregated sales values.
sales_by_month_df['sales'].head(10).mean()

In [None]:
# Line plot of the aggregated sales series (baseline for the rolling plots below).
plt.plot(sales_by_month_df.loc[:, 'sales'])

In [None]:
# Rolling mean over a 3-observation window.
plt.plot(sales_by_month_df['sales'].rolling(window=3).mean())

In [None]:
# Rolling mean over a 365-observation window.
plt.plot(sales_by_month_df['sales'].rolling(window=365).mean())

In [None]:
# Rolling standard deviation over a 365-observation window.
plt.plot(sales_by_month_df['sales'].rolling(window=365).std())

In [None]:
# Augmented Dickey-Fuller test on the aggregated sales, with the lag length
# chosen by AIC; the raw result tuple is shown as the cell output.
adfuller(sales_by_month_df['sales'],autolag='AIC')

In [None]:
# Test for stationarity
def test_stationarity(timeseries, window=12):
    """Plot rolling statistics of a series and print its Augmented Dickey-Fuller test.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to inspect; it is plotted as-is and passed unchanged to
        ``adfuller``.
    window : int, default 12
        Number of observations in the rolling window used for the rolling
        mean and rolling standard deviation.

    Returns
    -------
    None
        The figure is shown and the test summary is printed.
    """
    # Rolling statistics over the requested window.
    rolmean = timeseries.rolling(window).mean()
    rolstd = timeseries.rolling(window).std()

    # Original series with its rolling mean and rolling std on one figure.
    plt.figure(figsize=(18, 8))
    # The first argument of plt.grid is the on/off flag, not an axis name.
    plt.grid(True)
    plt.plot(timeseries, color='blue', label='Original', linewidth=3)
    plt.plot(rolmean, color='red', label='Rolling Mean', linewidth=3)
    plt.plot(rolstd, color='black', label='Rolling Std', linewidth=4)
    plt.legend(loc='best', fontsize=20, shadow=True, facecolor='lightpink', edgecolor='k')
    plt.title('Rolling Mean and Standard Deviation', fontsize=25)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.show(block=False)

    print("Results of dickey fuller test")
    adft = adfuller(timeseries, autolag='AIC')
    # adfuller returns an unlabeled tuple: label the first four entries, then
    # append each critical value from the dict in position 4.
    output = pd.Series(
        adft[0:4],
        index=['Test Statistics', 'p-value', 'No. of lags used', 'Number of observations used'],
    )
    for key, values in adft[4].items():
        output['critical value (%s)' % key] = values
    print(output)

In [None]:
# Run the rolling-statistics plot and ADF summary on the aggregated sales.
test_stationarity(sales_by_month_df['sales'])

In [None]:
# Shorthand reference to the aggregated sales column, used for differencing below.
df_saless = sales_by_month_df['sales']

In [None]:
# Preview the first difference (each value minus the previous one).
df_saless.diff(periods=1)

In [None]:
# First-differenced sales series.
df_sales_diff_1 = df_saless.diff(periods=1)

In [None]:
# Remove missing values from the differenced series (differencing leaves the
# first entry undefined).
df_sales_diff_1 = df_sales_diff_1.dropna()

In [None]:
# Run the same stationarity check on the first-differenced series.
test_stationarity(df_sales_diff_1)

In [None]:
# Additive seasonal decomposition with a 365-observation period, matching the
# 365-observation windows and period used elsewhere in this notebook.
result = seasonal_decompose(sales_by_month_df[["sales"]], model='additive', period=365)

In [None]:
# result.plot() builds and returns its own figure, so resize that figure
# directly instead of creating an extra, empty one first.
fig = result.plot()
fig.set_size_inches(17, 10)

In [None]:
# Multiplicative seasonal decomposition with a 365-observation period.
result1 = seasonal_decompose(
    sales_by_month_df.loc[:, ["sales"]],
    model='multiplicative',
    period=365,
)

In [None]:
# result1.plot() builds and returns its own figure, so resize that figure
# directly instead of creating an extra, empty one first.
fig = result1.plot()
fig.set_size_inches(17, 10)

In [None]:
# Training window: the first 1000 rows, selected by position.
train_data = sales_by_month_df.iloc[:1000]
train_data.shape

In [None]:
# Hold-out window: every row from position 1000 onwards.
test_data = sales_by_month_df.iloc[1000:]
test_data.shape

In [None]:
# Show the train and test segments of the series on one set of axes.
plt.figure(figsize=(18,8))
plt.grid(True)
plt.xlabel('Months', fontsize = 20)
plt.ylabel('Sales', fontsize = 20)
plt.xticks(fontsize = 15)
# Size the y tick labels too, so both axes use the same tick font size.
plt.yticks(fontsize = 15)
plt.plot(train_data, 'green', label='Train data', linewidth = 5)
plt.plot(test_data, 'blue', label='Test data', linewidth = 5)
plt.legend(fontsize = 20, shadow=True,facecolor='lightpink',edgecolor = 'k')

In [None]:
# Autocorrelation (top) and partial autocorrelation (bottom) of the
# undifferenced sales values, out to 100 lags, stacked in one figure.
sales_data = sales_by_month_df['sales'].values
# show plots in the notebook
%matplotlib inline
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(sales_data.squeeze(), lags=100, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(sales_data, lags=100, ax=ax2)

In [None]:
# ACF (top) and PACF (bottom) of the first-differenced series, using the
# plotting functions' default number of lags.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
fig = plot_acf(df_sales_diff_1, ax=ax1)
fig = plot_pacf(df_sales_diff_1, ax=ax2)

In [None]:
# Fit an ARIMA(2, 1, 2) model on the training sales and forecast one value
# for every observation in the hold-out set.
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot as plt
from pandas import read_csv
# `pandas.datetime` was only an alias of the standard-library class and is no
# longer importable from recent pandas; import it from `datetime` so the cell
# runs and the name `datetime` stays available.
from datetime import datetime
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error,mean_absolute_error
from math import sqrt
predictions = list()
conf_list = list()
model1 = ARIMA(train_data['sales'], order=(2,1,2))
model_fit1 = model1.fit()
forecast_length = len(test_data)
output1= model_fit1.forecast(steps=forecast_length)
output1

In [None]:
# Build the fitted ARIMA model's summary report.
summary = model_fit1.summary()

In [None]:
# Display the ARIMA summary report.
summary

In [None]:
# Observed hold-out sales as a NumPy array, used as the ARIMA ground truth.
y_true = test_data['sales'].to_numpy()
y_true.shape

In [None]:
# ARIMA forecast truncated to forecast_length entries.
# NOTE(review): forecast_length is a notebook-level name that later cells
# reassign — this relies on top-to-bottom execution.
y_pred = output1[:forecast_length]
y_pred.shape

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
from math import sqrt
# Score the current y_true / y_pred pair: MSE and its square root.
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
for label, value in (("Mean Squared Error", mse), ("Root Mean Squared Error", rmse)):
    print(f"{label}: {value}")

In [None]:
# Split the first-differenced series at position 1000 into train and test parts.
train_data_s = df_sales_diff_1.iloc[:1000]
test_data_s = df_sales_diff_1.iloc[1000:]
print(test_data_s.shape)

In [None]:
# Fit a SARIMAX model on the differenced training series, forecast over the
# differenced test horizon, and report the RMSE against test_data_s.
import statsmodels.api as sm

# NOTE(review): train_data_s is sliced from df_sales_diff_1, which is already
# first-differenced, while order=(2, 1, 2) asks the model to difference once
# more — confirm the double differencing is intended.
# NOTE(review): seasonal_order=(5, 0, 0, 3) uses a seasonal period of 3, unlike
# the 365 / 12 periods used in other cells — confirm.
model = sm.tsa.statespace.SARIMAX(train_data_s, order=(2, 1, 2), seasonal_order=(5, 0, 0, 3))
model_fit = model.fit()
forecast_length = len(test_data_s)
forecast = model_fit.forecast(steps=forecast_length)

print("RMSE of SARIMA Model:",np.sqrt(mean_squared_error(test_data_s,forecast)))

In [None]:
# Display the SARIMAX forecast values.
forecast

In [None]:
# Build and display the fitted SARIMAX model's summary report.
summary1 = model_fit.summary()
summary1

In [None]:
# Observed differenced hold-out values as a NumPy array (replaces the earlier y_true).
y_true = test_data_s.to_numpy()
y_true.shape

In [None]:
# SARIMAX forecast truncated to forecast_length entries (replaces the earlier y_pred).
y_pred = forecast[:forecast_length]
y_pred.shape

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
from math import sqrt
# Score the current y_true / y_pred pair: MSE and its square root.
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
for label, value in (("Mean Squared Error", mse), ("Root Mean Squared Error", rmse)):
    print(f"{label}: {value}")

In [None]:
from pmdarima import auto_arima
# Stepwise auto-ARIMA search on the training data: seasonal with m=12 and D=1,
# p and q capped at 3, and d chosen via the ADF test.
model_autoArima = auto_arima(
    train_data,
    start_p=1,
    start_q=1,
    test='adf',
    max_p=3,
    max_q=3,
    m=12,
    d=None,
    seasonal=True,
    start_P=1,
    D=1,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)

In [None]:
# Forecast one value per hold-out observation with the auto-ARIMA model and
# score that forecast. The horizon is derived from test_data so the forecast
# and the ground truth always have the same length.
forecast_length = len(test_data)
forecast_auto = model_autoArima.predict(n_periods=forecast_length)
# Report and evaluate the auto-ARIMA output itself (forecast_auto), not the
# SARIMAX `forecast` from an earlier cell, which is on the differenced scale.
print("Forecasted values:", forecast_auto)
mse = mean_squared_error(test_data['sales'], forecast_auto)
rmse = np.sqrt(mse)
print(f'Mean Squared Error: {mse}')
print(f"Root Mean Squared Error: {rmse}")

In [None]:
# Residual diagnostics for the fitted auto-ARIMA model. The model is stored
# in `model_autoArima`; no `auto_model` name is defined in this notebook.
model_autoArima.plot_diagnostics(figsize=(15,8))
plt.show()