# **Exploratory Data Analysis**

### **Installation and Import**

In [58]:
# Import the packages
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller,kpss

In [22]:
# Read the input Excel workbook into a DataFrame.
input_path = '/content/drive/MyDrive/Github/m_tech/Datasets/input.xlsx'
df = pd.read_excel(input_path)

In [23]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [24]:
# Preview the five most recent rows.
df.tail(n=5)

Unnamed: 0,Date,SalesVolume
313,2021-02-01,73883.0
314,2021-03-01,102914.0
315,2021-04-01,64632.0
316,2021-05-01,45580.0
317,2021-06-01,41232.0


## **Visualizing the TS Data**

In [35]:
# Work on an independent copy so columns added for plotting do not touch `df`.
viz_df = df.copy(deep=True)

**Line Plot**

In [36]:
# Line chart of SalesVolume against Date.
fig = px.line(data_frame=viz_df, x='Date', y='SalesVolume')
fig.show()

**Box Plot**

In [42]:
# Extract the calendar year of each observation; the box plot groups on this column.
date_index = pd.DatetimeIndex(viz_df['Date'])
viz_df['year'] = date_index.year

In [43]:
# One box per year, showing the spread of SalesVolume within that year.
fig = px.box(data_frame=viz_df, x='year', y='SalesVolume')
fig.show()

**Seasonal decomposition**

In [44]:
# Use Date as the index so the decomposition output is aligned on dates.
stl_df = df.set_index(keys='Date')

In [45]:
# Preview the first five rows of the date-indexed frame.
stl_df.head(n=5)

Unnamed: 0_level_0,SalesVolume
Date,Unnamed: 1_level_1
1995-01-01,47639.0
1995-02-01,47880.0
1995-03-01,67025.0
1995-04-01,56925.0
1995-05-01,64192.0


In [47]:
# Additive decomposition of SalesVolume into trend, seasonal and residual parts.
# `period=12` sets a 12-observation seasonal cycle. It replaces the former
# `freq=12` keyword, which statsmodels deprecated in favour of `period` and
# which the current signature no longer accepts (requires statsmodels >= 0.11).
result = seasonal_decompose(stl_df['SalesVolume'], model='additive', period=12)

In [53]:
# Attach the three decomposition components as columns, then move Date back
# from the index to an ordinary column so it can be used as the x axis.
stl_df = stl_df.assign(
    Seasonal=result.seasonal,
    Trend=result.trend,
    Residual=result.resid,
).reset_index()

In [55]:
# Seasonal component over time.
fig = px.line(data_frame=stl_df, x='Date', y='Seasonal')
fig.show()

In [56]:
# Trend component over time.
fig = px.line(data_frame=stl_df, x='Date', y='Trend')
fig.show()

In [57]:
# Residual component over time (what is left after removing trend and seasonality).
fig = px.line(data_frame=stl_df, x='Date', y='Residual')
fig.show()

## **ADF Test**

In [25]:
# Index by Date, then squeeze the frame; with a single remaining column
# (SalesVolume in the preview above) this yields a Series.
date_indexed = df.set_index('Date')
adf_test_df = date_indexed.squeeze()

In [29]:
# Underlying array of values (without the Date index); this is the input
# passed to both the ADF and the KPSS tests below.
sales_val = adf_test_df.values

In [30]:
# Augmented Dickey-Fuller test. Its null hypothesis is that the series has a
# unit root, so a small p-value is evidence of stationarity.
result = adfuller(sales_val)
adf_statistic, adf_p_value, adf_critical_values = result[0], result[1], result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % adf_p_value)
print('Critical Values:')
for level, threshold in adf_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.544328
p-value: 0.105058
Critical Values:
	1%: -3.452
	5%: -2.871
	10%: -2.572


### Inference:
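ADF Statistic: **-2.544328** is greater (less negative) than every critical value (1%: **-3.452**, 5%: **-2.871**, 10%: **-2.572**), and the p-value (**0.105**) is above 0.05. We therefore fail to reject the null hypothesis of a unit root: the ADF test provides no evidence that the series is stationary.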

## **KPSS Test**

In [63]:
def kpss_test(series, **kw):
    """Run the KPSS test on ``series`` and print a short report.

    Args:
        series: Data handed to ``kpss`` as its first positional argument.
        **kw: Extra keyword arguments forwarded unchanged to ``kpss``.

    Returns:
        None. The statistic, p-value, number of lags and critical values
        are written to standard output.
    """
    outcome = kpss(series, **kw)
    # The result must unpack into exactly four values.
    statistic, p_value, n_lags, critical_values = outcome

    header_lines = (
        f'KPSS Statistic: {statistic}',
        f'p-value: {p_value}',
        f'num lags: {n_lags}',
        'Critial Values:',  # label text kept exactly as emitted by the notebook
    )
    for line in header_lines:
        print(line)

    # One indented line per significance level.
    for level, threshold in critical_values.items():
        print(f'   {level} : {threshold}')

In [64]:
# KPSS with regression='ct': the null hypothesis is stationarity around a
# deterministic trend.
kpss_test(series=sales_val, regression='ct')

KPSS Statistic: 0.14807356572276226
p-value: 0.04827202856436478
num lags: 17
Critial Values:
   10% : 0.119
   5% : 0.146
   2.5% : 0.176
   1% : 0.216






### Inference:
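KPSS Statistic: **0.1480735** is less than the 1% critical value (**0.216**) but greater than the 5% critical value (**0.146**), and the p-value (**0.048**) is below 0.05. The KPSS null hypothesis is stationarity (with `regression='ct'`, stationarity around a deterministic trend), so the null is rejected at the 5% level, though only marginally, and not at the 1% level. This agrees with the ADF test, which could not reject a unit root: neither test confirms that the series is stationary. Note that the `regression='ct'` parameter only specifies the trend-stationary null being tested; it does not by itself show that a deterministic trend exists.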