In [1]:
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.15.2
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %% [markdown]
# # Time Series Exploratory Data Analysis (EDA) 

# This notebook provides a comprehensive EDA for time series data, including detailed seasonality investigation.

# %%
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm


ModuleNotFoundError: No module named 'seaborn'

# %%
# Load your time series data
# Replace 'your_data.csv' with the actual path to your data file.
# The cells below expect a 'Date' column (parsed and used as the index) and a 'Value' column.

In [None]:
# Read the CSV with the 'Date' column parsed as datetimes and used as the index.
df = pd.read_csv(
    'your_data.csv',
    index_col='Date',
    parse_dates=True,
)

In [None]:
# %%
# Basic Data Exploration
# First rows, column dtypes / non-null counts, and summary statistics.
print(df.head())
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the report).
df.info()
print(df.describe())

In [None]:
# %%
# Visualize the Time Series
# Draw every column of `df` against its index on a single, explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df)
ax.set_title('Time Series Plot')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
plt.show()

In [None]:
# %%
# Check for Stationarity (Visual Inspection)
# Overlay the rolling mean and rolling standard deviation on the original data.
rolling_window = df.rolling(window=12)  # Adjust window size as needed
rolling_mean = rolling_window.mean()
rolling_std = rolling_window.std()

fig, ax = plt.subplots(figsize=(12, 6))
for data, colour, caption in (
    (df, 'blue', 'Original'),
    (rolling_mean, 'red', 'Rolling Mean'),
    (rolling_std, 'green', 'Rolling Std'),
):
    ax.plot(data, color=colour, label=caption)
ax.legend(loc='best')
ax.set_title('Rolling Statistics')
plt.show()


In [None]:
# %%
# Decompose Time Series (if applicable)
# Additive decomposition into trend, seasonal and residual components.
decomposition = seasonal_decompose(df, model='additive', period=12)  # Adjust period as needed

# One stacked panel per component, original series on top.
panels = (
    (df, 'Original'),
    (decomposition.trend, 'Trend'),
    (decomposition.seasonal, 'Seasonality'),
    (decomposition.resid, 'Residuals'),
)
fig, axes = plt.subplots(4, 1, figsize=(12, 8))
for panel_ax, (component, caption) in zip(axes, panels):
    panel_ax.plot(component, label=caption)
    panel_ax.legend(loc='best')
fig.tight_layout()
plt.show()


In [None]:
# %%
# Investigate Seasonality Patterns
# Distribution of 'Value' per calendar month.
# The month column is added to a copy (DataFrame.assign returns a new frame)
# instead of to `df` itself: other cells pass the whole `df` to plotting and
# statsmodels functions, and an extra integer column would corrupt those calls
# (and any re-run of earlier cells).
monthly_view = df.assign(month=df.index.month)
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x='month', y='Value', data=monthly_view, ax=ax)
ax.set_title('Seasonal Boxplot (Monthly)')
plt.show()

# %%
# Autocorrelation and Partial Autocorrelation Functions (ACF and PACF)
# Analyse the 'Value' series only (not the whole frame, which may carry helper
# columns), with missing values removed as in the ARIMA cell.
acf_series = df['Value'].dropna()

# plot_pacf rejects lags >= half the sample size, so cap the requested 40 lags
# for short series.
acf_lags = min(40, len(acf_series) // 2 - 1)

# plot_acf / plot_pacf open a brand-new figure unless an axes is passed in;
# hand them the two panels explicitly so both plots land side by side.
fig, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(12, 6))
plot_acf(acf_series, lags=acf_lags, ax=acf_ax)
plot_pacf(acf_series, lags=acf_lags, ax=pacf_ax)
plt.show()


In [None]:
# %%
# Check for Stationarity (Dickey-Fuller Test)
def stationarity_test(timeseries, significance=0.05):
    """
    Run the augmented Dickey-Fuller test on a series and print a report.

    The function only tests and reports; it does not modify any data and does
    not plot.

    Args:
      timeseries: The time series data (passed straight to ``adfuller``).
      significance: p-value threshold at or below which the series is
        reported as stationary. Defaults to 0.05.

    Returns:
      A tuple ``(summary, is_stationary)`` where ``summary`` is a pandas
      Series holding the test statistic, p-value, lags used, number of
      observations used and the critical values, and ``is_stationary`` is
      True when the p-value is <= ``significance``.
    """
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic',
                                             'p-value',
                                             '#Lags Used',
                                             'Number of Observations Used'])
    # dftest[4] maps a significance level label to its critical value.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

    is_stationary = bool(dftest[1] <= significance)
    if is_stationary:
        print("Conclusion: Series is likely stationary.")
    else:
        print("Conclusion: Series is likely non-stationary.")
    return dfoutput, is_stationary


# %%
# Test the raw series
value_adf_summary, value_is_stationary = stationarity_test(df['Value'])

# %%
# Differencing to achieve stationarity
# Done once, at cell level, only when the raw series failed the test. Keeping
# this out of stationarity_test avoids the function re-testing the same
# differenced data recursively without end.
if not value_is_stationary:
    # The leading NaN produced by diff() stays in the column (column assignment
    # re-aligns on the index anyway); it is dropped only for the test itself.
    df['Value_diff'] = df['Value'].diff()
    stationarity_test(df['Value_diff'].dropna())

    plt.figure(figsize=(12,6))
    plt.plot(df['Value_diff'], label='Differenced Data')
    plt.title('First Order Differencing')
    plt.legend()
    plt.show()



In [None]:
# %%
# Fit ARIMA Model
# ARIMA(p=1, d=1, q=1) on the 'Value' column with missing values removed.
arima_order = (1, 1, 1)
arima_input = df['Value'].dropna()
model = sm.tsa.ARIMA(arima_input, order=arima_order)
result = model.fit()
print(result.summary())

In [None]:
# %%
# Forecasting
forecast_steps = 12
forecast = result.forecast(steps=forecast_steps)

# Work out the spacing of the observations so the forecast dates continue the
# historical index: use the index's own frequency when set, otherwise try to
# infer it, and fall back to month-end steps if neither is available.
forecast_freq = getattr(df.index, 'freq', None)
if forecast_freq is None:
    try:
        forecast_freq = pd.infer_freq(df.index)
    except (TypeError, ValueError):
        # infer_freq raises for non-datetime indexes or fewer than 3 dates.
        forecast_freq = None
if forecast_freq is None:
    forecast_freq = pd.offsets.MonthEnd()

# Generate one extra date and drop the first so the forecast starts one step
# AFTER the last observation rather than on top of it.
forecast_index = pd.date_range(
    df.index[-1], periods=forecast_steps + 1, freq=forecast_freq
)[1:]

plt.figure(figsize=(12,6))
plt.plot(df['Value'], label='Historical Data')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.legend()
plt.title('Forecasting Future Values')
plt.show()