# Import Required Libraries
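Import the libraries used throughout this notebook for time series analysis: pandas, numpy, matplotlib, statsmodels, and pmdarima. (The statsmodels stationarity tests and the scikit-learn error metrics are imported later, in the cells that use them.)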

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pmdarima as pm

# Set matplotlib inline for Jupyter notebooks
%matplotlib inline

# Load and Prepare Time Series Data
Load time series data from a file or generate sample data. Ensure the data is properly formatted with a datetime index.

In [None]:
# Load and Prepare Time Series Data

# A real analysis would read the series from a file; for demonstration we
# generate one sample per day of Gaussian noise instead.
# Seed NumPy's global RNG so the generated series, and every statistic,
# model and plot derived from it in later cells, is reproducible.
np.random.seed(42)
date_rng = pd.date_range(start='2020-01-01', end='2021-01-01', freq='D')
data = np.random.randn(len(date_rng))

# Create a DataFrame with the generated data, indexed by date
df = pd.DataFrame(data, index=date_rng, columns=['value'])

# date_range already yields a DatetimeIndex; the conversion is kept as a
# guard for when the generated data is replaced by data loaded from a file.
df.index = pd.to_datetime(df.index)

# Print the first few rows explicitly: a bare `df.head()` in the middle of a
# notebook cell shows nothing, because only the last expression is echoed.
print(df.head())

# Plot the time series data
df.plot(figsize=(10, 6))
plt.title('Sample Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

# Explore Time Series Data
Visualize the time series data to identify patterns, trends, and seasonality using plots.

In [None]:
# Explore Time Series Data

# Line chart of the raw series: a first look at level, trend and seasonality
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['value'], label='Time Series Data')
ax.set_title('Time Series Data Visualization')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend()
plt.show()

# Additive decomposition of the series; the figure returned by .plot() is
# enlarged before being shown
decomposition = sm.tsa.seasonal_decompose(df['value'], model='additive')
fig = decomposition.plot()
fig.set_size_inches(12, 8)
plt.show()

# Autocorrelation (left panel) and partial autocorrelation (right panel),
# 40 lags each
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
for draw_correlogram, panel in zip(
    (sm.graphics.tsa.plot_acf, sm.graphics.tsa.plot_pacf), axes
):
    draw_correlogram(df['value'], lags=40, ax=panel)
plt.show()

# Check for Stationarity
Perform ADF and KPSS tests to check if the time series is stationary. Apply differencing if necessary to achieve stationarity.

In [None]:
# Check for Stationarity

from statsmodels.tsa.stattools import adfuller, kpss


def _report_stationarity(series, tag=''):
    """Run the ADF and KPSS tests on `series` and print both reports.

    The two tests have opposite null hypotheses (ADF: unit root, i.e.
    non-stationary; KPSS with regression='c': level-stationary), so they
    are read together.

    Parameters:
        series: the observations to test; must not contain NaN.
        tag: optional label (e.g. 'Differenced') added to every printed line.

    Returns:
        (adf_result, kpss_result): the raw tuples returned by `adfuller`
        and `kpss`.
    """
    suffix = f' ({tag})' if tag else ''
    crit_prefix = f'{tag} - ' if tag else ''
    # Tagged reports follow an earlier report, so separate them with a blank line
    lead = '\n' if tag else ''

    adf = adfuller(series)
    print(f'{lead}ADF Statistic{suffix}:', adf[0])
    print(f'p-value{suffix}:', adf[1])
    # Index 4 of the ADF result holds the critical-value dict
    for key, value in adf[4].items():
        print(f'Critical Value ({crit_prefix}{key}): {value}')

    kpss_res = kpss(series, regression='c')
    print(f'\nKPSS Statistic{suffix}:', kpss_res[0])
    print(f'p-value{suffix}:', kpss_res[1])
    # Index 3 of the KPSS result holds the critical-value dict
    for key, value in kpss_res[3].items():
        print(f'Critical Value ({crit_prefix}{key}): {value}')

    return adf, kpss_res


# Test the original series
adf_result, kpss_result = _report_stationarity(df['value'])

# First-difference the series. The column is always created because later
# cells read df['value_diff']. No .dropna() here: assigning back into df
# realigns on the index, so the leading NaN would reappear anyway. It is
# dropped at each point of use instead.
df['value_diff'] = df['value'].diff()

# Re-check stationarity after differencing
adf_result_diff, kpss_result_diff = _report_stationarity(
    df['value_diff'].dropna(), tag='Differenced'
)

# Plot the differenced time series data (the first point is NaN and is skipped)
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['value_diff'], label='Differenced Time Series Data')
plt.title('Differenced Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# Determine ARIMA Parameters
Use ACF and PACF plots to determine appropriate p, d, q parameters for the ARIMA model. Alternatively, use auto_arima to automatically find optimal parameters.

In [None]:
# Determine ARIMA Parameters

# Correlograms of the differenced series (leading NaN removed), used to read
# off candidate q (ACF, left) and p (PACF, right)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
acf_panel, pacf_panel = axes
sm.graphics.tsa.plot_acf(df['value_diff'].dropna(), lags=40, ax=acf_panel)
sm.graphics.tsa.plot_pacf(df['value_diff'].dropna(), lags=40, ax=pacf_panel)
plt.show()

# Let auto_arima search for the order on the undifferenced series, with the
# seasonal component switched off and trace output enabled
auto_arima_model = pm.auto_arima(
    df['value'],
    seasonal=False,
    trace=True,
)
print(auto_arima_model.summary())

# The selected (p, d, q) order, reused when fitting the ARIMA model
optimal_params = auto_arima_model.order
print(f'Optimal parameters: p={optimal_params[0]}, d={optimal_params[1]}, q={optimal_params[2]}')

# Fit ARIMA Model
Fit the ARIMA model to the time series data using the determined parameters. Display model summary and coefficients.

In [None]:
# Fit ARIMA Model

# Build the ARIMA model with the previously selected order and estimate it
model = sm.tsa.ARIMA(df['value'], order=optimal_params)
fitted_model = model.fit()

# Report the full summary table, then the estimated parameters on their own
print(fitted_model.summary())
print(fitted_model.params)

# Overlay the in-sample fitted values (red) on the original series
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df.index, df['value'], label='Original')
ax.plot(df.index, fitted_model.fittedvalues, color='red', label='Fitted')
ax.set_title('ARIMA Model Fit')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend()
plt.show()

# Evaluate Model Performance
Analyze model residuals, perform diagnostic tests, and calculate error metrics such as MAE, RMSE, and MAPE.

In [None]:
# Evaluate Model Performance

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Analyze model residuals
residuals = fitted_model.resid

# Plot residuals over time
plt.figure(figsize=(10, 6))
plt.plot(df.index, residuals, label='Residuals')
plt.title('Residuals of ARIMA Model')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.legend()
plt.show()

# Residual correlograms: ACF on the left, PACF on the right, 40 lags each
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
sm.graphics.tsa.plot_acf(residuals, lags=40, ax=axes[0])
sm.graphics.tsa.plot_pacf(residuals, lags=40, ax=axes[1])
plt.show()

# Calculate error metrics.
# NOTE: these compare the fitted values with the same data the model was
# estimated on, so they are in-sample errors, not forecast accuracy.
actual = df['value']
predicted = fitted_model.fittedvalues

# Mean Absolute Error (MAE)
mae = mean_absolute_error(actual, predicted)
print('Mean Absolute Error (MAE):', mae)

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(actual, predicted))
print('Root Mean Squared Error (RMSE):', rmse)

# Mean Absolute Percentage Error (MAPE)
# MAPE divides by the actual value, so an exact zero would turn the whole
# metric into inf. Those observations are excluded; if none remain the
# metric is undefined (NaN). Values merely close to zero still inflate MAPE,
# so treat it with caution on a series that crosses zero, like this sample.
nonzero = actual != 0
if nonzero.any():
    mape = np.mean(
        np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
    ) * 100
else:
    mape = np.nan
print('Mean Absolute Percentage Error (MAPE):', mape)

# Make Forecasts
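Generate forecasts for future time periods using the fitted ARIMA model and plot them alongside the observed series. (No actual values exist yet for the forecast horizon, so the plots show the history followed by the forecast rather than a forecast-versus-actual comparison.)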

In [None]:
# Make Forecasts

# Generate forecasts for future time periods
forecast_steps = 30  # Number of steps to forecast
forecast = fitted_model.forecast(steps=forecast_steps)

# Create a DataFrame to hold the forecasted values, dated from the day after
# the last observation.
# The forecast is passed as a plain array under an explicit column key.
# Handing the named Series returned by .forecast() to
# pd.DataFrame(..., columns=['forecast']) makes pandas select a column by
# label; the Series' own name is not 'forecast', so that column comes out
# entirely NaN and nothing is plotted.
forecast_dates = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=forecast_steps,
    freq='D',
)
forecast_df = pd.DataFrame({'forecast': np.asarray(forecast)}, index=forecast_dates)

# Plot the observed values and the forecasted values
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['value'], label='Actual')
plt.plot(forecast_df.index, forecast_df['forecast'], color='red', label='Forecast')
plt.title('ARIMA Model Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# Stack history and forecast into one frame. Each column is NaN over the
# other's date range, so the two lines do not overlap when plotted.
combined_df = pd.concat([df, forecast_df], axis=0)

# Plot the combined observed and forecasted values
plt.figure(figsize=(12, 6))
plt.plot(combined_df.index, combined_df['value'], label='Actual')
plt.plot(combined_df.index, combined_df['forecast'], color='red', label='Forecast')
plt.title('Actual vs Forecasted Values')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()