In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Step 1: Load the data (Here, we'll generate synthetic data)
# Uncomment and modify the following line if you have your own dataset
# data = pd.read_csv('your_dataset.csv', parse_dates=['Date'], index_col='Date')

# Generate synthetic data: a seeded random walk observed at 100 month-ends.
np.random.seed(42)
# Pass the offset object instead of the 'M' string alias: the alias is
# deprecated as of pandas 2.2 (renamed 'ME'), whereas the offset object is
# accepted by both older and newer pandas releases.
date_range = pd.date_range(start='1/1/2020', periods=100, freq=pd.offsets.MonthEnd())
data = pd.Series(np.random.randn(100).cumsum(), index=date_range)

# Step 2: Visualize the time series
# Draw the raw series on a single 10x6 figure with labelled axes.
series_fig, series_ax = plt.subplots(figsize=(10, 6))
series_ax.plot(data)
series_ax.set_title('Time Series Data')
series_ax.set_xlabel('Date')
series_ax.set_ylabel('Value')
plt.show()

def _report_adf(series, label_suffix=''):
    """Run the Augmented Dickey-Fuller test on *series* and print a summary.

    Prints the test statistic, the p-value and every critical value, then
    returns the raw result of ``adfuller`` so the caller can inspect it.

    ``label_suffix`` is appended to the statistic and p-value labels, e.g.
    ``' after differencing'``.
    """
    adf_result = adfuller(series)
    print(f'ADF Statistic{label_suffix}:', adf_result[0])
    print(f'p-value{label_suffix}:', adf_result[1])
    for level, threshold in adf_result[4].items():
        print(f'Critical Value ({level}): {threshold}')
    return adf_result


# Step 3: Perform stationarity test (ADF Test)
result = _report_adf(data)

# Step 4: Difference the series once and repeat the test on the result.
# diff() leaves a NaN in the first position, which dropna() removes.
data_diff = data.diff().dropna()
result_diff = _report_adf(data_diff, ' after differencing')

# Step 5: Identify the best parameters (p, d, q) for the ARIMA model using ACF and PACF plots
# Left panel: autocorrelation; right panel: partial autocorrelation, both
# computed on the differenced series.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
for panel, draw_correlogram in zip(ax, (plot_acf, plot_pacf)):
    draw_correlogram(data_diff, ax=panel)
plt.show()

# Step 6: Fit the ARIMA model
# These parameters are usually determined from ACF and PACF plots.
order = (1, 1, 1)
p, d, q = order
model = ARIMA(data, order=order)
model_fit = model.fit()
print(model_fit.summary())

# Step 7: Evaluate the model
# Score the model's fitted values against the observed series, skipping the
# first d observations on both sides.
data_forecast = model_fit.fittedvalues
observed = data.iloc[d:]
fitted = data_forecast.iloc[d:]
mse = mean_squared_error(observed, fitted)
print('Mean Squared Error:', mse)

# Step 8: Forecast future values
forecast_steps = 10
forecast = model_fit.forecast(steps=forecast_steps)

# Build the month-end dates that follow the last observation. One extra
# period is requested and the first entry is dropped with [1:].
# The offset object replaces the 'M' string alias, which is deprecated as of
# pandas 2.2 (renamed 'ME'); the object form works on old and new releases.
forecast_index = pd.date_range(
    start=data.index[-1],
    periods=forecast_steps + 1,
    freq=pd.offsets.MonthEnd(),
)[1:]

# Wrap the raw values rather than the forecast object itself: handing pandas
# a Series together with index= aligns by label, which silently produces NaN
# wherever the model's own index differs from forecast_index.
forecast_series = pd.Series(np.asarray(forecast), index=forecast_index)

plt.figure(figsize=(10, 6))
plt.plot(data, label='Original')
plt.plot(forecast_series, label='Forecast')
plt.title('Forecasted Values')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'statsmodels'