In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import statsmodels
from statsmodels.tsa.arima.model import ARIMA 

# Load the Nifty 50 price/indicator table, parse the dates and order the rows
# chronologically before anything downstream consumes them.
df = (
    pd.read_csv(r'C:\Desktop\Finsearch_23_G12\Stock_data\Nifty_50_2000-2023\Nifty_50_with_indicators_.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(by='Date')
)

# Use the (now sorted) dates as a DatetimeIndex; the 'Date' column is kept.
date_index = pd.DatetimeIndex(df['Date'].values)
df = df.set_index(date_index)
df

In [None]:
# Line chart of the closing price over the full date range.
price_fig, price_ax = plt.subplots(figsize=(12.2, 4.5))
price_ax.plot(df['close'], label='Close')
price_ax.set_title('Closing Price history')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Closing prices')
plt.show()

**Stationarity**<br>
Subtract the previous value from the current value. Now if we just difference once, we might not get a stationary series so we might need to do that multiple times.

The minimum number of differencing operations needed to make the series stationary is the value we supply to the ARIMA model as its differencing order `d`.
<br><br>
**ADF Test**<br>
We'll use the Augmented Dickey-Fuller (ADF) test to check whether the price series is stationary. The null hypothesis of the ADF test is that the time series is non-stationary. So if the p-value of the test is less than the significance level (0.05), we can reject the null hypothesis and infer that the time series is indeed stationary.

So, if the p-value > 0.05, we'll need to find the order of differencing.

In [None]:
# Augmented Dickey-Fuller test on the closing-price series (NaNs removed).
from statsmodels.tsa.stattools import adfuller

close_clean = df.close.dropna()
result = adfuller(close_clean)

# The first two entries of the returned tuple are the test statistic and the p-value.
adf_stat, adf_pvalue = result[0], result[1]
print(f"ADF Statistic: {adf_stat}")
print(f"p-value: {adf_pvalue}")

**Autocorrelation function (ACF)**

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Left panel: the undifferenced closing price; right panel: its autocorrelation.
close_series = df.close
fig, axes = plt.subplots(1, 2, figsize=(16, 4))
ax1, ax2 = axes

ax1.plot(close_series)
ax1.set_title("Original")
plot_acf(close_series, ax=ax2);

In [None]:
# First difference of the closing price; the leading NaN produced by diff() is dropped.
diff = df['close'].diff().dropna()

# Left panel: the differenced series; right panel: its autocorrelation.
fig, axes = plt.subplots(1, 2, figsize=(16, 4))
ax1, ax2 = axes
ax1.plot(diff)
ax1.set_title("Difference once")
plot_acf(diff, ax=ax2);

In [None]:
%pip install --skip-lock pmdarima

from pmdarima.arima.utils import ndiffs
ndiffs(df.close, test="adf") # this could have given the output as 1 (calculating d)

In [None]:
# First difference of the closing price (recomputed so the cell is self-contained).
diff = df.close.diff().dropna()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
ax1.plot(diff)
ax1.set_title("Difference once")

# plot_acf forwards extra keyword arguments to Matplotlib's line-drawing calls,
# which do not accept `ylim`. Draw the ACF first, then restrict the y-axis on
# the Axes itself to the 0..1 range the original call intended.
plot_acf(diff, ax=ax2)
ax2.set_ylim(0, 1);

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model to the closing price with order (p, d, q) = (5, 1, 12)
# and print the fitted model's summary table.
arima_order = (5, 1, 12)
model = ARIMA(df.close, order=arima_order)
result = model.fit()

summary_table = result.summary()
print(summary_table)

In [None]:
# Residual diagnostics for the fitted ARIMA model.
# `residuals` keeps every residual, exactly as before.
residuals = pd.DataFrame(result.resid)

# The model differences once (d=1), so there is no previous value to difference
# against for the first observation. Its "residual" is roughly the raw first
# price and dwarfs every other point, flattening both plots. It is left out of
# the plots only.
N_WARMUP = 1  # equals d in the fitted order (5, 1, 12)
residuals_plot = residuals.iloc[N_WARMUP:]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))

ax1.plot(residuals_plot)
ax1.set_title("Residuals over time")

ax2.hist(residuals_plot, density=True)
ax2.set_title("Residual density");

In [None]:
# Actual vs. fitted values over the first N_COMPARE observations.
N_COMPARE = 100

# `end` in predict() is inclusive, so stopping at N_COMPARE - 1 yields exactly
# N_COMPARE fitted values. The original end=100 produced 101 points against a
# 100-point slice of actual prices.
predicted = result.predict(start=0, end=N_COMPARE - 1)

# Explicit positional slice of the same first N_COMPARE observations.
actual = df.close.iloc[:N_COMPARE]

# NOTE(review): with d=1 the first in-sample prediction may not be meaningful
# and can stretch the y-axis; consider starting both series at position 1.

# Plot actual vs. fitted values
plt.figure(figsize=(20, 20))
plt.plot(actual, label='Actual')
plt.plot(predicted, label='Predicted')
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('ARIMA Model Predictions')
plt.legend()
# Save the plot
#plt.savefig(r'C:\Desktop\ARIMA_predictions.png')  # Provide the desired filename and extension
plt.show()