# ARIMA (AutoRegressive Integrated Moving Average)

## A popular time series forecasting method used to predict future stock prices.

In [14]:
import pandas as pd

file_name='DLINKINDIA.csv'

# Read the raw price bars, convert the 'datetime' column to real timestamps,
# index the frame by it, and keep only the OHLCV columns in a fixed order.
data = (
    pd.read_csv(f'../data/raw/{file_name}', parse_dates=True)
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .set_index('datetime')
    .loc[:, ['open', 'high', 'low', 'close', 'volume']]
)

data

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 09:15:00,73.40,73.45,72.55,73.10,11022
2019-01-01 10:15:00,73.10,73.50,72.10,73.30,2212
2019-01-01 11:15:00,73.30,73.30,73.00,73.00,513
2019-01-01 12:15:00,73.00,73.00,72.10,72.80,567
2019-01-01 13:15:00,72.80,73.20,72.70,73.20,311
...,...,...,...,...,...
2023-08-02 11:15:00,319.65,320.90,316.20,319.35,70920
2023-08-02 12:15:00,319.35,321.95,318.25,319.95,33586
2023-08-02 13:15:00,319.95,320.00,313.00,313.05,85282
2023-08-02 14:15:00,313.05,319.50,311.00,318.75,123367


### Resample data to daily frequency using OHLC dictionary


In [15]:
# Per-column reducers that collapse all bars falling in one day into a single daily bar.
ohlc_dict = dict(open='first', high='max', low='min', close='last', volume='sum')

# Bucket by calendar day (left-closed, left-labelled bins), reduce every column
# with its reducer, then discard any row that still contains a NaN.
data = (
    data
    .resample('D', closed='left', label='left')
    .agg(ohlc_dict)
    .dropna()
)

data.tail(10)

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-20,300.45,317.25,295.0,314.2,2021394
2023-07-21,313.9,323.0,308.9,320.5,1310761
2023-07-24,321.45,329.7,315.25,322.0,905457
2023-07-25,322.0,322.9,294.15,301.85,1090657
2023-07-26,299.95,305.45,295.35,296.95,365650
2023-07-27,298.8,310.95,297.6,307.8,703015
2023-07-28,309.45,312.8,301.6,310.35,469952
2023-07-31,307.65,307.65,288.1,298.3,972858
2023-08-01,299.0,325.75,299.0,322.75,1477891
2023-08-02,322.3,327.85,311.0,317.0,822031


In [16]:
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
import numpy as np

# Only the daily high and low series are modelled below.
data = data.loc[:, ['high', 'low']]

# Define a function to fit ARIMA model and make predictions
def _fit_and_forecast(series, forecast_length, order):
    """Fit one ARIMA model of the given order on `series` and forecast `forecast_length` steps."""
    return ARIMA(series, order=order).fit().forecast(steps=forecast_length)


def predict_stock_high_low(data, forecast_length, order=(5, 1, 0)):
    """Forecast the 'high' and 'low' columns independently with ARIMA.

    Parameters
    ----------
    data : mapping or DataFrame
        Object indexable by the keys 'high' and 'low'; each selected series
        is passed to its own ARIMA model.
    forecast_length : int
        Number of steps to forecast, passed to ``forecast(steps=...)``.
    order : tuple, default (5, 1, 0)
        ARIMA ``order`` argument used for both models. The default keeps
        the configuration this notebook originally hard-coded.

    Returns
    -------
    tuple
        ``(forecast_high, forecast_low)`` as returned by each fitted
        model's ``forecast`` call.
    """
    # The two series are modelled separately; 'high' is fitted first, then 'low'.
    forecast_high = _fit_and_forecast(data['high'], forecast_length, order)
    forecast_low = _fit_and_forecast(data['low'], forecast_length, order)

    return forecast_high, forecast_low



In [17]:
# Number of trailing rows to hold out and then forecast
forecast_length = 2

# Training frame: everything except the final `forecast_length` rows, which are
# left out so the forecasts can later be compared with the actual values.
# `drop` returns a new frame, so `data` itself is left untouched.
prediction_df = data.drop(index=data.tail(forecast_length).index)

# Fit on the training frame and forecast the held-out steps
forecast_high, forecast_low = predict_stock_high_low(prediction_df, forecast_length)




A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g

In [18]:
# Prepare data for visualization
# The model was fitted without the last `forecast_length` rows, so the forecasts
# correspond to exactly those held-out dates in the original index.
forecast_dates = data.index[-forecast_length:]

# Discard whatever index the forecasts came back with and re-label them
# positionally with the held-out dates.
forecast_df = pd.DataFrame(
    {
        'forecast_high': forecast_high.to_numpy(),
        'forecast_low': forecast_low.to_numpy(),
    },
    index=forecast_dates,
)

# Attach the forecasts to the actual prices. Selecting only the actual columns
# first keeps this cell idempotent: re-running it replaces the forecast columns
# instead of appending a second, duplicate pair.
data = data[['high', 'low']].join(forecast_df)

# Remove rows lacking actual prices (forecast columns stay NaN outside the hold-out window)
data = data.dropna(subset=['high', 'low'])


In [19]:
# Build the Plotly figure: one line per series, actual and forecast, all on the shared date index
fig = go.Figure()
for column, label in [
    ('high', 'Actual High Price'),
    ('forecast_high', 'Forecast High Price'),
    ('low', 'Actual Low Price'),
    ('forecast_low', 'Forecast Low Price'),
]:
    fig.add_trace(go.Scatter(x=data.index, y=data[column], name=label))
fig.update_layout(
    title='DLINKINDIA Stock High & Low Price Prediction using ARIMA',
    xaxis_title='Date',
    yaxis_title='Price',
)
fig.show()


In [20]:
# Show the last five rows so the forecasts can be read next to the actual high/low values
data.tail(n=5)

Unnamed: 0_level_0,high,low,forecast_high,forecast_low
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-07-27,310.95,297.6,,
2023-07-28,312.8,301.6,,
2023-07-31,307.65,288.1,,
2023-08-01,325.75,299.0,305.842951,287.509695
2023-08-02,327.85,311.0,306.939158,288.116333
