<a href="https://www.kaggle.com/code/abbasamjherawala/arma-model-for-nsei-stock-prediction?scriptVersionId=121327056" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# *Model:* ARMA
# *Stock Data:* NSEI
#  *Name: *Abbas Amjherawala**

In [3]:
!pip install yfinance
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from tqdm import tqdm
from pandas.plotting import register_matplotlib_converters
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime, timedelta, timezone
from time import time

Collecting yfinance
  Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Installing collected packages: multitasking, yfinance
Successfully installed multitasking-0.0.11 yfinance-0.2.12
[0m

# **Import Data and calculate Returns**
> **plots:** *prices of stocks over time and returns of stocks over time*

In [4]:
tickerSymbol = '^NSEI'
data = yf.Ticker(tickerSymbol)
data

yfinance.Ticker object <^NSEI>

In [None]:
prices = data.history(start='2022-03-03', end='2023-03-03').Close
prices

In [None]:
returns = prices.pct_change().dropna()
returns

In [None]:
plt.figure(figsize=(10,4))
plt.plot(prices)
plt.ylabel('Closing Prices', fontsize=20)
plt.title('Stock Rate over Time', fontsize=20)

dates = pd.date_range(start='2022-03-03', end='2023-03-03', freq='M')
for date in dates:
    plt.axvline(date, color='k', linestyle='--', alpha=0.2)
plt.axhline(prices.mean(), color='r', alpha=0.2, linestyle='--')

In [None]:
plt.figure(figsize=(10,4))
plt.plot(returns)
plt.ylabel('Returns', fontsize=20)
plt.title('Stock Returns over Time', fontsize=20)

dates = pd.date_range(start='2022-03-03', end='2023-03-03', freq='M')
for date in dates:
    plt.axvline(date, color='k', linestyle='--', alpha=0.2)
plt.axhline(returns.mean(), color='r', alpha=0.2, linestyle='--')

# ACF for MA
> No proper identifications captured

In [None]:
plot_acf(returns)
plt.show()

# PACF for AR
> No proper identification captured

In [None]:
plot_pacf(returns, method='ywm')
plt.show()

# Train and Test data splits
1. divide price dataframe
2. train ARIMA model on train set
3. test order of ARIMA model among: (0,0,0), (4,0,4), (4,0,6) and (6,0,6)
4. predicting test data on ARIMA order (5,0,4), best
5. plotting original vs predicted data
6. calculating acuuracy: mae, mse, mape

In [None]:
p_train_date='2023-01-02'
p_train_data = prices[:p_train_date]
p_train_data.index = pd.to_datetime(p_train_data.index)
p_train_data

In [None]:
p_test_date='2023-01-03'
p_test_data = prices[p_test_date :]
p_test_data.index = pd.to_datetime(p_test_data.index)
p_test_data

In [8]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
model = ARIMA(p_train_data, order=(0, 0, 0))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_price = next_day_price.values[0]
print("Predicted price for the 2023-01-03:", predicted_price)
print("Original price for the 2023-01-03:", p_test_data[0])
print("Difference between original and predicted price:", p_test_data[0] - predicted_price)

In [None]:
print(fitted.summary())

In [None]:
model = ARIMA(p_train_data, order=(4, 0, 4))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_price = next_day_price.values[0]
print("Predicted price for the 2023-01-03:", predicted_price)
print("Original price for the 2023-01-03:", p_test_data[0])
print("Difference between original and predicted price:", p_test_data[0] - predicted_price)

In [None]:
model = ARIMA(p_train_data, order=(4, 0, 6))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_price = next_day_price.values[0]
print("Predicted price for the 2023-01-03:", predicted_price)
print("Original price for the 2023-01-03:", p_test_data[0])
print("Difference between original and predicted price:", p_test_data[0] - predicted_price)

In [None]:
model = ARIMA(p_train_data, order=(6, 0, 6))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_price = next_day_price.values[0]
print("Predicted price for the 2023-01-03:", predicted_price)
print("Original price for the 2023-01-03:", p_test_data[0])
print("Difference between original and predicted price:", p_test_data[0] - predicted_price)

In [None]:
print(fitted.summary())

In [None]:
i=0
total_profit=0
total_loss=0
predictions=[]
for check in p_test_data:
    model = ARIMA(p_train_data, order=(5, 0, 4))
    fitted = model.fit()

    next_day_price = fitted.forecast()
    predicted_price = next_day_price.values[0]
    print(f"Predicted price for the {i+1} day:", predicted_price)
    predictions.append(predicted_price)
    print(f"Original price for the {i+1} day:", check)
    pr_ls= check - predicted_price
    if pr_ls<0:
        print("Loss of:", abs(pr_ls))
        total_loss+=pr_ls
    elif pr_ls==0:
        print("No Profit and No Loss")
    else:
        print("Profit of:", abs(pr_ls))
        total_profit+=pr_ls
    print("\n")
    p_train_data.loc[len(p_train_data)+i] = p_test_data.values[i]
    i=i+1

In [None]:
print("From January to March")
print("total profit of Rs.",total_profit)
print("Total loss of Rs.",total_loss)

In [None]:
predict = pd.DataFrame(predictions, index=p_test_data.index)
plt.figure(figsize=(10,4))

plt.plot(prices)
plt.plot(predict)
plt.legend(('Original Price', 'Predicted Price'), fontsize=16)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [None]:
residuals = p_test_data - predictions
print("The accuracy of the ARIMA model for the test/train data is as stated as below:")
print('Mean Absolute Error:', mean_absolute_error(p_test_data, predictions))
print('Mean Squared Error:', mean_squared_error(p_test_data, predictions))
print('Root Mean Squared Error:', np.sqrt(np.mean(residuals**2)))
print('Mean Absolute Percentage Error:', mean_absolute_percentage_error(p_test_data, predictions))

# Predicting Stck Price for tomorrow (04-03-2023) using ARIMA

In [None]:
import datetime
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=365)
prices = data.history(start=start_date, end=end_date).Close
prices

In [None]:
model = ARIMA(prices, order=(0, 0, 0))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_price = next_day_price.values[0]
print(f"Predicted price for the {end_date}, i.e. tomorrow:", predicted_price)

# Function to Analyse Stock for average and deviation returns
> User Inputs:
* Provide start date input of the form: dd-mm-yyyy 
* Provide end date input of the form: dd-mm-yyyy 
* Provide Retrun Period Weeks
* Provide Minimum Avg Return
* Provide Maximum Deviation Return
* Calculate future price of stock using ARIMA 5,0,4

In [None]:
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
from pandas_datareader._utils import RemoteDataError

In [None]:
def plot_stock_trend_and_returns(prices, titles, start_date, end_date, all_returns):
    
    plt.figure(figsize=(10,6))
    
    plt.subplot(2,1,1)
    plt.plot(prices)
    plt.title(titles[0], fontsize=16)
    plt.ylabel('Price (Rs.)', fontsize=14)
    
    plt.subplot(2,1,2)
    plt.plot(all_returns[0], all_returns[1], color='g')
    plt.title(titles[1], fontsize=16)
    plt.ylabel('Return (Rs.)', fontsize=14)
    plt.axhline(0, color='k', linestyle='--')
    
    plt.tight_layout()
    plt.show()

In [None]:
def perform_analysis_for_stock(prices, start_date, end_date, return_period_weeks, verbose=False):    
    pct_return_after_period = []
    buy_dates = []

    for buy_date, buy_price in prices.iteritems():
        sell_date = buy_date + timedelta(weeks=return_period_weeks)
        
        try:
            sell_price = prices[prices.index == sell_date].iloc[0]
        except IndexError:
            continue
        
        pct_return = (sell_price - buy_price)/buy_price
        if pct_return>0:
            pct_return_after_period.append(pct_return)
            buy_dates.append(buy_date)
        
            if verbose:
                print('Date Buy: %s, Price Buy: %s'%(buy_date,round(buy_price,2)))
                print('Date Sell: %s, Price Sell: %s'%(sell_date,round(sell_price,2)))
                print('Return: %s%%'%round(pct_return*100,1))
                print('-------------------')
    
    if len(pct_return_after_period) == 0:
        return -np.inf, np.inf, None
    
    return np.mean(pct_return_after_period), np.std(pct_return_after_period), [buy_dates, pct_return_after_period]

In [None]:
from datetime import datetime

input_str=input("Provide start date input of the form: dd-mm-yyyy")
input_s_dt = datetime.strptime(input_str, '%d-%m-%Y')
input_str=input("Provide end date input of the form: dd-mm-yyyy")
input_e_dt = datetime.strptime(input_str, '%d-%m-%Y')
start_date, end_date = datetime(input_s_dt.year, input_s_dt.month, input_s_dt.day), datetime(input_e_dt.year, input_e_dt.month, input_e_dt.day)
return_period_weeks = int(input("Provide Retrun Period Weeks:"))
min_avg_return  = float(input("Provide Minimum Avg Return:"))
max_dev_return = float(input("Provide Maximum Deviation Return:"))

now = datetime.now()
if end_date > now:
    prices = data.history(start=start_date, end=now).Close
else: prices = data.history(start=start_date, end=end_date).Close
prices

In [None]:
import datetime
tz = 'Asia/Kolkata'
while now < end_date:
    model = ARIMA(prices, order=(5, 0, 4))
    fitted = model.fit()
    next_day_price = fitted.forecast()
    predicted_price = next_day_price.values[0]
    ts = pd.Timestamp(now).tz_localize('UTC').tz_convert(tz)
    new_data = pd.Series([predicted_price], index=[ts])
    prices = pd.concat([prices, new_data])
    now = now + datetime.timedelta(days=1)

In [None]:
avg_return, dev_return, all_returns = perform_analysis_for_stock(prices, start_date, end_date, return_period_weeks,True)
title_price = 'NSEI'
title_return = 'Avg Return: %s%% | Dev Return: %s%%'%(round(100*avg_return,2), round(100*dev_return,2))
plot_stock_trend_and_returns(prices, [title_price, title_return], start_date, end_date, all_returns)
if avg_return > min_avg_return and dev_return < max_dev_return:
    print("The Avg Return and Dev Return were High!")
else:
    print("The Avg Return and Dev Return were too Low!")

# Function to simulate user amount and give return *based on user inputs*

In [None]:
def run_simulation(returns, prices, amt, thresh, order=(5,0,1), verbose=True, plot=True):
    curr_holding = False
    events_list = []
    init_amt = amt
    #go through dates
    for date, r in tqdm (returns.iloc[14:].items(), total=len(returns.iloc[14:])):
        #if you're currently holding the stock, sell it
        if curr_holding:
            sell_price = prices.loc[date]
            curr_holding=False
            ret = (sell_price-buy_price)/buy_price
            amt *= (1+ret)
            events_list.append(('s', date, ret))
            
            if verbose:
                print('Sold at $%s'%sell_price)
                print('Predicted Return: %s'%round(pred,4))
                print('Actual Return: %s'%(round(ret, 4)))
                print('=======================================')
            continue

        #get data til just before current date
        curr_data = returns[:date]
        model = ARIMA(curr_data, order=order).fit()
        pred = model.forecast().values[0]

        #if you predict a high enough return and not holding, buy stock
        if (not curr_holding) and pred > thresh:
            curr_holding = True
            buy_price = prices.loc[date]
            events_list.append(('b', date))
            if verbose:
                print('Bought at $%s'%buy_price)
                
    if verbose:
        print('Total Amount: $%s'%round(amt,2))
        
    #graph
    if plot:
        plt.figure(figsize=(10,4))
        plt.plot(prices[14:])

        y_lims = (int(prices.min()*.95), int(prices.max()*1.05))
        shaded_y_lims = int(prices.min()*.5), int(prices.max()*1.5)

        for idx, event in enumerate(events_list):
            plt.axvline(event[1], color='k', linestyle='--', alpha=0.4)
            if event[0] == 's':
                color = 'green' if event[2] > 0 else 'red'
                plt.fill_betweenx(range(*shaded_y_lims), 
                                  event[1], events_list[idx-1][1], color=color, alpha=0.1)

        tot_return = round(100*(amt / init_amt - 1), 2)
        tot_return = str(tot_return) + '%'
        plt.title("%s Price Data\nThresh=%s\nTotal Amt: $%s\nTotal Return: %s"%(tickerSymbol, thresh, round(amt,2), tot_return), fontsize=20)
        plt.ylim(*y_lims)
        plt.show()
    
    return amt

In [None]:
from datetime import datetime
input_str=input("Provide start date input of the form: dd-mm-yyyy")
input_s_dt = datetime.strptime(input_str, '%d-%m-%Y')
input_str=input("Provide end date input of the form: dd-mm-yyyy")
input_e_dt = datetime.strptime(input_str, '%d-%m-%Y')
start_date, end_date = datetime(input_s_dt.year, input_s_dt.month, input_s_dt.day), datetime(input_e_dt.year, input_e_dt.month, input_e_dt.day)
prices = data.history(start=start_date, end=end_date).Close

input_amt=float(input("Provide Amount:"))
input_th=float(input("Provide Threshold:"))
returns = prices.pct_change().dropna()
run_simulation(returns, prices, input_amt, input_th)

# Storing Investment data in CSV file

In [6]:
import datetime
from datetime import timedelta, timezone
end_date = datetime.date.today() - datetime.timedelta(days=1)
start_date = end_date - datetime.timedelta(days=365)
open_prices = data.history(start=start_date, end=end_date).Open
close_prices = data.history(start=start_date, end=end_date).Close

In [7]:
model = ARIMA(open_prices, order=(5, 0, 4))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_open_price = next_day_price.values[0]

model = ARIMA(close_prices, order=(5, 0, 4))
fitted = model.fit()

next_day_price = fitted.forecast()
predicted_close_price = next_day_price.values[0]

In [8]:
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=365)
open_prices = data.history(start=start_date, end=end_date).Open
close_prices = data.history(start=start_date, end=end_date).Close

dev_open=open_prices[-1]-predicted_open_price
dev_close=close_prices[-1]-predicted_close_price

In [11]:
from datetime import datetime, timedelta
import datetime as _dt
import pandas as pd

total_days = int(input("Provide total number of days to hold stock: "))
input_dt = input("Provide date input of the form: dd-mm-yyyy ")
input__dt = datetime.strptime(input_dt, '%d-%m-%Y')
start_date = datetime(input__dt.year, input__dt.month, input__dt.day)

def next_weekday(date):
    while date.weekday() >= 5: # if weekday is Sat or Sun
        date += timedelta(days=1)
    return date

start_date= next_weekday(start_date)
end_date = next_weekday(start_date + timedelta(days=total_days+1))

input_amt = float(input("Provide Amount to Invest: "))
input_th = float(input("Provide Minimum Return: "))
if total_days==1:
    date_range = pd.date_range(start=start_date, end=start_date, freq='B')
    dates = pd.Series(date_range)
else:
    date_range = pd.date_range(start=start_date, end=end_date, freq='B')
    dates = pd.Series(date_range)
dates

Provide total number of days to hold stock:  1
Provide date input of the form: dd-mm-yyyy  08-03-2023
Provide Amount to Invest:  10
Provide Minimum Return:  11


0   2023-03-08
dtype: datetime64[ns]

In [14]:
import datetime
from datetime import datetime as _dt
tz = 'Asia/Kolkata'
data_=[]
now = _dt.now()

for i in range(len(dates)):
    
    model = ARIMA(open_prices, order=(5, 0, 4))
    fitted = model.fit()
    next_day_price = fitted.forecast()
    predicted_open_price = next_day_price.values[0]
    ts = pd.Timestamp(now).tz_localize('UTC').tz_convert(tz)
    new_data = pd.Series([predicted_open_price], index=[ts])
    open_prices = pd.concat([open_prices, new_data])
    
    model = ARIMA(close_prices, order=(5, 0, 4))
    fitted = model.fit()
    next_day_price = fitted.forecast()
    predicted_close_price = next_day_price.values[0]
    ts = pd.Timestamp(now).tz_localize('UTC').tz_convert(tz)
    new_data = pd.Series([predicted_close_price], index=[ts])
    close_prices = pd.concat([close_prices, new_data])
    
    if total_days==1:
        sell_price=(predicted_close_price-predicted_open_price)/predicted_close_price*100*input_amt+input_amt
        data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Open': predicted_open_price, 'Close': predicted_close_price,'Return' : predicted_close_price-predicted_open_price, 'Amount': input_amt ,'Selling Price': sell_price , 'Profit(+)/Loss(-)': sell_price-input_amt})
    else:
        if i==0:
            data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Open': predicted_open_price, 'Close': predicted_close_price,'Return' : predicted_close_price-predicted_open_price, 'Amount': input_amt ,'Selling Price': 'None' , 'Profit(+)/Loss(-)': 'None'})
        elif i==total_days-1:
            sell_price=(predicted_close_price-data_[0]['Close'])/predicted_close_price*100*input_amt+input_amt
            data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Open': predicted_open_price, 'Close': predicted_close_price,'Return' : predicted_close_price-predicted_open_price, 'Amount': 'None' ,'Selling Price': sell_price , 'Profit(+)/Loss(-)': sell_price-input_amt})
        else:
            data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Open': predicted_open_price, 'Close': predicted_close_price,'Return' : predicted_close_price-predicted_open_price, 'Amount': 'None' ,'Selling Price': 'None' , 'Profit(+)/Loss(-)': 'None'})

In [15]:
import csv
import os

# Define column headers
headers = ['Date', 'Day', 'Open', 'Close', 'Return', 'Amount', 'Selling Price', 'Profit(+)/Loss(-)']

if os.path.exists('/kaggle/working/nsei_stock.csv'):
    with open(filename, mode='a', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        for row in data_:
            writer.writerow(row)
else:
# Write data to CSV file
    with open('nsei_stock.csv', mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        for row in data_:
            writer.writerow(row)

data_

[{'Date': '2023-03-08',
  'Day': 'Wednesday',
  'Open': 17434.795363319157,
  'Close': 17591.538320663993,
  'Return': 156.7429573448353,
  'Amount': 10.0,
  'Selling Price': 18.910133638552594,
  'Profit(+)/Loss(-)': 8.910133638552594}]

# New Data Import

In [11]:
import datetime
from datetime import timedelta, timezone
open_prices = data.history(start='2017-01-01', end='2020-12-30').Open
close_prices = data.history(start='2017-01-01', end='2020-12-30').Close
test_date_range = pd.date_range(start='2021-01-01', end='2022-12-30', freq='B')
dates = pd.Series(date_range)
dates

0     2021-01-01
1     2021-01-04
2     2021-01-05
3     2021-01-06
4     2021-01-07
         ...    
516   2022-12-26
517   2022-12-27
518   2022-12-28
519   2022-12-29
520   2022-12-30
Length: 521, dtype: datetime64[ns]

In [12]:
import datetime
from datetime import datetime as _dt
tz = 'Asia/Kolkata'
data_=[]
now = _dt.now()

for i in range(len(dates)):
    
    model = ARIMA(open_prices, order=(5, 0, 4))
    fitted = model.fit()
    next_day_price = fitted.forecast()
    predicted_open_price = next_day_price.values[0]
    ts = pd.Timestamp(now).tz_localize('UTC').tz_convert(tz)
    new_data = pd.Series([predicted_open_price], index=[ts])
    open_prices = pd.concat([open_prices, new_data])
    
    model = ARIMA(close_prices, order=(5, 0, 4))
    fitted = model.fit()
    next_day_price = fitted.forecast()
    predicted_close_price = next_day_price.values[0]
    ts = pd.Timestamp(now).tz_localize('UTC').tz_convert(tz)
    new_data = pd.Series([predicted_close_price], index=[ts])
    close_prices = pd.concat([close_prices, new_data])
    
    if predicted_open_price>predicted_close_price:
        direction='SELL'
        data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Predicted Direction' : direction, 'Open': predicted_open_price, 'Close': predicted_close_price, 'Pnl':predicted_open_price-predicted_close_price})
    elif predicted_open_price<predicted_close_price:
        direction='BUY'
        data_.append({'Date': dates[i].strftime('%Y-%m-%d'),'Day': dates[i].strftime('%A'), 'Predicted Direction' : direction, 'Open': predicted_open_price, 'Close': predicted_close_price, 'Pnl':predicted_close_price-predicted_open_price})

KeyboardInterrupt: 

In [14]:
import csv
import os

# Define column headers
headers = ['Date', 'Day', 'Predicted Direction', 'Open', 'Close', 'Pnl']

if os.path.exists('/kaggle/working/nsei_stock.csv'):
    with open('nsei_stock.csv', mode='a', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        for row in data_:
            writer.writerow(row)
else:
# Write data to CSV file
    with open('nsei_stock.csv', mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        for row in data_:
            writer.writerow(row)

data_

[{'Date': '2021-01-01',
  'Day': 'Friday',
  'Predicted Direction': 'SELL',
  'Open': 13978.289539905812,
  'Close': 13967.621115995815,
  'Pnl': 10.668423909997728},
 {'Date': '2021-01-04',
  'Day': 'Monday',
  'Predicted Direction': 'SELL',
  'Open': 13989.149733152612,
  'Close': 13944.753073950998,
  'Pnl': 44.39665920161315},
 {'Date': '2021-01-05',
  'Day': 'Tuesday',
  'Predicted Direction': 'SELL',
  'Open': 14008.502817778131,
  'Close': 13930.720286212432,
  'Pnl': 77.78253156569917},
 {'Date': '2021-01-06',
  'Day': 'Wednesday',
  'Predicted Direction': 'SELL',
  'Open': 13995.677067594275,
  'Close': 13946.97424069146,
  'Pnl': 48.702826902816014},
 {'Date': '2021-01-07',
  'Day': 'Thursday',
  'Predicted Direction': 'SELL',
  'Open': 13960.989261762494,
  'Close': 13919.837448666423,
  'Pnl': 41.15181309607033},
 {'Date': '2021-01-08',
  'Day': 'Friday',
  'Predicted Direction': 'SELL',
  'Open': 13963.83188660031,
  'Close': 13931.41939445416,
  'Pnl': 32.412492146149816}