## IMPORT REQUIRED LIBRARIES

In [69]:

import os
import warnings
from datetime import datetime

import pandas as pd
import plotly.express as px
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
warnings.filterwarnings('ignore')

## LOAD THE DATA

In [70]:
# Download the full daily-adjusted price history for the symbol RELIANCE.BSE from
# Alpha Vantage, requested as CSV so pandas can read it straight from the URL.
#
# The API key is read from the environment rather than being stored in the
# notebook, so sharing or committing the notebook does not leak the credential.
# Export ALPHAVANTAGE_API_KEY before starting the kernel.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the ALPHAVANTAGE_API_KEY environment variable to your Alpha Vantage API key.'
    )

url = (
    'https://www.alphavantage.co/query'
    '?function=TIME_SERIES_DAILY_ADJUSTED'
    '&symbol=RELIANCE.BSE'
    '&outputsize=full'
    f'&apikey={api_key}'
    '&datatype=csv'
)
data = pd.read_csv(url)

# Fail early if the response is not the expected price table (for example an
# error message returned instead of CSV rows); the cells below rely on these columns.
if not {'timestamp', 'close'}.issubset(data.columns):
    raise ValueError(
        'Unexpected response from Alpha Vantage; columns received: ' + ', '.join(map(str, data.columns))
    )

#### We fetched the daily price history from the Alpha Vantage API (www.alphavantage.co) as CSV and loaded it into a pandas DataFrame

## DATA DESCRIPTION

In [71]:
# Preview the first five rows to check the column names and the row order of the download.
data.head()

Unnamed: 0,timestamp,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
0,2023-03-27,2215.5,2254.0,2200.5,2237.5,2237.5,315446,0.0,1.0
1,2023-03-24,2247.6001,2250.0,2198.0,2203.5,2203.5,178216,0.0,1.0
2,2023-03-23,2270.0,2270.0,2243.75,2247.55,2247.55,208447,0.0,1.0
3,2023-03-22,2280.1001,2291.3,2270.0,2276.6499,2276.6499,230717,0.0,1.0
4,2023-03-21,2222.0,2273.8501,2218.0,2270.05,2270.05,441746,0.0,1.0


In [72]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
count,4494.0,4494.0,4494.0,4494.0,4494.0,4494.0,4494.0,4494.0
mean,1332.183349,1349.989718,1313.831924,1331.327741,804.289234,3104615.0,0.018539,1.000447
std,619.586969,629.267389,609.890287,619.519153,709.727968,21178980.0,0.311301,0.021094
min,377.5176,381.5171,372.4343,374.8265,80.5316,42452.0,0.0,1.0
25%,893.7747,903.6372,883.0497,892.7747,383.284875,454013.5,0.0,1.0
50%,1064.9997,1078.74965,1051.1747,1064.54965,471.8063,934564.0,0.0,1.0
75%,1769.9994,1802.8249,1745.249425,1775.061975,1068.41115,2692082.0,0.0,1.0
max,3215.999,3252.0991,3135.2491,3216.299,2810.7354,802184100.0,8.0,2.0


## PREPARING THE DATA TO FIT INTO THE MODEL

In [73]:
# Prepare the frame for the model:
#   * drop 'adjusted_close', which is not used as a feature or target;
#   * encode 'timestamp' as a proleptic Gregorian ordinal (an integer), because the
#     regressor needs numeric inputs.
#
# The cell is written to be safely re-runnable: the drop tolerates the column
# already being gone, and the conversion is skipped when 'timestamp' is already
# an integer ordinal, instead of raising on the second execution.
data = data.drop(columns=['adjusted_close'], errors='ignore')
if not pd.api.types.is_integer_dtype(data['timestamp']):
    # pd.to_datetime accepts both the raw 'YYYY-MM-DD' strings and a column that
    # has already been turned into datetimes.
    parsed_dates = pd.to_datetime(data['timestamp'], format='%Y-%m-%d')
    data['timestamp'] = [ts.toordinal() for ts in parsed_dates]

## SPLIT INTO INPUT AND TARGET

In [74]:
# Target: the closing price. Features: every remaining column of `data`.
y = data['close']
X = data.drop(columns='close')

## USING GRIDSEARCHCV TO FIND THE BEST PARAMETERS

In [75]:
# Hyperparameter grid explored by the grid search: every combination of the
# values below is tried (3 x 3 x 3 x 3 = 81 candidates).
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [None]:
# Base estimator handed to the grid search. This cell must be executed before the
# grid-search cell, which passes `rf` as its estimator.
# A fixed random_state makes the forest (and therefore the cross-validation
# scores and the selected parameters) reproducible from run to run.
rf = RandomForestRegressor(random_state=42)

In [76]:
# Exhaustive search over `param_grid` using 5-fold cross-validation; n_jobs=-1
# lets scikit-learn use every available CPU core for the candidate fits.
search_options = {'cv': 5, 'n_jobs': -1}
grid_search = GridSearchCV(rf, param_grid, **search_options)
grid_search.fit(X, y)

## FITTING THE MODEL

In [77]:
# Build the final model from the winning hyperparameters and fit it on all of X, y.
#
# All of grid_search.best_params_ is applied via set_params instead of copying
# four keys by hand, so any hyperparameter later added to the grid is carried
# over automatically rather than silently dropped. random_state is fixed so the
# fitted forest, and every prediction made from it, is reproducible.
#
# NOTE: GridSearchCV was created with its default refit behaviour, so
# grid_search.best_estimator_ should already hold a model refit on the full data;
# this cell trains a separate, explicitly seeded one.
rf = RandomForestRegressor(random_state=42).set_params(**grid_search.best_params_)

rf.fit(X, y)

## PLOTTING PREDICTED AGAINST THE ACTUAL DATA

In [78]:
# In-sample predictions: `rf` was fitted on this same X, so the two curves show
# how closely the model reproduces its training data, not how well it generalises.
y_pred = rf.predict(X)

# Convert the ordinal dates back to datetimes only inside the plotting frame.
# `data['timestamp']` is deliberately left as ordinals: overwriting it in place
# made this cell crash when run a second time (fromordinal on Timestamps) and
# would put datetimes into X if the split cell were re-run afterwards.
df = pd.DataFrame({
    'timestamp': data['timestamp'].map(datetime.fromordinal),
    'actual': y,
    'predicted': y_pred,
})
fig = px.line(df, x='timestamp', y=['actual', 'predicted'], title='Actual vs Predicted Close')
fig.show()

## PREDICTING FOR A SINGLE ROW (THE FIRST ROW OF THE DATA)

In [79]:
# Predict the close for the first row of the data set and compare it with the
# recorded value.
#
# The sample is taken directly from X, so the date ordinal always belongs to the
# same row as the other features (the data is downloaded live, so a hard-coded
# date would go stale), and the feature order and names match what the model was
# fitted on. Note that this row was part of the training data.
sample = X.iloc[[0]]  # double brackets keep a one-row DataFrame
ordinal = int(sample['timestamp'].iloc[0])
predicted_close = rf.predict(sample)[0]
print('Predicted Close:', predicted_close, ordinal)

actual_close = data['close'].iloc[0]
print('Actual Close:', actual_close)

Predicted Close: 2236.1539665357136 738606
Actual Close: 2237.5
