In [28]:
import yfinance as yf
import  matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime,timedelta

In [30]:
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.metrics import MeanAbsolutePercentageError

# STOCK ANALYSIS


In [31]:
class StockAnalysis:
    def __init__(self, stock, start_date, end_date):
        self.stock = stock
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
# We are Downlording the data from Yahoo finance 
    def download_data(self):
        self.data = yf.download(self.stock, self.start_date, self.end_date)
        return self.data
# Using Plotly we are ploting Candle stick graph
    def plot_candlestick(self, data):
        fig = go.Figure(data=[go.Candlestick(
            x=data.index,
            open=data['Open'],
            high=data['High'],
            low=data['Low'],
            close=data['Adj Close']
        )])
        fig.update_layout(
            title_text= self.stock + " Stock Price",
            xaxis_rangeslider_visible=False,
            yaxis_fixedrange=False,
            yaxis_type="linear"
        )
        fig.show()

# To verify the trend and seasonality we have plot seasonal decomposition 
    def visualize_data(self):
        if self.data is None:
            self.data = self.download_data()

        self.plot_candlestick(self.data)

        result = seasonal_decompose(self.data["Adj Close"], model="additive", period=30)

        fig = make_subplots(rows=4, cols=1, shared_xaxes=True, vertical_spacing=0.05, subplot_titles=("Original Data", "Trend", "Seasonal", "Residual"))

        fig.add_trace(go.Scatter(x=self.data.index, y=self.data["Adj Close"], mode='lines', name="Original Data", line=dict(color='blue')), row=1, col=1)

        fig.add_trace(go.Scatter(x=self.data.index, y=result.trend, mode='lines', name="Trend", line=dict(color='green')), row=2, col=1)

        fig.add_trace(go.Scatter(x=self.data.index, y=result.seasonal, mode='lines', name="Seasonal", line=dict(color='orange')), row=3, col=1)

        fig.add_trace(go.Scatter(x=self.data.index, y=result.resid, mode='lines', name="Residual", line=dict(color='red')), row=4, col=1)

        fig.update_layout(title_text="Seasonal Decomposition of " + self. stock +" Stock Price",
                          height=800, width=1600,
                          showlegend=False)

        fig.update_yaxes(title_text="Value", row=1, col=1)
        fig.update_yaxes(title_text="Trend", row=2, col=1)
        fig.update_yaxes(title_text="Seasonal", row=3, col=1)
        fig.update_yaxes(title_text="Residual", row=4, col=1)
        fig.show()



In [32]:
stock = "META"
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - pd.DateOffset(years=10)).strftime('%Y-%m-%d')
stock_analysis = StockAnalysis(stock, start_date, end_date)
Plot_graph = stock_analysis.visualize_data()
data = stock_analysis.download_data()
data

[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-04-28,58.049999,58.310001,54.660000,56.139999,56.080498,107758000
2014-04-29,56.090000,58.279999,55.840000,58.150002,58.088367,75557000
2014-04-30,57.580002,59.849998,57.160000,59.779999,59.716640,76093000
2014-05-01,60.430000,62.279999,60.209999,61.150002,61.085190,82429000
2014-05-02,61.299999,61.889999,60.180000,60.459999,60.395920,54189000
...,...,...,...,...,...,...
2024-04-22,489.720001,492.010010,473.399994,481.730011,481.730011,17271100
2024-04-23,491.250000,498.760010,488.970001,496.100006,496.100006,15079200
2024-04-24,508.059998,510.000000,484.579987,493.500000,493.500000,37772700
2024-04-25,421.399994,445.769989,414.500000,441.380005,441.380005,82890700


## Close price forcasting using EMA 
### Ft+1=kXt+(1−k)Ft,
### Ft+1 = forecast for Next day
### Ft = forecast of today  
### Xt = close price of today
### k = Smoothing Factor

In [83]:
class EMA_Forecaster:
    def __init__(self, data):
        self.data = data
        self.EMAs = {}
        self.forecasted_values = {}

    def calculate_EMA(self, period):
        ema_key = f'EMA_{period}'
        self.EMAs[ema_key] = self.data["Adj Close"].ewm(span=period).mean()

    def plot_EMA(self, periods):
        fig = go.Figure()
        for period in periods:
            ema_key = f'EMA_{period}'
            fig.add_trace(go.Scatter(x=self.EMAs[ema_key].index, y=self.EMAs[ema_key].values, mode='lines', name=f'EMA {period}'))
        fig.update_layout(title='EMA for Different Periods', xaxis_title='Date', yaxis_title='EMA')
        fig.show()

    def forecast_adj_close(self, periods, k, days):
        for period in periods:
            ema_key = f'EMA_{period}'
            EMA_values_forecasted = []
            current_EMA = self.EMAs[ema_key].values[-1]

            for i in range(days):
                a = k * self.data["Adj Close"].values[-1] + (1 - k) * current_EMA
                EMA_values_forecasted.append(a)
                current_EMA = a

            self.forecasted_values[period] = np.array(EMA_values_forecasted)
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=self.data.index, y=self.data["Adj Close"], mode='lines', name='Actual Adj Close'))
            fig.add_trace(go.Scatter(x=pd.date_range(start=self.data.index[-1], periods=days, freq='B'), y=EMA_values_forecasted, mode='lines', name=f'Forecasted Adj Close - EMA {period}'))
            fig.update_layout(title=f'Forecasted Adj Close for Next {days} Days - EMA {period}', xaxis_title='Date', yaxis_title='Adjusted Close Price')
            fig.show()

            # Calculate accuracy metrics
            actual = self.data["Adj Close"].values[-days:]
            mae = mean_absolute_error(actual, EMA_values_forecasted)
            rmse = np.sqrt(mean_squared_error(actual, EMA_values_forecasted))
            mape = np.mean(np.abs((actual - EMA_values_forecasted) / actual)) * 100
            print(f"EMA {period} Forecast Accuracy (MAE): {mae:.2f}")
            print(f"EMA {period} Forecast Accuracy (RMSE): {rmse:.2f}")
            print(f"EMA {period} Forecast Accuracy (MAPE): {mape:.2f}%")

        return self.forecasted_values

    def print_forecasted_values_as_dataframe(self):
        df = pd.DataFrame(self.forecasted_values)
        df.index = pd.date_range(start=self.data.index[-1], periods=len(df), freq='B')
        print("Forecasted Adjusted Close Values:")
        print(df)

# Assuming 'data' is your DataFrame containing historical stock data
ema_forecaster = EMA_Forecaster(data)

ema_forecaster.calculate_EMA(50)
ema_forecaster.calculate_EMA(100)
ema_forecaster.calculate_EMA(150)
ema_forecaster.calculate_EMA(200)
ema_forecaster.calculate_EMA(250)

forecasted_values = ema_forecaster.forecast_adj_close([50, 100, 150, 200, 250], 0.5, 14)

ema_forecaster.print_forecasted_values_as_dataframe()

EMA 50 Forecast Accuracy (MAE): 47.54
EMA 50 Forecast Accuracy (RMSE): 52.12
EMA 50 Forecast Accuracy (MAPE): 9.44%


EMA 100 Forecast Accuracy (MAE): 50.09
EMA 100 Forecast Accuracy (RMSE): 55.31
EMA 100 Forecast Accuracy (MAPE): 9.93%


EMA 150 Forecast Accuracy (MAE): 52.33
EMA 150 Forecast Accuracy (RMSE): 58.40
EMA 150 Forecast Accuracy (MAPE): 10.37%


EMA 200 Forecast Accuracy (MAE): 54.16
EMA 200 Forecast Accuracy (RMSE): 61.10
EMA 200 Forecast Accuracy (MAPE): 10.72%


EMA 250 Forecast Accuracy (MAE): 55.64
EMA 250 Forecast Accuracy (RMSE): 63.36
EMA 250 Forecast Accuracy (MAPE): 11.01%
Forecasted Adjusted Close Values:
                   50          100         150         200         250
2024-04-26  461.786437  443.982628  428.264145  415.424391  405.121805
2024-04-29  452.538223  443.636318  435.777077  429.357200  424.205907
2024-04-30  447.914116  443.463163  439.533543  436.323604  433.747958
2024-05-01  445.602062  443.376586  441.411776  439.806806  438.518983
2024-05-02  444.446035  443.333297  442.350892  441.548407  440.904496
2024-05-03  443.868022  443.311653  442.820450  442.419208  442.097252
2024-05-06  443.579015  443.300831  443.055229  442.854608  442.693630
2024-05-07  443.434512  443.295420  443.172619  443.072308  442.991819
2024-05-08  443.362260  443.292714  443.231314  443.181158  443.140914
2024-05-09  443.326134  443.291361  443.260661  443.235584  443.215461
2024-05-10  443.308071  443.290685  443.275335  443.262796  443.2

### SCALING THE DATA

In [34]:
input = data[["Adj Close"]]
input

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2014-04-28,56.080498
2014-04-29,58.088367
2014-04-30,59.716640
2014-05-01,61.085190
2014-05-02,60.395920
...,...
2024-04-22,481.730011
2024-04-23,496.100006
2024-04-24,493.500000
2024-04-25,441.380005


In [35]:
scale = MinMaxScaler(feature_range= (0,1))
scale_input = scale.fit_transform(input)
scale_input

array([[0.        ],
       [0.00426065],
       [0.0077158 ],
       ...,
       [0.92819238],
       [0.81759515],
       [0.82164813]])

### Data PreProcessing

In this below code we are considering the first 100 element in X
And next 101 is going into y = response
for next iteration 2 -101 append into X
in y we will append 102 element

In [36]:
sequence = 100
X = np.array([scale_input[i-sequence:i] for i in range(sequence, len(scale_input))])
y = scale_input[sequence:]
X.shape, y.shape

((2418, 100, 1), (2418, 1))

### Train Test Split

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((1934, 100, 1), (484, 100, 1), (1934, 1), (484, 1))

### Implementing Neural Network

In [38]:
Model = Sequential()
Model.add(LSTM(128,return_sequences= True,input_shape = (X_train.shape[1],1)))
Model.add(LSTM(64,return_sequences= False))
Model.add(Dense(25))
Model.add(Dense(1))


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [39]:
mape_metric = MeanAbsolutePercentageError()
Model.compile(optimizer="adam", loss="mean_squared_error",metrics=[mape_metric])

In [59]:
Model = Model.fit(X_train,y_train,batch_size= 32,epochs= 15,validation_data=(X_test, y_test))
Model

Epoch 1/15
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 109ms/step - loss: 2.3858e-04 - mean_absolute_percentage_error: 4.8335 - val_loss: 7.9036e-04 - val_mean_absolute_percentage_error: 5.6173
Epoch 2/15
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 105ms/step - loss: 2.4152e-04 - mean_absolute_percentage_error: 4.7468 - val_loss: 6.5088e-04 - val_mean_absolute_percentage_error: 5.2759
Epoch 3/15
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 110ms/step - loss: 2.4289e-04 - mean_absolute_percentage_error: 4.2307 - val_loss: 7.1669e-04 - val_mean_absolute_percentage_error: 5.2391
Epoch 4/15
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 108ms/step - loss: 3.2501e-04 - mean_absolute_percentage_error: 5.2724 - val_loss: 7.8656e-04 - val_mean_absolute_percentage_error: 5.4486
Epoch 5/15
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 109ms/step - loss: 2.3109e-04 - mean_absolute_percentage_error: 4.438

<keras.src.callbacks.history.History at 0x1c13e037080>

### Checking the model loss accuracy using plot

In [70]:
train_loss = Model.history['loss']
val_loss = Model.history['val_loss']
epochs = range(1, len(train_loss) + 1)

fig = go.Figure()
fig.add_trace(go.Scatter(x=list(epochs), y=train_loss, mode='lines+markers', name='Training Loss', marker=dict(symbol='circle')))
fig.add_trace(go.Scatter(x=list(epochs), y=val_loss, mode='lines+markers', name='Validation Loss', marker=dict(symbol='x')))

fig.update_layout(title='Training and Validation Loss',
                  xaxis_title='Epoch',
                  yaxis_title='Loss')

fig.show()


### Predicting the Next 14 days stock Adj price using LSTM

In [84]:

class StockPredictor:
    def __init__(self, model, X_test, y_test):
        self.model = model
        self.X_test = X_test
        self.y_test = y_test

    def next_14_close(self, days):
        predictions = []
        actual = self.X_test[-1]
        for i in range(days):
            X_pred = self.model.predict(np.expand_dims(actual, axis=0))
            predictions.append(X_pred[0][0])
            print(predictions)
            actual = np.roll(actual, shift=-1, axis=0)
            actual[-1] = X_pred[0][0]
        return predictions

    def generate_predictions_dataframe(self, predictions, start_date):
        a = scale.inverse_transform(np.array(predictions).reshape(-1, 1))
        future_dates = pd.date_range(start=start_date, periods=len(predictions), freq='B').tolist()
        df = pd.DataFrame({'Adj Close': a.reshape(-1)}, index=future_dates)
        return df


stock_predictor = StockPredictor(Model.model, X_test, y_test)
predictions = stock_predictor.next_14_close(20)
start_date = '2024-04-23'
predictions_df = stock_predictor.generate_predictions_dataframe(predictions, start_date)

# Combine actual and forecasted data
combined_data = pd.concat([data[['Adj Close']], predictions_df], axis=0)
combined_data

# Plot actual and forecasted prices
fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=data['Adj Close'], mode='lines', name='Actual Price', line=dict(color='light blue')))
fig.add_trace(go.Scatter(x=predictions_df.index, y=predictions_df['Adj Close'], mode='lines', name='Forecasted Price', line=dict(color='green')))
fig.update_layout(title='Actual vs Forecasted Prices', xaxis_title='Date', yaxis_title='Adjusted Close Price',
                  xaxis=dict(type='date', tickformat='%Y-%m-%d', range=[data.index[0], predictions_df.index[-1]]))
fig.show()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[0.8655164]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[0.8655164, 0.85235703]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[0.8655164, 0.85235703, 0.84678614]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[0.8655164, 0.85235703, 0.84678614, 0.8438455]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[0.8655164, 0.85235703, 0.84678614, 0.8438455, 0.8421764]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[0.8655164, 0.85235703, 0.84678614, 0.8438455, 0.8421764, 0.84111387]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[0.8655164, 0.85235703, 0.84678614, 0.8438455, 0.8421764, 0.84111387, 0.8402309]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[0.8655164, 0.85235703, 0.84678614, 0.8438455, 0.8421764, 0.84111387, 0.8402309, 0.8392555]
[1m