In [None]:
#| default_exp models.itransformer

In [1]:
!pip install neuralforecast
!pip install nbdev
!pip install optuna
!pip install yfinance

Collecting neuralforecast
  Downloading neuralforecast-1.7.2-py3-none-any.whl.metadata (16 kB)
Collecting coreforecast>=0.0.6 (from neuralforecast)
  Downloading coreforecast-0.0.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting utilsforecast>=0.0.25 (from neuralforecast)
  Downloading utilsforecast-0.1.10-py3-none-any.whl.metadata (7.4 kB)
Downloading neuralforecast-1.7.2-py3-none-any.whl (221 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.5/221.5 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coreforecast-0.0.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (223 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.4/223.4 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading utilsforecast-0.1.10-py3-none-any.whl (40 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages:

In [3]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
#| hide
from fastcore.test import test_eq
from nbdev.showdoc import show_doc

# iTransformer

The iTransformer model simply takes the Transformer architecture but it applies the attention and feed-forward network on the inverted dimensions. This means that time points of each individual series are embedded into tokens. That way, the attention mechanisms learn multivariate correlation and the feed-forward network learns non-linear relationships.

**References**
- [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. "iTransformer: Inverted Transformers Are Effective for Time Series Forecasting"](https://arxiv.org/abs/2310.06625)

![Figure 1. Architecture of iTransformer.](imgs_models/itransformer.png)

In [5]:
#| export
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from typing import Optional
from math import sqrt

from neuralforecast.losses.pytorch import MAE, MSE
from neuralforecast.common._base_multivariate import BaseMultivariate

from neuralforecast.common._modules import TransEncoder, TransEncoderLayer, AttentionLayer

# 1. Auxiliary functions

## 1.1 Attention

In [6]:
#| exporti

class TriangularCausalMask():
    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask

class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)

## 1.2 Inverted embedding

In [7]:
#| exporti

class DataEmbedding_inverted(nn.Module):
    def __init__(self, c_in, hidden_size, dropout=0.1):
        super(DataEmbedding_inverted, self).__init__()
        self.value_embedding = nn.Linear(c_in, hidden_size)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        x = x.permute(0, 2, 1)
        # x: [Batch Variate Time]
        if x_mark is None:
            x = self.value_embedding(x)
        else:
            # the potential to take covariates (e.g. timestamps) as tokens
            x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1))
        # x: [Batch Variate hidden_size]
        return self.dropout(x)

# 2. Model

In [8]:
#| export

class iTransformer(BaseMultivariate):

    """ iTransformer

    **Parameters:**<br>
    `h`: int, Forecast horizon. <br>
    `input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].<br>
    `n_series`: int, number of time-series.<br>
    `futr_exog_list`: str list, future exogenous columns.<br>
    `hist_exog_list`: str list, historic exogenous columns.<br>
    `stat_exog_list`: str list, static exogenous columns.<br>
    `hidden_size`: int, dimension of the model.<br>
    `n_heads`: int, number of heads.<br>
    `e_layers`: int, number of encoder layers.<br>
    `d_layers`: int, number of decoder layers.<br>
    `d_ff`: int, dimension of fully-connected layer.<br>
    `factor`: int, attention factor.<br>
    `dropout`: float, dropout rate.<br>
    `use_norm`: bool, whether to normalize or not.<br>
    `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
    `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
    `max_steps`: int=1000, maximum number of training steps.<br>
    `learning_rate`: float=1e-3, Learning rate between (0, 1).<br>
    `num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.<br>
    `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>
    `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>
    `batch_size`: int=32, number of different series in each batch.<br>
    `step_size`: int=1, step size between each window of temporal data.<br>
    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
    `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.<br>
    `alias`: str, optional,  Custom name of the model.<br>
    `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).<br>
    `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.<br>
    `**trainer_kwargs`: int,  keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>

    **References**<br>
    - [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. "iTransformer: Inverted Transformers Are Effective for Time Series Forecasting"](https://arxiv.org/abs/2310.06625)
    """

    # Class attributes
    SAMPLING_TYPE = 'multivariate'

    def __init__(self,
                 h,
                 input_size,
                 n_series,
                 futr_exog_list = None,
                 hist_exog_list = None,
                 stat_exog_list = None,
                 hidden_size: int = 512,
                 n_heads: int = 8,
                 e_layers: int = 2,
                 d_layers: int = 1,
                 d_ff: int = 2048,
                 factor: int = 1,
                 dropout: float = 0.1,
                 use_norm: bool = True,
                 loss = MAE(),
                 valid_loss = None,
                 max_steps: int = 1000,
                 learning_rate: float = 1e-3,
                 num_lr_decays: int = -1,
                 early_stop_patience_steps: int =-1,
                 val_check_steps: int = 100,
                 batch_size: int = 32,
                 step_size: int = 1,
                 scaler_type: str = 'identity',
                 random_seed: int = 1,
                 num_workers_loader: int = 0,
                 drop_last_loader: bool = False,
                 optimizer = None,
                 optimizer_kwargs = None,
                 **trainer_kwargs):

        super(iTransformer, self).__init__(h=h,
                                           input_size=input_size,
                                           n_series=n_series,
                                           stat_exog_list = None,
                                           futr_exog_list = None,
                                           hist_exog_list = None,
                                           loss=loss,
                                           valid_loss=valid_loss,
                                           max_steps=max_steps,
                                           learning_rate=learning_rate,
                                           num_lr_decays=num_lr_decays,
                                           early_stop_patience_steps=early_stop_patience_steps,
                                           val_check_steps=val_check_steps,
                                           batch_size=batch_size,
                                           step_size=step_size,
                                           scaler_type=scaler_type,
                                           random_seed=random_seed,
                                           num_workers_loader=num_workers_loader,
                                           drop_last_loader=drop_last_loader,
                                           optimizer=optimizer,
                                           optimizer_kwargs=optimizer_kwargs,
                                           **trainer_kwargs)

        # Asserts
        if stat_exog_list is not None:
            raise Exception("iTransformer does not support static exogenous variables")
        if futr_exog_list is not None:
            raise Exception("iTransformer does not support future exogenous variables")
        if hist_exog_list is not None:
            raise Exception("iTransformer does not support historical exogenous variables")

        self.enc_in = n_series
        self.dec_in = n_series
        self.c_out = n_series
        self.hidden_size = hidden_size
        self.n_heads = n_heads
        self.e_layers = e_layers
        self.d_layers = d_layers
        self.d_ff = d_ff
        self.factor = factor
        self.dropout = dropout
        self.use_norm = use_norm

        # Architecture
        self.enc_embedding = DataEmbedding_inverted(input_size, self.hidden_size, self.dropout)

        self.encoder = TransEncoder(
            [
                TransEncoderLayer(
                    AttentionLayer(
                        FullAttention(False, self.factor, attention_dropout=self.dropout), self.hidden_size, self.n_heads),
                    self.hidden_size,
                    self.d_ff,
                    dropout=self.dropout,
                    activation=F.gelu
                ) for l in range(self.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(self.hidden_size)
        )

        self.projector = nn.Linear(self.hidden_size, h, bias=True)

    def forecast(self, x_enc):
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc /= stdev

        _, _, N = x_enc.shape # B L N
        # B: batch_size;       E: hidden_size;
        # L: input_size;       S: horizon(h);
        # N: number of variate (tokens), can also includes covariates

        # Embedding
        # B L N -> B N E                (B L N -> B L E in the vanilla Transformer)
        enc_out = self.enc_embedding(x_enc, None) # covariates (e.g timestamp) can be also embedded as tokens

        # B N E -> B N E                (B L E -> B L E in the vanilla Transformer)
        # the dimensions of embedded time series has been inverted, and then processed by native attn, layernorm and ffn modules
        enc_out, attns = self.encoder(enc_out, attn_mask=None)

        # B N E -> B N S -> B S N
        dec_out = self.projector(enc_out).permute(0, 2, 1)[:, :, :N] # filter the covariates

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer
            dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))
            dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))

        return dec_out

    def forward(self, windows_batch):
        insample_y = windows_batch['insample_y']

        y_pred = self.forecast(insample_y)
        y_pred = y_pred[:, -self.h:, :]
        y_pred = self.loss.domain_map(y_pred)

        # domain_map might have squeezed the last dimension in case n_series == 1
        if y_pred.ndim == 2:
            return y_pred.unsqueeze(-1)
        else:
            return y_pred


In [9]:
show_doc(iTransformer)

---

### iTransformer

>      iTransformer (h, input_size, n_series, futr_exog_list=None,
>                    hist_exog_list=None, stat_exog_list=None,
>                    hidden_size:int=512, n_heads:int=8, e_layers:int=2,
>                    d_layers:int=1, d_ff:int=2048, factor:int=1,
>                    dropout:float=0.1, use_norm:bool=True, loss=MAE(),
>                    valid_loss=None, max_steps:int=1000,
>                    learning_rate:float=0.001, num_lr_decays:int=-1,
>                    early_stop_patience_steps:int=-1, val_check_steps:int=100,
>                    batch_size:int=32, step_size:int=1,
>                    scaler_type:str='identity', random_seed:int=1,
>                    num_workers_loader:int=0, drop_last_loader:bool=False,
>                    optimizer=None, optimizer_kwargs=None, **trainer_kwargs)

*iTransformer

**Parameters:**<br>
`h`: int, Forecast horizon. <br>
`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].<br>
`n_series`: int, number of time-series.<br>
`futr_exog_list`: str list, future exogenous columns.<br>
`hist_exog_list`: str list, historic exogenous columns.<br>
`stat_exog_list`: str list, static exogenous columns.<br>
`hidden_size`: int, dimension of the model.<br>
`n_heads`: int, number of heads.<br>
`e_layers`: int, number of encoder layers.<br>
`d_layers`: int, number of decoder layers.<br>
`d_ff`: int, dimension of fully-connected layer.<br>
`factor`: int, attention factor.<br>
`dropout`: float, dropout rate.<br>
`use_norm`: bool, whether to normalize or not.<br>
`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
`max_steps`: int=1000, maximum number of training steps.<br>
`learning_rate`: float=1e-3, Learning rate between (0, 1).<br>
`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.<br>
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>
`val_check_steps`: int=100, Number of training steps between every validation loss check.<br>
`batch_size`: int=32, number of different series in each batch.<br>
`step_size`: int=1, step size between each window of temporal data.<br>
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.<br>
`alias`: str, optional,  Custom name of the model.<br>
`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).<br>
`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.<br>
`**trainer_kwargs`: int,  keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>

**References**<br>
- [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. "iTransformer: Inverted Transformers Are Effective for Time Series Forecasting"](https://arxiv.org/abs/2310.06625)*

In [None]:
show_doc(iTransformer.fit, name='iTransformer.fit')

In [None]:
show_doc(iTransformer.predict, name='iTransformer.predict')

# 3. Usage example

In [None]:
# !pip install -U ipywidgets

In [None]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import matplotlib.pyplot as plt

from neuralforecast import NeuralForecast
from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic
from neuralforecast.losses.pytorch import MSE, MAPE
from neuralforecast.auto import AutoiTransformer
import optuna
import warnings
warnings.filterwarnings('ignore')

In [None]:
import yfinance as yf
from datetime import datetime, date
from dateutil.relativedelta import relativedelta

ticker = 'INFY'
def get_historical_prices(ticker,year=10,columns=['Close']):
  symbol = ticker + ".NS"
  end_date = datetime.strptime("19-04-2024", "%d-%m-%Y")
  start_date = end_date - relativedelta(years=year)
  stock_data = yf.download(symbol, start = start_date, end=end_date)
  stock_data.reset_index(inplace=True)
  columns.insert(0,"Date")
  stock_data = stock_data[columns]
  stock_data.to_csv(f"{ticker}_hist_prices.csv")
  return stock_data

stock_data = get_historical_prices(ticker,year=10,columns=['Close'])
stock_data['unique_id'] = ticker
stock_data.rename({'Date':'ds','Close':'y'},axis = 1, inplace = True)
stock_data

In [None]:
train_data = stock_data
train_data

In [None]:
# import pandas as pd
# stock_data = pd.read_csv("/kaggle/input/tcs123/historical_prices_TCS.csv")
# stock_data.rename({'timeStamp':'ds','close':'y','symbol':'unique_id'},axis = 1, inplace = True)
# stock_data = stock_data[['ds', 'y', 'unique_id']]
# stock_data['ds'] = pd.to_datetime(stock_data['ds'])
# stock_data['ds'] = pd.to_datetime(stock_data['ds']).dt.tz_localize(None)
# stock_data

# Training

In [None]:
model = iTransformer(h=14,
                     input_size=140,
                     n_series=1,
                     hidden_size=1024,
                     n_heads=8,
                     e_layers=8,
                     d_layers=16,
                     d_ff=512,
                     factor=1,
                     dropout=0.1,
                     use_norm=True,
                     loss=MSE(),
                     valid_loss=MSE(),
                     early_stop_patience_steps=30,
                     batch_size=4,
                     max_steps = 3000)

fcst = NeuralForecast(models=[model], freq='B')
fcst.fit(df=train_data, val_size=14)

In [None]:
train_data

In [None]:
forecasts = fcst.predict()
forecasts

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# Plot true values
fig.add_trace(go.Scatter(x=stock_data['ds'], y=stock_data['y'], mode='lines', name='True', line=dict(color='blue')))

# Plot forecast values
fig.add_trace(go.Scatter(x=forecasts['ds'], y=forecasts['iTransformer'], mode='lines', name='Forecast', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='INFY Forecast',
    xaxis_title='Year',
    yaxis_title='Close Price',
    font=dict(size=15),
    legend=dict(
        x=0,
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=15,
            color="black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Black",
        borderwidth=2
    )
)

fig.show()

# Hyperparameter Tuning & Training Using Optuna

hidden_size': 240, 'learning_rate': 1.6696141497777103e-06, 'random_seed': 3, 'n_heads': 12, 'e_layers': 128, 'd_layers': 448, 'd_ff': 2048, 'factor': 5, 'dropout': 0.06, 'step_size': 90}

{'hidden_size': 144, 'learning_rate': 0.0002996329124007481, 'random_seed': 2, 'n_heads': 16, 'e_layers': 64, 'd_layers': 512, 'd_ff': 512, 'factor': 1, 'dropout': 0.01, 'step_size': 140}. Best is trial 6 with value: 5508.73095703125.

In [None]:
def config_itransformer(trial):
    return {
#         "h" : 14,
        "input_size" : trial.suggest_int("input_size", 60, 360, step = 60),
#         "input_size": 120,
        "n_series" : 1,
        "hidden_size" : trial.suggest_int("hidden_size", 48, 512, step = 48),
        "max_steps": 10,                                                                                               # Number of SGD steps                                                                                             # Size of input window
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-6, 1e-3),
        "val_check_steps": 50,
        "random_seed": trial.suggest_int("random_seed", 1, 10),
        "n_heads": trial.suggest_int("n_heads", 4, 24, step = 4),
        "e_layers": trial.suggest_int("e_layers", 64, 512, step = 64),
        "d_layers": trial.suggest_int("d_layers", 64, 1024, step = 64),
        "d_ff": trial.suggest_int("d_ff", 128, 2048, step = 128),
        "factor": trial.suggest_int("factor", 1, 3, step = 1),
        "dropout": trial.suggest_categorical("dropout", list(np.arange(0.04,0.2,0.02))),
        "step_size": trial.suggest_categorical("step_size", [90, 120, 140, 180]),
        "use_norm": True,
        "batch_size": 32,
        "early_stop_patience_steps": 2,
    }

In [None]:
model = AutoiTransformer(h=14,
                  n_series = 1,
                  loss=MSE(),
                  config=config_itransformer,
                  search_alg=optuna.samplers.TPESampler(),
                  backend='optuna',
                  num_samples=2)

In [None]:
fcst = NeuralForecast(models=[model], freq='B')
fcst.fit(df=train_data, val_size=14)

In [None]:
results = fcst.models[0].results.trials_dataframe()
results.drop(columns='user_attrs_ALL_PARAMS')
results

In [None]:
# import pandas as pd
# from datetime import datetime, timedelta

# def generate_business_timestamps(start_date, period):
#     end_date = start_date + timedelta(days=period)
#     business_days = pd.date_range(start=start_date, end=end_date, freq='B')
#     timestamps = []
#     for business_day in business_days:
#         timestamps.extend(pd.date_range(start=business_day.replace(hour=9, minute=15), end=business_day.replace(hour=15, minute=15), freq='5T'))

#     return timestamps

# # Example usage:
# start_date = stock_data['ds'].iloc[-1] + timedelta(days=1)
# period = 1
# dates = generate_business_timestamps(start_date, period)
# dates

In [None]:
forecasts = fcst.predict()
# forecasts = forecasts.reset_index()
# # forecasts['Date'] = dates
# forecasts = forecasts[0:146]
# forecasts['Date'] = dates
forecasts

In [None]:
# from datetime import date
# import yfinance as yf
# ticker = 'TCS'
# def fetch_stock_data(ticker, days=59):

#     end_date = date.today().strftime('%Y-%m-%d')
#     print(end_date)
#     # end_date = "2024"
#     start_date = (date.today() - timedelta(days=days)).strftime('%Y-%m-%d')
#     print(start_date)
#     stock_data = yf.download(ticker +'.NS', start=start_date, end=end_date, interval="5m")
#     stock_data.reset_index(inplace = True)
#     stock_data = stock_data[['Datetime','Close','High','Low']]
#     stock_data = stock_data.rename({'Datetime':'Date'},axis=1)
#     return stock_data

# yfin_data = fetch_stock_data(ticker, days=59)
# yfin_data

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# Plot true values
fig.add_trace(go.Scatter(x=stock_data['ds'], y=stock_data['y'], mode='lines', name='True', line=dict(color='blue')))

# fig.add_trace(go.Scatter(x=yfin_data['Date'], y=yfin_data['Close'], mode='lines', name='yfinance', line=dict(color='green')))

# Plot forecast values
fig.add_trace(go.Scatter(x=forecasts['ds'], y=forecasts['AutoiTransformer'], mode='lines', name='Forecast', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='INFOSYS Forecast',
    xaxis_title='Year',
    yaxis_title='Close Price',
    font=dict(size=15),
    legend=dict(
        x=0,
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=15,
            color="black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Black",
        borderwidth=2
    )
)

fig.show()