In [None]:
!pip install yfinance
!pip install pyfolio
!pip install pmdarima

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.1.72-py2.py3-none-any.whl (27 kB)
Collecting requests>=2.26
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.2 MB/s 
[?25hCollecting lxml>=4.5.1
  Downloading lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 30.6 MB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. Thi

# **IMPORTING THE REQUIRED LIBRARIES**

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')
import math
import yfinance as yf
yf.pdr_override()
import numpy as np
import pandas as pd
import pyfolio as pf
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from pylab import rcParams
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.seasonal import seasonal_decompose
from tensorflow.keras import layers
from statsmodels.tsa.arima.model import ARIMA
from pmdarima.arima import auto_arima



# **Acquisition of Stock Data**

In [None]:
# Download the daily price history for Tata Motors (NSE listing) and preview
# the first rows of the resulting frame.
stock_data = yf.download(
    'TATAMOTORS.NS',
    start='2000-01-01',
    end='2021-10-01',
)
stock_data.head()

In [None]:
# Print a structural summary of the downloaded frame (index, columns, dtypes,
# non-null counts) to check for missing values before modelling.
stock_data.info()

In [None]:
# Show descriptive statistics for every numeric column of the price data.
stock_data.describe()

# **Visualizing Stock Prices History**

In [None]:
# Line chart of the closing price over the whole download window, drawn
# through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(stock_data['Close'])
ax.grid(which="major", color='g', linestyle='-.', linewidth=0.5)
ax.set_title('Stock Prices History', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Prices', fontsize=14)

In [None]:
# Keep the closing-price series under its own name (reused by later cells)
# and draw its kernel-density estimate.
df_close = stock_data['Close']
df_close.plot.kde()

In [None]:
# Adjusted close price over time, styled via the Axes returned by pandas.
adj_close_ax = stock_data['Adj Close'].plot(figsize=(15, 8))
adj_close_ax.set_title("Adjusted Close Price of TATA MOTORS", fontsize=16)
adj_close_ax.set_xlabel('Year', fontsize=14)
adj_close_ax.set_ylabel('Price', fontsize=14)
adj_close_ax.grid(which="major", color='g', linestyle='-.', linewidth=0.5)
plt.show()

In [None]:
# Rolling mean and standard deviation (12-observation window) of the
# log-transformed closing price.
rcParams['figure.figsize'] = 10, 6
df_log = np.log(df_close)
moving_avg = df_log.rolling(12).mean()
std_dev = df_log.rolling(12).std()
plt.title('Moving Average')
plt.plot(std_dev, color="green", label="Standard Deviation")
plt.plot(moving_avg, color="black", label="Mean")
# The legend is created only after both labelled lines exist; calling
# plt.legend() before plotting finds no handles and draws an empty legend.
plt.legend(loc='best')
plt.show()

# **Correlation of attributes in stock**

In [None]:
# Pairwise correlation between the columns of the price data, shown as an
# annotated heatmap.
correlations = stock_data.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(correlations, annot=True, cmap="YlGnBu");

# **Analysing the stock with pyfolio**

In [None]:
# Build a frame of adjusted close prices (one column per ticker) covering the
# last ten years, convert it to daily percentage returns, average across the
# tickers, and hand the resulting return series to pyfolio.
tickers_list = ['TATAMOTORS.NS']
data = pd.DataFrame(columns=tickers_list)
for symbol in tickers_list:
    adj_close = yf.download(symbol, period='10y')['Adj Close']
    data[symbol] = adj_close

daily_returns = data.pct_change().dropna()
data = daily_returns.mean(axis=1)
pf.create_simple_tear_sheet(data)

# **Data Preprocessing:**


# **Preparation of training set**

In [None]:
# Build the supervised training set for the LSTM: each sample is a window of
# 60 consecutive scaled closing prices and the target is the price that
# immediately follows the window.
close_prices = stock_data['Close']
values = close_prices.values
# The first 80% of the observations (rounded up) are used for training.
training_data_len = math.ceil(len(values) * 0.8)

# Fit the scaler on the training portion only. Fitting on the full series
# would leak the min/max of the held-out test period into training.
# Test-period values may therefore fall outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:training_data_len].reshape(-1, 1))
scaled_data = scaler.transform(values.reshape(-1, 1))
train_data = scaled_data[:training_data_len, :]

x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i - 60:i, 0])  # previous 60 scaled prices
    y_train.append(train_data[i, 0])         # price to predict

x_train, y_train = np.array(x_train), np.array(y_train)
# Keras LSTM layers expect input shaped (samples, timesteps, features).
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# **Preparation of test set**

In [None]:
# The test slice starts 60 rows before the split point so that the first test
# window has a full 60-step history. Targets stay in the original price scale.
test_data = scaled_data[training_data_len - 60:, :]
y_test = values[training_data_len:]

# One 60-step window of scaled prices per test target.
x_test = np.array(
    [test_data[end - 60:end, 0] for end in range(60, len(test_data))]
)
# Add the trailing feature axis: (samples, timesteps, 1).
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# **Length of the data**

In [None]:
# Row counts of the full data set, the training slice and the test slice,
# one per line.
print(len(stock_data), len(train_data), len(test_data), sep='\n')


# **Setting Up LSTM Network Architecture**

In [None]:
# Two stacked 100-unit LSTM layers followed by a small dense head that emits
# a single value per input window.
model = keras.Sequential([
    layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    layers.LSTM(100, return_sequences=False),
    layers.Dense(25),
    layers.Dense(1),
])
model.summary()

# **Training LSTM Model**

In [None]:
# Minimise mean squared error with Adam; train for 3 epochs with a batch
# size of 1.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=x_train, y=y_train, epochs=3, batch_size=1)

# **LSTM Model Evaluation**

In [None]:
# Predict on the test windows and map the outputs back to the price scale.
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
# Flatten both sides before subtracting: `predictions` has shape (n, 1), and
# subtracting a 1-D target from it would broadcast to an (n, n) matrix.
errors = np.ravel(predictions) - np.ravel(y_test)
# Root mean squared error: the errors must be squared before averaging,
# otherwise positive and negative errors cancel out.
rmse = np.sqrt(np.mean(errors ** 2))
rmse

# **Visualizing the Predicted Prices (LSTM)**

In [None]:
# Plot the training prices, the held-out prices and the LSTM predictions on
# one chart, split at the same boundary used to build the train/test sets.
data = stock_data.filter(['Close'])
train = data[:training_data_len]
# Copy the slice so the new column is added to an independent frame instead
# of a view of `data` (avoids pandas' chained-assignment hazard).
validation = data[training_data_len:].copy()
# Flatten the (n, 1) prediction array into a single column of values.
validation['Predictions'] = np.ravel(predictions)
plt.figure(figsize=(16,8))
plt.title('LSTM Model')
plt.xlabel('Date')
# TATAMOTORS.NS is listed on the NSE, so prices are quoted in rupees.
plt.ylabel('Close Price (INR)')
plt.plot(train)
plt.plot(validation[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

# **ARIMA MODEL**

In [None]:
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm


In [None]:
def test_stationarity(timeseries):
    """Plot rolling statistics and print the augmented Dickey-Fuller test.

    The series is drawn together with its 12-observation rolling mean and
    rolling standard deviation. The ADF test is then run with AIC-based lag
    selection and its results are printed as a labelled pandas Series.

    Parameters
    ----------
    timeseries : pandas.Series
        The series to inspect.
    """
    window = timeseries.rolling(12)
    rolmean, rolstd = window.mean(), window.std()

    # Original series against its rolling statistics.
    for series, colour, name in (
        (timeseries, 'blue', 'Original'),
        (rolmean, 'red', 'Rolling Mean'),
        (rolstd, 'black', 'Rolling Std'),
    ):
        plt.plot(series, color=colour, label=name)
    plt.legend(loc='best')
    plt.title('Rolling Mean and Standard Deviation')
    plt.show(block=False)

    print("Results of dickey fuller test")
    adft = adfuller(timeseries, autolag='AIC')
    # adfuller returns an unlabelled tuple: name the first four entries and
    # then append one entry per critical value from the dict at position 4.
    labels = ['Test Statistics', 'p-value', 'No. of lags used', 'Number of observations used']
    output = pd.Series(adft[0:4], index=labels)
    for level, critical_value in adft[4].items():
        output[f'critical value ({level})'] = critical_value
    print(output)
test_stationarity(df_close)

In [None]:
# Multiplicative decomposition of the closing price with a period of 12
# observations.
result = seasonal_decompose(df_close, model='multiplicative', period=12)
# result.plot() creates and returns its own figure, so no plt.figure() call
# is needed beforehand (it would only emit an extra, empty figure).
fig = result.plot()
fig.set_size_inches(10, 4)

In [None]:
# Rolling mean and standard deviation (12-observation window) of the
# log-transformed closing price; `df_log` is reused by the ARIMA cells below.
from pylab import rcParams
rcParams['figure.figsize'] = 10, 6
df_log = np.log(df_close)
moving_avg = df_log.rolling(12).mean()
std_dev = df_log.rolling(12).std()
plt.title('Moving Average')
plt.plot(std_dev, color="black", label="Standard Deviation")
plt.plot(moving_avg, color="red", label="Mean")
# The legend is created only after both labelled lines exist; calling
# plt.legend() before plotting finds no handles and draws an empty legend.
plt.legend(loc='best')
plt.show()

In [None]:
# Chronological split of the log prices for ARIMA: observations from index 3
# up to the 90% mark are used for training and the final 10% for testing.
split_at = int(len(df_log) * 0.9)
train_data, test_data = df_log[3:split_at], df_log[split_at:]
plt.figure(figsize=(10,6))
plt.grid(True)
plt.xlabel('Dates')
plt.ylabel('Closing Prices')
# Plot the training slice itself so the 'Train data' label matches what is
# drawn (the full series was previously plotted under this label).
plt.plot(train_data, 'green', label='Train data')
plt.plot(test_data, 'blue', label='Test data')
plt.legend( )

In [None]:
# auto_arima searches over model orders and returns the selected model
# already fitted on `train_data`, so no separate fit() call is required.
modell = auto_arima(train_data, trace=True, error_action='ignore', suppress_warnings=True)
forecast = modell.predict(n_periods=len(test_data))
# Depending on the pmdarima version, predict() may return a pandas Series
# with its own index. Converting to a plain array places the values
# positionally on the test dates instead of reindexing them to NaN.
forecast = pd.DataFrame(np.asarray(forecast), index=test_data.index, columns=['Prediction'])

plt.plot(train_data, label = 'Train')
plt.plot(test_data, label='Test')
plt.plot(forecast, label='Prediction')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Actual Price')
plt.legend(loc='upper left', fontsize=8)
plt.show()

In [None]:
# Error metrics of the ARIMA forecast against the held-out test series.
mse = mean_squared_error(test_data, forecast)
mae = mean_absolute_error(test_data, forecast)
rmse = math.sqrt(mean_squared_error(test_data, forecast))
for metric_name, metric_value in (('MSE', mse), ('MAE', mae), ('RMSE', rmse)):
    print(metric_name + ': ' + str(metric_value))