In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
import yfinance as yf
from datetime import datetime
import statsmodels.api as sm
from neuralprophet import NeuralProphet

In [2]:
# Fetch price history for the 'TCS.NS' ticker between the given start and end
# dates, with the download progress bar switched off.
df = yf.download('TCS.NS', start='2019-01-01',end='2022-04-01',progress=False)

In [3]:
# Persist the downloaded frame to a CSV file in the working directory.
df.to_csv('tcs.csv')

In [4]:
# Reload the prices from the CSV. No date parsing is requested here, so 'Date'
# comes back as a plain string column (dtype object in df.info()).
df = pd.read_csv('tcs.csv')

In [5]:
# Preview the first rows of the reloaded frame.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-01-01,1896.0,1910.0,1885.0,1902.800049,1785.039673,1094883
1,2019-01-02,1905.0,1934.449951,1900.0,1923.300049,1804.270996,2100463
2,2019-01-03,1919.0,1944.949951,1893.099976,1899.949951,1782.365967,2611668
3,2019-01-04,1900.0,1901.199951,1841.0,1876.849976,1760.695557,4280862
4,2019-01-07,1891.800049,1908.800049,1881.0,1897.900024,1780.442993,1856423


In [6]:
# Count missing values per column.
df.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [7]:
# Show column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 803 entries, 0 to 802
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       803 non-null    object 
 1   Open       803 non-null    float64
 2   High       803 non-null    float64
 3   Low        803 non-null    float64
 4   Close      803 non-null    float64
 5   Adj Close  803 non-null    float64
 6   Volume     803 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 44.0+ KB


In [8]:
# Summary statistics (count, mean, std, quartiles, extremes) of the numeric columns.
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,803.0,803.0,803.0,803.0,803.0,803.0
mean,2664.470236,2692.677268,2634.335249,2662.751246,2610.66889,3239971.0
std,663.861285,666.455121,660.537315,663.460296,694.125584,1835397.0
min,1559.699951,1685.449951,1506.050049,1636.349976,1600.641357,224405.0
25%,2103.5,2127.725098,2076.775024,2105.75,2013.265503,2072809.0
50%,2275.0,2303.850098,2250.5,2276.050049,2227.217773,2769033.0
75%,3229.5,3259.875,3192.5,3219.174927,3199.953491,3751640.0
max,4033.949951,4043.0,3980.0,4019.149902,4012.099854,19839330.0


In [None]:
# Line chart of the closing price against the frame's row index.
fig, ax = plt.subplots(figsize=(10, 6))
df['Close'].plot(ax=ax)
ax.set_ylabel('Close')
ax.set_xlabel(None)
ax.set_title("Closing Price of TCS")
fig.tight_layout()

In [None]:
# Line chart of the traded volume against the frame's row index.
fig, ax = plt.subplots(figsize=(10, 6))
df['Volume'].plot(ax=ax)
ax.set_ylabel('Volume')
ax.set_xlabel(None)
ax.set_title("Volume of TCS")
fig.tight_layout()

# Plotting a Correlation Heatmap

In [None]:
# Pairwise correlation of the numeric columns, drawn as an annotated heatmap.
# 'Date' is a string column at this point, so it is excluded explicitly:
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0.
fig, ax = plt.subplots(figsize=(10,10))
numeric_corr = df.select_dtypes(include="number").corr()
dataplot = sns.heatmap(numeric_corr, cmap="YlGnBu", annot=True, ax=ax)

In [None]:
# One stacked subplot per price column, all sharing the same y-axis caption.
cols_plot = ['Open', 'Close', 'High', 'Low']
axes = df.loc[:, cols_plot].plot(subplots=True, figsize=(11, 9))
for ax in axes:
    ax.set(ylabel='Daily trade')

# Seasonal Decompose

### If the components of a time series are added together to form the series, it is called an additive time series. Visually, a series can be considered additive when its increasing or decreasing pattern stays similar throughout the series.

### If the components of a time series are multiplied together to form the series, it is called a multiplicative time series. Visually, a series that grows or decays exponentially over time can be considered multiplicative.

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Square 10x10 figures for the decomposition panels.
plt.rcParams.update({"figure.figsize": (10, 10)})
y = df[['Close']]

# Decompose the closing price under both models.
# NOTE(review): period=52 is applied to one-row-per-trading-day data; confirm
# that a 52-observation cycle is the intended seasonality.
result_mul = seasonal_decompose(y, model='multiplicative', period=52)
result_add = seasonal_decompose(y, model='additive', period=52)

# One figure per decomposition, each with its own headline.
for result, heading in (
    (result_mul, 'Multiplicative Decompose'),
    (result_add, 'Additive Decompose'),
):
    result.plot().suptitle(heading, fontsize=22)
plt.show()

# Box Plot
A box plot is a standardized way of displaying the distribution of data. Any data points that are **less than Q1 − 1.5·IQR** or **greater than Q3 + 1.5·IQR** (where IQR = Q3 − Q1 is the interquartile range) are considered to be outliers.

In [None]:
# Wide, short canvas for a single horizontal box.
plt.rcParams['figure.figsize']=(17,5)
# Pass the series as `x` explicitly: a bare positional argument is read as `x`
# or as `data` depending on the seaborn version, which flips the orientation.
sns.boxplot(x=df['Close'], color='brown')
plt.show()

In [None]:
# Parse the 'YYYY-MM-DD' strings into timestamps, then index the frame by them
# while keeping 'Date' available as an ordinary column as well.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df = df.set_index('Date', drop=False)

In [None]:
# Preview the first five rows after re-indexing by Date.
df.head(5)

### ACF

In [None]:
# Autocorrelation of the closing price, up to 40 lags.
# NOTE(review): the first observation is skipped; the reason is not evident
# from this notebook -- confirm it is intentional.
close_after_first = df['Close'].iloc[1:]
sm.graphics.tsa.plot_acf(
    close_after_first,
    lags=40,
    title='Auto Correlation of Close',
)
plt.show()

### PACF

In [None]:
# Partial autocorrelation of the closing price, up to 40 lags.
# NOTE(review): the first observation is skipped; the reason is not evident
# from this notebook -- confirm it is intentional.
close_after_first = df['Close'].iloc[1:]
sm.graphics.tsa.plot_pacf(
    close_after_first,
    lags=40,
    title='Partial Auto Correlation of Close',
)
plt.show()

In [None]:
# Modelling frame: Open and Close only, re-indexed 0..len(df)-1.
# Selecting the columns in one step replaces a row-by-row copy that relied on
# chained assignment (data['Open'][i] = ...), which does not write through
# under pandas Copy-on-Write, and on integer keys against the Date index.
# It also keeps the columns as float64 instead of object.
data = df[['Open', 'Close']].reset_index(drop=True)

In [None]:
# NOTE(review): `data` is created with exactly these two column names, so this
# assignment looks redundant -- confirm before removing.
data.columns = ['Open','Close']
# Disabled: `data` holds only 'Open' and 'Close', so there is no 'Date' column to convert.
#data['Date'] = pd.to_datetime(data['Date'],format="%Y-%m")

In [None]:
# Disabled; presumably meant `data = data.set_index('Date')`, but `data` has no
# 'Date' column at this point.
#ata = data.set_index('Date')

In [None]:
# Preview the first rows of the Open/Close modelling frame.
data.head()

In [None]:
# Ordered 95% / 5% split: the leading rows train, the trailing rows test.
# The split size is taken from `data` itself (the frame being sliced), and both
# parts are copied so that adding columns to them later never writes into a
# slice of `data`.
length_train = int(np.ceil(len(data) * .95))
train = data.iloc[:length_train].copy()
test = data.iloc[length_train:].copy()

In [None]:
# Row counts of the training and test parts.
len(train),len(test)

In [None]:
# 'Close' is the target; every remaining column is a feature.
x_train, y_train = train.drop(columns='Close'), train['Close']
x_test, y_test = test.drop(columns='Close'), test['Close']

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline fitted on the training features and target.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# Root-mean-squared error of the linear model on the test rows.
preds = model.predict(x_test)
residuals = np.array(y_test) - np.array(preds)
rms = np.sqrt(np.mean(residuals ** 2))
rms

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [None]:
# Decision-tree regressor on the same features. The seed is fixed so that
# re-running the notebook rebuilds the same tree.
model1 = DecisionTreeRegressor(random_state=42)
model1.fit(x_train,y_train)

In [None]:
# Decision-tree predictions for the test features.
preds1 = model1.predict(x_test)

In [None]:
# Root-mean-squared error of the decision tree on the test rows.
residuals = np.array(y_test) - np.array(preds1)
rms = np.sqrt(np.mean(residuals ** 2))
rms

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random-forest regressor on the same features. The seed is fixed so that
# re-running the notebook gives the same forest and the same error figure.
model2 = RandomForestRegressor(random_state=42)
model2.fit(x_train,y_train)

In [None]:
# Random-forest predictions for the test features.
preds2 = model2.predict(x_test)

In [None]:
# Root-mean-squared error of the random forest on the test rows.
residuals = np.array(y_test) - np.array(preds2)
rms = np.sqrt(np.mean(residuals ** 2))
rms

### LSTM

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM ,Dropout

In [None]:
# Cast the training features and target to float32 and append a trailing axis
# of length 1, matching the (steps, 1) input shape declared on the first LSTM layer.
x_train1 = np.expand_dims(np.asarray(x_train).astype(np.float32), axis=-1)
y_train1 = np.expand_dims(np.asarray(y_train).astype(np.float32), axis=-1)

In [None]:
# Four stacked 100-unit LSTM layers, each followed by 20% dropout, feeding a
# single-unit dense output. The first three LSTMs return full sequences so the
# next LSTM receives 3-D input; the last one returns only its final state.
# NOTE(review): inputs are fed unscaled and no random seed is set -- confirm
# this is intended.
regression = Sequential([
    LSTM(units=100, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=100, return_sequences=True),
    Dropout(0.2),
    LSTM(units=100, return_sequences=True),
    Dropout(0.2),
    LSTM(units=100),
    Dropout(0.2),
    Dense(units=1),
])
regression.compile(optimizer='adam', loss='mean_squared_error')
regression.fit(x_train1, y_train1, batch_size=32, epochs=300)

In [None]:
# Shape the test features and target the same way as the training arrays:
# float32 with a trailing axis of length 1.
x_test1 = np.expand_dims(np.array(x_test).astype(np.float32), -1)
y_test1 = np.expand_dims(np.array(y_test).astype(np.float32), -1)

# Predicted closing prices for the test rows. No scaler is applied anywhere in
# this notebook, so the outputs are used as-is.
predictions = regression.predict(x_test1)

# Root-mean-squared error between predictions and actual closes.
squared_error = np.square(predictions - y_test1)
rmse = np.sqrt(squared_error.mean())
rmse

In [None]:
# Attach the LSTM predictions to the hold-out rows. `assign` returns a new
# frame instead of writing a column into a slice of `data`, and `ravel`
# flattens the single-output predictions to one value per row.
test = test.assign(Predictions=np.ravel(predictions))

# Training closes, hold-out closes and predicted closes on one chart.
plt.figure(figsize=(16,6))
plt.title('Model')
# The x axis is the integer row index of `data`, not calendar dates.
plt.xlabel('Trading day (row index)', fontsize=18)
# The plotted series are Close prices; the previous label named the Open price in USD.
# NOTE(review): TCS.NS quotes are presumably in INR -- confirm the currency.
plt.ylabel('Close Price (INR)', fontsize=18)
plt.plot(train['Close'])
plt.plot(test[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

### NeuralProphet

In [None]:
# NeuralProphet input: a timestamp column 'ds' and a value column 'y'.
# The rename is done on a fresh frame rather than in place on a column subset
# of `df` (which triggers SettingWithCopyWarning), and the redundant Date index
# is dropped so the dates live only in 'ds'.
df1 = (
    df[["Date", "Close"]]
    .rename(columns={"Date": "ds", "Close": "y"})
    .reset_index(drop=True)
)
modelp = NeuralProphet()
# NOTE(review): `metrics` rebinds the name imported earlier via
# `from sklearn import metrics`; kept for compatibility with later cells.
# NOTE(review): freq="D" is used although the rows are trading days only --
# confirm how the missing calendar days should be handled.
metrics = modelp.fit(df1, freq="D")

In [None]:
# Build the frame to predict on: 365 further periods beyond df1, plus
# (presumably) in-sample predictions for every historic row, since
# n_historic_predictions is set to len(df1).
future = modelp.make_future_dataframe(df1, periods=365, n_historic_predictions=len(df1))
# Run the fitted model over that frame.
forecast = modelp.predict(future)

In [None]:
# Plot the forecast with the model's built-in plotting helper and keep the returned figure.
fig1 = modelp.plot(forecast)