## Predicting Data Using Time-Series Correlation

### Import Libraries and Dependencies

In [1]:
# Import necessary libraries and dependencies
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from pathlib import Path
import hvplot.pandas
%matplotlib inline

### Read in Files

In [2]:
# Location of the TSLA close/volume CSV, relative to this notebook
tsla_path = Path('../Resources/tsla_stock_volume.csv')

# Read in the data, using the 'Date' column as a parsed datetime index.
# `infer_datetime_format` is intentionally not passed: the argument is
# deprecated since pandas 2.0, where consistent-format inference is already
# the default, so dropping it yields the same parsed index without a warning.
tsla_data = pd.read_csv(
    tsla_path,
    index_col='Date',
    parse_dates=True,
)
tsla_data

Unnamed: 0_level_0,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-06-29 10:00:00,3.62,412770
2010-06-29 11:00:00,3.67,98220
2010-06-29 12:00:00,3.82,84465
2010-06-29 13:00:00,4.40,159355
2010-06-29 14:00:00,4.79,161970
...,...,...
2021-02-08 10:00:00,864.26,10684
2021-02-08 11:00:00,865.49,9550
2021-02-08 12:00:00,858.38,10333
2021-02-08 13:00:00,858.72,12833


In [3]:
# Use hvPlot to visualize the closing price of Tesla over time.
# An explicit title and axis labels let the figure stand on its own
# when the notebook is skimmed.
tsla_data['close'].hvplot(
    title='TSLA Closing Price',
    xlabel='Date',
    ylabel='Close',
)

In [4]:
# Restrict the Tesla data to calendar year 2020. Partial-string labels on the
# datetime index are inclusive at both ends, so this spans all of January
# through all of December.
tesla_2020 = tsla_data.loc['2020-01':'2020-12']

# Preview the first and last five rows of the tesla_2020 DataFrame
display(tesla_2020.head(), tesla_2020.tail())

Unnamed: 0_level_0,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02 08:00:00,84.9,70419
2020-01-02 09:00:00,84.79,55498
2020-01-02 10:00:00,85.29,36625
2020-01-02 11:00:00,85.58,35202
2020-01-02 12:00:00,85.88,50231


Unnamed: 0_level_0,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-31 10:00:00,714.22,67741
2020-12-31 11:00:00,714.84,34614
2020-12-31 12:00:00,716.63,23669
2020-12-31 13:00:00,702.35,43745
2020-12-31 14:00:00,705.21,55049


In [5]:
# Use hvPlot to visualize the 2020 close and volume data.
# Each column is drawn as its own subplot with independent axes,
# and the subplots are laid out in a single column.
tesla_2020.hvplot(
    subplots=True,
    shared_axes=False,
).cols(1)

In [6]:
# Add a 'Lagged Volume' column holding the previous row's volume.
# shift() moves values by rows rather than by clock time: the first row
# becomes NaN, and wherever consecutive timestamps are more than an hour
# apart (e.g. overnight) the lag spans that whole gap.
tsla_data['Lagged Volume'] = tsla_data['volume'].shift(periods=1)

In [7]:
# Add a 'Stock Volatility' column: the standard deviation of the
# row-over-row closing-price returns over a rolling window of 4 periods.
tsla_data['Stock Volatility'] = (
    tsla_data['close']
    .pct_change()
    .rolling(window=4)
    .std()
)

In [8]:
# Use hvPlot to visualize the stock volatility over time.
# An explicit title and axis labels let the figure stand on its own
# when the notebook is skimmed.
tsla_data['Stock Volatility'].hvplot(
    title='TSLA Stock Volatility',
    xlabel='Date',
    ylabel='Stock Volatility',
)

In [9]:
# Add an 'Hourly Stock Return' column: the fractional change in closing
# price from one row to the next (0.01 means +1%). The first row is NaN
# because it has no prior close to compare against.
tsla_data['Hourly Stock Return'] = tsla_data['close'].pct_change(periods=1)

In [10]:
# Build the pairwise correlation table for Stock Volatility,
# Lagged Volume, and Hourly Stock Return.
tsla_data.loc[
    :,
    ['Stock Volatility', 'Lagged Volume', 'Hourly Stock Return'],
].corr()

Unnamed: 0,Stock Volatility,Lagged Volume,Hourly Stock Return
Stock Volatility,1.0,0.465363,0.061435
Lagged Volume,0.465363,1.0,0.024978
Hourly Stock Return,0.061435,0.024978,1.0
