## Import CSV Files

In [4]:
# Import the pandas and pathlib libraries
import pandas as pd
from pathlib import Path

In [7]:
# Use the Pathlib libary to set the path to the CSV
vnq_path = Path("./Resources/vnq_10_years.csv")
agg_path = Path("./Resources/agg_10_years.csv")

In [12]:
# Use the file path to read the CSV into a DataFrame and display a few rows
vnq_df=pd.read_csv(vnq_path, index_col="Date", parse_dates=True, infer_datetime_format=True)
vnq_df.index=pd.DatetimeIndex(vnq_df.index)
agg_df=pd.read_csv(agg_path, index_col="Date", parse_dates=True, infer_datetime_format=True)
agg_df.index=pd.DatetimeIndex(agg_df.index)
display(vnq_df.head())
display(agg_df.head())

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2012-10-31,64.38
2012-11-01,64.69
2012-11-02,65.12
2012-11-05,64.69
2012-11-06,64.97


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2012-10-31,112.19
2012-11-01,111.8
2012-11-02,111.87
2012-11-05,112.06
2012-11-06,111.74


In [13]:
# Use the `dtypes` attribute to list the column data types
display(vnq_df.dtypes)
display(agg_df.dtypes)

Close    float64
dtype: object

Close    float64
dtype: object

## Data Cleaning

In [14]:
# Use the `count` function to view count of non-null values for each column
vnq_df.count()

Close    2514
dtype: int64

In [15]:
agg_df.count()

Close    2514
dtype: int64

In [16]:
# Checking for null
vnq_df.isnull()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2012-10-31,False
2012-11-01,False
2012-11-02,False
2012-11-05,False
2012-11-06,False
...,...
2022-10-19,False
2022-10-20,False
2022-10-21,False
2022-10-24,False


In [17]:
agg_df.isnull()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2012-10-31,False
2012-11-01,False
2012-11-02,False
2012-11-05,False
2012-11-06,False
...,...
2022-10-19,False
2022-10-20,False
2022-10-21,False
2022-10-24,False


In [18]:
# Determining number of nulls
vnq_df.isnull().sum()

Close    0
dtype: int64

In [19]:
agg_df.isnull().sum()

Close    0
dtype: int64

We do not need to drop nulls, as we have no nulls in our data when we checked in the cells above.

In [20]:
# Use the `duplicated` function to determine the existance of duplicate rows: True or False
vnq_df.duplicated()

Date
2012-10-31    False
2012-11-01    False
2012-11-02    False
2012-11-05     True
2012-11-06    False
              ...  
2022-10-19     True
2022-10-20    False
2022-10-21     True
2022-10-24    False
2022-10-25     True
Length: 2514, dtype: bool

In [21]:
agg_df.duplicated()

Date
2012-10-31    False
2012-11-01    False
2012-11-02    False
2012-11-05    False
2012-11-06    False
              ...  
2022-10-19    False
2022-10-20    False
2022-10-21    False
2022-10-24    False
2022-10-25    False
Length: 2514, dtype: bool

Even though it shows that there are deplicated data in the file, We decided against dropping the duplicates, because the stocks can have the same closing price on different days. Also, we shouldn't have duplicated date, as indices should be unique.

In [31]:
# Save modified DataFrame to the Resources folder. Use the `index` parameter set to `False` to exclude saving the index.
vnq_df.to_csv("./Resources/cleaned_vnq_data.csv", index=True)
agg_df.to_csv("./Resources/cleaned_agg_data.csv", index=True)

## Concatenating Dataframes

In [24]:
# Join VNQ and AGG full datasets by axis
hedge_df=pd.concat([agg_df,vnq_df], axis="columns", join="inner")
hedge_df.head()

Unnamed: 0_level_0,Close,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-31,112.19,64.38
2012-11-01,111.8,64.69
2012-11-02,111.87,65.12
2012-11-05,112.06,64.69
2012-11-06,111.74,64.97


In [30]:
#Rename the columns to AGG and VNQ 
hedge_df.columns=["AGG", "VNQ"]
display(hedge_df.head())
display(hedge_df.tail())

Unnamed: 0_level_0,AGG,VNQ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-31,112.19,64.38
2012-11-01,111.8,64.69
2012-11-02,111.87,65.12
2012-11-05,112.06,64.69
2012-11-06,111.74,64.97


Unnamed: 0_level_0,AGG,VNQ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-10-19,94.2,77.88
2022-10-20,93.63,77.54
2022-10-21,93.77,78.15
2022-10-24,93.64,78.14
2022-10-25,94.56,81.21


In [28]:
hedge_df.to_csv("./Resources/cleaned_hedge_data.csv", index=True)

In [36]:
#Convert the daily closing price data into daily return data
hedge_daily_return_df = hedge_df.pct_change().dropna()
hedge_daily_return_df.head()

Unnamed: 0_level_0,AGG,VNQ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-11-01,-0.003476,0.004815
2012-11-02,0.000626,0.006647
2012-11-05,0.001698,-0.006603
2012-11-06,-0.002856,0.004328
2012-11-07,0.002327,-0.004464


## Data Analysis

In [None]:
#Import one new library: Plotly.express, SciPy, Ggplot, Altair

In [None]:
#Price increase year-on-year: Ven

In [38]:
# Mean (Daily and Annualized): 
hedge_df.mean() * 252

AGG    27740.041718
VNQ    20957.228162
dtype: float64

In [None]:
# Standard deviation (volatility)(Daily and Annualized): Maxine

In [None]:
# 200 days Moving Average: Pauline

In [None]:
# Beta: Sam

In [None]:
# Sharpe ratio: Sam

In [None]:
# Monte Carlo price projection: Pauline

In [26]:
# Hvplot: 6-8 plots: MA, corrention, bar chart, scatter plot, view "info is beautiful"