In [None]:
# Import libraries and dependencies
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

In [None]:
# Set file paths (all CSVs live in the sibling "Pandas" directory)
DATA_DIR = Path("../Pandas")
hd_data = DATA_DIR / "HD.csv"
jnj_data = DATA_DIR / "JNJ.csv"
intc_data = DATA_DIR / "INTC.csv"
amd_data = DATA_DIR / "AMD.csv"
mu_data = DATA_DIR / "MU.csv"
nvda_data = DATA_DIR / "NVDA.csv"
tsm_data = DATA_DIR / "TSM.csv"


def read_prices(csv_path):
    """Read one ticker CSV into a DataFrame indexed by its `date` column.

    `parse_dates=True` asks pandas to parse the index as dates, so that
    sorting and joining on the index are chronological rather than
    lexicographic on raw strings.
    """
    return pd.read_csv(csv_path, index_col="date", parse_dates=True)


# Read the individual CSV datasets
hd = read_prices(hd_data)
jnj = read_prices(jnj_data)
intc = read_prices(intc_data)
amd = read_prices(amd_data)
mu = read_prices(mu_data)
nvda = read_prices(nvda_data)
tsm = read_prices(tsm_data)

# Preview one of the loaded frames
intc.head()

##  Combine the DataFrames

In [None]:
# Map each column label to its source frame so labels and frames cannot drift out of order
frames_by_ticker = {
    "hd": hd,
    "jnj": jnj,
    "intc": intc,
    "mu": mu,
    "amd": amd,
    "nvda": nvda,
    "tsm": tsm,
}

# Join column-wise on the `date` index, keeping only dates present in every frame,
# then order the rows by index ascending
combined_df = pd.concat(
    list(frames_by_ticker.values()), axis="columns", join="inner"
).sort_index()

# Relabel the columns positionally with the ticker names
# NOTE(review): assumes each CSV contributes exactly one column - confirm
combined_df.columns = list(frames_by_ticker)

# Show the first few rows
combined_df.head()

##  Calculate Daily Returns

In [None]:
# Daily return = percentage change of each column from the previous row
daily_returns = combined_df.pct_change()

# Preview the first rows of the returns table
daily_returns.head(n=5)

## Calculate Correlation

In [None]:
# Pairwise correlation matrix of the daily returns (one row/column per stock)
correlation = daily_returns.corr(method="pearson")
correlation

##  Plot Correlation

In [None]:
# Visualize the correlation matrix as a heatmap using seaborn's default color scale
sns.heatmap(data=correlation)

In [None]:
# Same heatmap, with the color scale pinned to the full correlation range [-1, 1]
sns.heatmap(data=correlation, vmin=-1, vmax=1)

##  Which semiconductor stock would be the best candidate to add to the existing portfolio?

##  Evaluate Riskiness of Stocks

In [None]:
# Annualized volatility = daily standard deviation * sqrt(trading days per year).
# Requires `numpy` to be imported as `np` in the imports cell.
TRADING_DAYS_PER_YEAR = 252

# Sort ascending so the least volatile stocks are listed first
volatility = (daily_returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)).sort_values()
volatility

In [None]:
# Drop the 5 stocks with the highest volatility in daily returns.
# The tickers are selected by rank instead of being hard-coded; listing every
# ticker would remove all columns and leave an empty DataFrame.
N_DROP = 5

# Recompute from `combined_df` so this cell gives the same result when re-run
all_returns = combined_df.pct_change()

# Ranking by daily std is the same as ranking by annualized volatility,
# because annualizing multiplies every value by the same constant
riskiest = all_returns.std().nlargest(N_DROP).index

daily_returns = all_returns.drop(columns=riskiest)
daily_returns.head()