---

**Load essential libraries**

---

In [None]:
# Core numerical / data-handling stack
import pandas as pd
import numpy as np
# Print floats with 2 decimals and without scientific notation
np.set_printoptions(precision = 2, suppress = True)
# sys is used further down to detect whether the notebook runs inside Google Colab
import sys
from scipy import linalg

# Plotting (dark theme, figures rendered inline in the notebook)
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.style.use('dark_background')
%matplotlib inline

# Dimensionality reduction
from sklearn.decomposition import PCA

# Preprocessing / data-splitting helpers
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split

# NOTE: this binds the name `random` to scipy.sparse.random, not the stdlib module
from scipy.sparse import random
from scipy import stats

---

Mount the Google Drive folder if running in Google Colab

---

In [None]:
## Pick the data directory; on Colab, Google Drive has to be mounted first
if 'google.colab' not in sys.modules:
    # Local run: the data sits in a folder relative to the notebook
    DATA_DIR = 'Data/'
else:
    # Colab run: mount Drive and point at the course folder inside it
    from google.colab import drive
    drive.mount('/content/drive', force_remount = True)
    DIR = '/content/drive/MyDrive/Colab Notebooks/MAHE/MSIS Coursework/OddSem2024MAHE'
    DATA_DIR = f'{DIR}/Data/'

---

Load ICU data

---

In [None]:
## Read the ICU dataset from the data directory and preview its first rows
FILE = f'{DATA_DIR}ICU_Complete.csv'
dfICU = pd.read_csv(filepath_or_buffer = FILE)
dfICU.head(n = 5)

---

Create a list of categorical and continuous features

---

In [None]:
## Split the columns into categorical features (listed by hand) and
## continuous features (every remaining column, in the original column order)
categorical_features = ['In-hospital_death', 'Gender', 'MechVent']
continuous_features = [column for column in dfICU.columns if column not in categorical_features]
dfICU.dtypes

---

Convert categorical features to the categorical type

---

In [None]:
## Cast every categorical column to the 'category' dtype, then list the dtypes
dfICU[categorical_features] = dfICU[categorical_features].astype(
    {name: 'category' for name in categorical_features}
)
dfICU.dtypes

---

$\color{yellow}{\textbf{Complete this}}$

Principal component analysis (PCA) of the dataset using the continuous features. For that, extract the dataframe comprising the continuous features.

---

In [None]:
## Extract dataframe comprising the continuous features
## (replace the ? placeholder; the result should hold only the columns
##  named in continuous_features)
dfICU_continuous = ?
dfICU_continuous.head(10)

---

$\color{yellow}{\textbf{Complete this}}$

Make a scatter plot of temperature against heart rate

---

In [None]:
## Scatter plot between heart rate and temperature
## (replace the two ? placeholders with the x-values and the y-values; per the
##  axis labels below, heart rate goes on x and temperature on y)
fig, ax = plt.subplots(1, 1, figsize = (4, 4))
fig.tight_layout(pad = 4.0)
ax.scatter(?, ?, s = 2.0, color = 'red')
ax.set_xlabel('Heart Rate (BPM)', fontsize = 12)
ax.set_ylabel('Temperature (Celsius)', fontsize = 12)
# Trailing ; keeps the notebook from echoing the returned Text object
ax.set_title('Heart Rate vs. Temperature', fontsize = 14);

---

$\color{yellow}{\textbf{Complete this}}$

Calculate the variance of heart rates from scratch and using the in-built function in numpy


---

In [None]:
## Variance of heart rates
## (replace each ? placeholder with the heart-rate column name / column values)
# From scratch: mean of the squared deviations from the mean (divides by n)
print(np.mean((dfICU_continuous[?]-np.mean(?))**2))
# Built-in: np.var also divides by n by default (ddof = 0), so both prints should agree
print(np.var(?))

---

$\color{yellow}{\textbf{Complete this}}$

Calculate the variance of temperatures from scratch and using the in-built function in numpy


---

In [None]:
## Variance of temperatures


---

$\color{yellow}{\textbf{Complete this}}$

Calculate the covariance between heart rate and temperature from scratch and using the in-built function in numpy


---

In [None]:
## Covariance between heart rate and blood pressur
print((1/dfICU_continuous.shape[0])*np.dot(?, ?))
print(np.cov(?, ?))

---

$\color{yellow}{\textbf{Complete this}}$

Calculate the correlation between heart rate and temperature using the in-built function in numpy


---

In [None]:
## Correlation between heart rate and temperature using the in-built function in numpy
## (replace the ? placeholders with the two columns; np.corrcoef returns the
##  correlation matrix, so the coefficient of interest is the off-diagonal entry)
print(np.corrcoef(?, ?))

---

$\color{yellow}{\textbf{Complete this}}$

Create a matrix comprising the standardized values of the continuous features


---

In [None]:
## Standardized continuous features
## (replace the ? placeholder: use the scaler `sc` to standardize the
##  continuous features and store the resulting matrix in X)
sc = StandardScaler()
X = ?
print(X)

---

$\color{yellow}{\textbf{Complete this}}$

Perform PCA with one component and rank the features in terms of the magnitude of the PCA loadings

---

In [None]:
## Create and fit PCA object
## (replace the ? placeholders: the number of components this exercise asks for,
##  and the data to decompose -- presumably the standardized matrix X)
pca = PCA(n_components = ?)
# fit_transform fits the model and returns the samples' scores on the retained component(s)
print(pca.fit_transform(?))

# Print principal components (one row per component, one column per feature)
print(pca.components_)

# Print explained variance (variance of the data along each retained component)
print(pca.explained_variance_)

---

$\color{yellow}{\textbf{Complete this}}$

Project the 0th sample onto the PC-1 direction

---

In [None]:
## Project the 0th sample onto the PC-1 direction
## (replace the ? placeholders with the 0th sample's feature vector and the
##  PC-1 direction vector; their dot product is the projection)
np.dot(?, ?)

---

$\color{yellow}{\textbf{Complete this}}$

Calculate the variance of the samples projected onto the PC-1 direction

---

In [None]:
## Calculate the variance of the samples projected onto the PC-1 direction
## Starting point (uncomment and adapt):
# np.var(pca.fit_transform(X))
## NOTE: np.var divides by n by default, whereas pca.explained_variance_ divides
## by n - 1, so the two values differ by a factor of n/(n - 1)

---

$\color{yellow}{\textbf{Complete this}}$

Perform PCA with two components

---

In [None]:
## Create and fit PCA object
## (replace the ? placeholders: the PCA settings for a two-component model,
##  and the data to decompose -- presumably the standardized matrix X)
pca = PCA(?)
pca.fit_transform(?)

# Print principal components (one row per component, one column per feature)
print(pca.components_)

# Print explained variance (variance of the data along each retained component)
print(pca.explained_variance_)

---

$\color{yellow}{\textbf{Complete this}}$

Calculate the proportion of variance explained by projecting onto
PC-1 alone and onto both the PC-1 and PC-2 directions

---

In [None]:
## Calculate the proportion of variance explained by projecting onto
## PC-1 and onto both PC-1 and PC-2 directions
## (replace the ? placeholders: variance captured by the component(s) divided
##  by the total variance of the data)
?/?

---

$\color{yellow}{\textbf{Complete this}}$

Perform PCA with all components and select the number of PCs such that we are able to explain at least 75% of the total variance in the data

---

In [None]:
## Perform PCA with all components and select the number of PCs such that
## we are able to explain at least 75% of the total variance in the data

---

$\color{yellow}{\textbf{Complete this}}$


Fit a PCA model for the dataset considering only two features: (1) Heart Rate (2) Temperature

---

In [None]:
## Fit a PCA model for the dataset considering only two features:
## (1) Heart Rate (2) Temperature

In [None]:
## Scatter plot of heart rate and temperature along with the PC vectors
fig, ax = plt.subplots(1, 1, figsize = (4, 4))
fig.tight_layout(pad = 4.0)
ax.scatter(?, ?, s = 4.0, alpha = 0.2, color = 'red')
arrowprops = dict(arrowstyle = '->', linewidth = 2, shrinkA = 0, shrinkB = 0)
ax.annotate('', xy = (pca.components_[0, 0], pca.components_[1, 0]), xytext = (0, 0), arrowprops = arrowprops)
ax.annotate('', xy = (pca.components_[0, 1], pca.components_[1, 1]), xytext = (0, 0), arrowprops = arrowprops)