In [1]:
!pip install yfinance




[notice] A new release of pip is available: 23.1.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Question 1:
 
Generate 1500 data points for the variables x, y, z using Python commands (or the equivalent in another language of your choice).

Do a Principal Components Analysis (PCA) on the sample of three-dimensional observations, and write down the three components and the corresponding principal values. (If the components are not vectors of length 3, you are doing something wrong.) Check that the dot products of distinct components (i.e. the sums of their pointwise products) are zero (up to machine precision). Write down the dot product of the first and third components, and compare that with the dot product of xdata and zdata.


In [6]:
# Generate 1500 correlated data points for x, y and z.
# A fixed seed makes the draw, and every result derived from it, reproducible
# under Restart Kernel -> Run All.
SEED = 42
N_POINTS = 1500
rng = np.random.default_rng(SEED)

# Mean vector and covariance matrix of the distribution the points are drawn from
mean = np.array([2, 3, 0])
matrix_m = np.array([[10, 7, 5], [7, 6, 4], [5, 4, 3]])

# The draw has shape (N_POINTS, 3); transposing gives `sample` one row per variable.
sample = rng.multivariate_normal(mean, matrix_m, N_POINTS).T

# Separate the data into x_data, y_data and z_data (one row of `sample` each)
x_data, y_data, z_data = sample

# Combine the data into an (N_POINTS, 3) matrix with one observation per row.
# Stacking the three rows and transposing is just `sample.T`; copy it so that
# xyz_data stays an independent array, as it was before.
xyz_data = sample.T.copy()

### Mean & Centred Matrix

In [8]:
# Column-wise mean: one value per variable (x, y, z)
xyz_mean = xyz_data.mean(axis=0)

# Centre the observations by removing each column's mean (broadcast across rows)
xyz_centred = np.subtract(xyz_data, xyz_mean)
xyz_centred

array([[-0.10032962, -1.31959638,  0.02155964],
       [ 0.3504718 , -0.00232677,  0.36438307],
       [-0.83946646, -1.15294744, -0.98627194],
       ...,
       [ 0.99335398,  1.08596985, -0.01064963],
       [ 1.8544664 , -0.11734681,  0.24956654],
       [ 1.7070037 , -0.48077755, -0.01253713]])

### Covariance Matrix - First Principles

In [9]:
# Sample covariance from first principles: X^T X / (n - 1), with X the centred data.
# n is the number of observations (the columns of `sample`); dividing by n - 1
# accounts for the degrees of freedom.
n = sample.shape[-1]
cov_matrix = (xyz_centred.T @ xyz_centred) / (n - 1)
cov_matrix

array([[9.72231028, 6.85500445, 4.95713227],
       [6.85500445, 5.91652483, 3.96750888],
       [4.95713227, 3.96750888, 3.00291085]])

In [10]:
# Cross-check the hand-computed covariance against NumPy's built-in routine.
# np.cov treats rows as variables by default, so pass the data transposed.
cov = np.cov(xyz_centred.T)
cov

array([[9.72231028, 6.85500445, 4.95713227],
       [6.85500445, 5.91652483, 3.96750888],
       [4.95713227, 3.96750888, 3.00291085]])

### Eigenvalues, Eigenvectors & Principal Components

In [11]:
# cov_matrix is symmetric (it is X^T X / (n - 1)), so use `eigh`: it is the routine
# for symmetric matrices and guarantees real eigenvalues and orthonormal
# eigenvectors, which the general-purpose `eig` does not.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Sort the eigenvalues from largest to smallest and reorder the eigenvector
# columns so that column i still belongs to eigenvalue i.
indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[indices]
eigenvectors = eigenvectors[:, indices]

In [12]:
# Principal values: the first three entries of `eigenvalues`, in stored order
princ_val_1, princ_val_2, princ_val_3 = eigenvalues[:3]

# Principal components: the first three columns of `eigenvectors`
# (rows of the transpose are the columns of the original matrix)
comp_1, comp_2, comp_3 = eigenvectors.T[:3]

In [13]:
# Gram matrix of the components: entry (i, j) is the dot product of components
# i and j, so the off-diagonal entries should be zero up to machine precision.
component_products = eigenvectors.T @ eigenvectors
print("Pointwise products of components:\n", component_products)

Pointwise products of components:
 [[ 1.00000000e+00 -7.69461636e-17  2.61766241e-17]
 [-7.69461636e-17  1.00000000e+00  7.29908294e-16]
 [ 2.61766241e-17  7.29908294e-16  1.00000000e+00]]


### Pointwise Products of Components

In [14]:
# Dot product of every distinct pair of principal components
product_12 = comp_1.dot(comp_2)
product_13 = comp_1.dot(comp_3)
product_23 = comp_2.dot(comp_3)

# Report each dot product next to its label
for label, value in (
    ('Product of component 1 and 2:', product_12),
    ('Product of component 1 and 3:', product_13),
    ('Product of component 2 and 3:', product_23),
):
    print(label, value)

Product of component 1 and 2: -7.694616362318486e-17
Product of component 1 and 3: 2.617662409443183e-17
Product of component 2 and 3: 7.299082944306094e-16


In [15]:
# Verify that all three pairwise dot products are zero within np.isclose's
# default tolerances.
pairwise_products = [product_12, product_13, product_23]
if not np.isclose(pairwise_products, 0).all():
    print("Pointwise products of the components are not 0.")
else:
    print("Pointwise products of the components are approximately 0.")

Pointwise products of the components are approximately 0.
