In [1]:
!pip install yfinance




[notice] A new release of pip is available: 23.1.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Question 1:
 
Generate 1500 data points for the variables x, y, z with the Python (or using another language of your choice) commands.

Do a Principal Components Analysis (PCA) on the sample of three-dimensional observations, and write down the three components and the corresponding principal values. (If they are not vectors of length 3, you are doing something wrong.) Check that the pointwise products of the components are zero (up to machine precision). Write down the dot (i.e. pointwise) product of the first and third components, and compare that with the dot product of xdata and zdata. 


In [6]:
# Generate 1500 data points for x, y, and z
mean = np.array([2, 3, 0])
matrix_m = np.array([[10, 7, 5], [7, 6, 4], [5, 4, 3]])
sample = np.random.multivariate_normal(mean, matrix_m, 1500).T

# Separate the data into xdata, ydata, and zdata
x_data = sample[0, :]
y_data = sample[1, :]
z_data = sample[2, :]

#combine the data
xyz_data = np.vstack((x_data, y_data, z_data)).T

### Mean & Centred Matrix

In [8]:
#calculate the mean
xyz_mean = np.mean(xyz_data, axis=0)

#Subtract the mean from the respective data points
xyz_centred = xyz_data - xyz_mean
xyz_centred

array([[-0.10032962, -1.31959638,  0.02155964],
       [ 0.3504718 , -0.00232677,  0.36438307],
       [-0.83946646, -1.15294744, -0.98627194],
       ...,
       [ 0.99335398,  1.08596985, -0.01064963],
       [ 1.8544664 , -0.11734681,  0.24956654],
       [ 1.7070037 , -0.48077755, -0.01253713]])

### Covariance Matrix - First Principles

In [9]:
#factoring degrees of freedom
n = sample.shape[1]
cov_matrix = np.dot(xyz_centred.T, xyz_centred)/ (n-1)
cov_matrix

array([[9.72231028, 6.85500445, 4.95713227],
       [6.85500445, 5.91652483, 3.96750888],
       [4.95713227, 3.96750888, 3.00291085]])

In [10]:
#Checking with Black Box
cov = np.cov(xyz_centred, rowvar=False)
cov

array([[9.72231028, 6.85500445, 4.95713227],
       [6.85500445, 5.91652483, 3.96750888],
       [4.95713227, 3.96750888, 3.00291085]])

### Eigen Values and Vectors & Principal Components

In [11]:
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

#sort values eigen vector according to eigen value
indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[indices]
eigenvectors = eigenvectors[:, indices]

In [12]:
# Principal Values
princ_val_1 = eigenvalues[0]
princ_val_2 = eigenvalues[1]
princ_val_3 = eigenvalues[2]

# Principal Components
comp_1 = eigenvectors[:, 0]
comp_2 = eigenvectors[:, 1]
comp_3 = eigenvectors[:, 2]

In [13]:
# Check that the pointwise products of the components are zero (up to machine precision)
component_products = np.dot(eigenvectors.T, eigenvectors)
print("Pointwise products of components:\n", component_products)

Pointwise products of components:
 [[ 1.00000000e+00 -7.69461636e-17  2.61766241e-17]
 [-7.69461636e-17  1.00000000e+00  7.29908294e-16]
 [ 2.61766241e-17  7.29908294e-16  1.00000000e+00]]


### Pointwise Products of Components

In [14]:
# Check pointwise products of components
product_12 = np.dot(comp_1, comp_2)
product_13 = np.dot(comp_1, comp_3)
product_23 = np.dot(comp_2, comp_3)

print('Product of component 1 and 2:', product_12)
print('Product of component 1 and 3:', product_13)
print('Product of component 2 and 3:', product_23)

Product of component 1 and 2: -7.694616362318486e-17
Product of component 1 and 3: 2.617662409443183e-17
Product of component 2 and 3: 7.299082944306094e-16


In [15]:
# Check if pointwise products are close to zero
if np.isclose(product_12, 0) and np.isclose(product_13, 0) and np.isclose(product_23, 0):
    print("Pointwise products of the components are approximately 0.")
else:
    print("Pointwise products of the components are not 0.")

Pointwise products of the components are approximately 0.


# Factor Modeling

Download 5 years historical daily price data for the Johannesburg Stock Exchange index, for four large companies of your choice on the JSE, as well as another financial variable of your choice. From the prices determine the daily returns.

In [16]:
securities = ['SOL.JO', 'VOD.JO', 'DRD.JO', 'SBK.JO',"ZAR=X"]

end_date = pd.to_datetime("2023-08-31")
start_date = pd.to_datetime("2018-09-01")

data = yf.download(securities, start= start_date, end=end_date)['Adj Close']

#convert exchange rate to cents
data['ZAR=X'] = data['ZAR=X'] *100
data.head()

[*********************100%%**********************]  5 of 5 completed


Ticker,DRD.JO,SBK.JO,SOL.JO,VOD.JO,ZAR=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-03,232.917618,13547.772461,49484.558594,9269.163086,1471.525002
2018-09-04,243.04451,13033.686523,50271.625,8848.467773,1485.000038
2018-09-05,241.486511,12634.325195,50577.246094,8820.713867,1531.869984
2018-09-06,256.287231,12949.458008,50224.066406,8961.674805,1540.110016
2018-09-07,257.066284,13142.602539,49518.582031,8950.720703,1531.159973


### Daily returns

In [17]:
#calculate daily returns
daily_returns = np.log(data).diff()
daily_returns.head()

Ticker,DRD.JO,SBK.JO,SOL.JO,VOD.JO,ZAR=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-03,,,,,
2018-09-04,0.04256,-0.038685,0.01578,-0.046449,0.009116
2018-09-05,-0.006431,-0.03112,0.006061,-0.003142,0.031074
2018-09-06,0.059485,0.024637,-0.007007,0.015854,0.005365
2018-09-07,0.003035,0.014805,-0.014146,-0.001223,-0.005828


### Covariance Matrix  - First Principles

In [18]:
#First Priinciples
daily_returns_1 = daily_returns.dropna()
daily_returns_mean = np.mean(daily_returns_1, axis=0)
#centred matrix
daily_returns_centred = daily_returns_1 - daily_returns_mean

#factoring degrees of freedom
n = daily_returns_1.shape[0]
cov_matrix_2 = np.dot(daily_returns_centred.T, daily_returns_centred)/(n-1)
cov_matrix_2

array([[ 1.14327697e-03, -5.94169786e-05, -1.42884013e-05,
         4.42184678e-05,  2.22671685e-05],
       [-5.94169786e-05,  4.77277257e-04,  3.84948684e-04,
         1.08940923e-04, -2.37416567e-05],
       [-1.42884013e-05,  3.84948684e-04,  2.05454710e-03,
         1.48311613e-04, -3.70621560e-05],
       [ 4.42184678e-05,  1.08940923e-04,  1.48311613e-04,
         3.58267398e-02, -9.30126859e-05],
       [ 2.22671685e-05, -2.37416567e-05, -3.70621560e-05,
        -9.30126859e-05,  9.45436731e-05]])

In [19]:
#Checking with Black Box
cov = np.cov(daily_returns_1, rowvar=False)
cov

array([[ 1.14327697e-03, -5.94169786e-05, -1.42884013e-05,
         4.42184678e-05,  2.22671685e-05],
       [-5.94169786e-05,  4.77277257e-04,  3.84948684e-04,
         1.08940923e-04, -2.37416567e-05],
       [-1.42884013e-05,  3.84948684e-04,  2.05454710e-03,
         1.48311613e-04, -3.70621560e-05],
       [ 4.42184678e-05,  1.08940923e-04,  1.48311613e-04,
         3.58267398e-02, -9.30126859e-05],
       [ 2.22671685e-05, -2.37416567e-05, -3.70621560e-05,
        -9.30126859e-05,  9.45436731e-05]])

### Eigen Values, Vectors and Principal Components

In [20]:
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix_2)
#sort values in
indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[indices]
eigenvectors = eigenvectors[:, indices]
eigenvalues

array([3.58280363e-02, 2.14423704e-03, 1.14688759e-03, 3.84815822e-04,
       9.24080411e-05])

In [21]:
eigenvectors

array([[ 0.00126598, -0.0279733 , -0.99674987,  0.07327003, -0.01836009],
       [ 0.00312951,  0.2257781 ,  0.06412542,  0.97081608,  0.04917855],
       [ 0.00442928,  0.97355253, -0.0439448 , -0.22397672,  0.00889579],
       [ 0.99998109, -0.00503633,  0.00120082, -0.00225492,  0.00243507],
       [-0.00260879, -0.02029413, -0.021096  , -0.04446334,  0.99857865]])

### Principal Components

In [22]:
# Principal Values
princ_val_1 = eigenvalues[0]
princ_val_2 = eigenvalues[1]
princ_val_3 = eigenvalues[2]
princ_val_4 = eigenvalues[3]
princ_val_5 = eigenvalues[4]
# Principal Components (Factors)
comp_1 = eigenvectors[:, 0]
comp_2 = eigenvectors[:, 1]
comp_3 = eigenvectors[:, 2]
comp_4 = eigenvectors[:, 3]
comp_5 = eigenvectors[:, 4]

print("Factors" , "\n F1:" ,comp_1, "\n F2:" ,comp_2, "\n F3:" ,comp_3, "\n F4:" ,comp_4, "\n F5:" ,comp_5, )

Factors 
 F1: [ 0.00126598  0.00312951  0.00442928  0.99998109 -0.00260879] 
 F2: [-0.0279733   0.2257781   0.97355253 -0.00503633 -0.02029413] 
 F3: [-0.99674987  0.06412542 -0.0439448   0.00120082 -0.021096  ] 
 F4: [ 0.07327003  0.97081608 -0.22397672 -0.00225492 -0.04446334] 
 F5: [-0.01836009  0.04917855  0.00889579  0.00243507  0.99857865]


### Variance 

In [23]:
sum_of_evals = np.sum(eigenvalues)
var_1 = (princ_val_1 / sum_of_evals) *100
var_2 = (princ_val_2 / sum_of_evals) * 100
var_3 = (princ_val_3 / sum_of_evals) * 100
var_4 = (princ_val_4 / sum_of_evals) * 100
var_5 = (princ_val_5 / sum_of_evals) * 100

weights = [var_1, var_2, var_3, var_4, var_5]
weights

[90.48309961677323,
 5.415234377840976,
 2.8964452134375747,
 0.9718458475445441,
 0.23337494440368264]