In [6]:
import numpy as np
import pandas as pd

np.random.seed(23)

# Class 1 data
mu_vec1 = np.array([0, 0, 0])
cov_mat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df = pd.DataFrame(class1_sample, columns=['feature1', 'feature2', 'feature3'])
df['target'] = 1

# Class 2 data
mu_vec2 = np.array([1, 1, 1])
cov_mat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

df1 = pd.DataFrame(class2_sample, columns=['feature1', 'feature2', 'feature3'])
df1['target'] = 0

# Using pd.concat to combine DataFrames
df = pd.concat([df, df1], ignore_index=True)

# Shuffle the DataFrame
df = df.sample(frac=1, random_state=1).reset_index(drop=True)  # Shuffling and resetting index

print(df)


    feature1  feature2  feature3  target
0  -0.367548 -1.137460 -1.322148       1
1   2.224431  0.230401  1.192120       0
2   1.772258 -0.347459  0.670140       1
3   0.731858  0.517441  2.244610       0
4   2.011059  1.920996  2.933090       0
5   1.425140  1.441152  0.182561       0
6   1.676860  4.187503 -0.080565       0
7   0.384865  1.323546 -0.103193       0
8  -0.992574 -0.161346  1.192404       1
9   0.898907  0.435960  0.820964       0
10 -0.723253  1.461259 -0.085367       0
11  0.190141  0.512137  0.131538       1
12  1.437892  1.099723  1.065406       0
13 -1.389866  0.666726  1.343517       0
14  2.823378 -0.332863  2.637391       0
15  0.204637 -0.011535  3.150780       0
16  0.322272  0.060343 -1.043450       1
17  0.420623  0.411620 -0.071324       1
18  1.415320  0.457711  0.728876       1
19  1.233898  0.052778 -0.261576       0
20  1.010229  1.437830  2.327788       0
21  0.748855  2.593111  1.170818       0
22  0.177061 -0.598109  1.226512       0
23  1.250737  0.

In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

In [8]:
covariance_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [9]:
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

In [10]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [11]:
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [12]:
pc = eigen_vectors[0:2]
pc

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442]])

In [13]:
transformed_df = np.dot(df.iloc[:,0:3],pc.T)
new_df = pd.DataFrame(transformed_df,columns=['PC1','PC2'])
new_df['target'] = df['target'].values
new_df.head()

Unnamed: 0,PC1,PC2,target
0,0.599433,1.795862,1
1,-0.470834,-1.373121,0
2,-0.094556,-0.761566,1
3,0.484802,-1.217708,0
4,-0.75789,-2.422615,0
