-
-
Notifications
You must be signed in to change notification settings - Fork 364
/
Copy pathpca_with_python.py
77 lines (50 loc) · 1.97 KB
/
pca_with_python.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
"""pca_with_python.ipynb
Automatically generated by Colaboratory.
The link to the original Colab notebook was not preserved in this copy.

**Import Libraries**
"""
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
"""**Load Iris Data**"""
# Fetch the Iris dataset and lay it out as a table: one column per feature
# name reported by the dataset, plus a trailing 'class' column with the target.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    **{'class': iris.target}
)
print(df)
"""**Get the value of x and y**"""
# Feature matrix: every column except 'class'. Label vector: the 'class' column.
x = df.drop(columns=['class']).to_numpy()
y = df['class'].to_numpy()
print(x.shape, y.shape)
"""**Implementation of PCA**"""
class convers_pca:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    Parameters
    ----------
    no_of_components : int or None
        Number of principal components to keep. ``None`` keeps one component
        per feature of the data passed to :meth:`fit`. A value larger than the
        number of features keeps all of them.

    Attributes
    ----------
    mean : ndarray of shape (n_features,)
        Per-feature mean of the training data (set by :meth:`fit`).
    eigen_values : ndarray of shape (n_features,)
        Eigenvalues of the covariance matrix, in the order returned by the
        solver (not sorted).
    eigen_vectors : ndarray of shape (n_features, n_features)
        Eigenvectors stored as *rows*; row ``i`` belongs to ``eigen_values[i]``.
    sorted_components : ndarray of int
        Indices that order the eigenvalues from largest to smallest.
    projection_matrix : ndarray of shape (no_of_components, n_features)
        The kept eigenvectors (rows), ordered by decreasing eigenvalue.
    explained_variance : ndarray of shape (n_features,)
        All eigenvalues sorted in decreasing order.
    explained_variance_ratio : ndarray of shape (n_features,)
        ``explained_variance`` divided by the sum of all eigenvalues.

    Notes
    -----
    The sign of each principal component is arbitrary, so projected
    coordinates are only defined up to a sign flip per component.
    """

    def __init__(self, no_of_components):
        self.no_of_components = no_of_components
        self.eigen_values = None
        self.eigen_vectors = None
        self.mean = None
        self.projection_matrix = None

    def _check_is_fitted(self):
        """Raise ``AttributeError`` if :meth:`fit` has not been called yet."""
        if (getattr(self, 'mean', None) is None
                or getattr(self, 'projection_matrix', None) is None):
            raise AttributeError(
                "this convers_pca instance is not fitted yet; call fit() first"
            )

    def transform(self, x):
        """Project ``x`` (n_samples, n_features) onto the kept components.

        Returns an array of shape (n_samples, no_of_components).
        Raises ``AttributeError`` if the model has not been fitted.
        """
        self._check_is_fitted()
        return np.dot(np.asarray(x) - self.mean, self.projection_matrix.T)

    def inverse_transform(self, x):
        """Map projected data (n_samples, no_of_components) back to feature space.

        The result is exact only when all components were kept; otherwise it
        is the reconstruction from the kept components.
        Raises ``AttributeError`` if the model has not been fitted.
        """
        self._check_is_fitted()
        return np.dot(np.asarray(x), self.projection_matrix) + self.mean

    def fit(self, x):
        """Estimate the principal components of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Training data; rows are samples.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D, has fewer than two samples, or
            ``no_of_components`` is negative.
        """
        x = np.asarray(x)
        if x.ndim != 2:
            raise ValueError(
                f"x must be 2-D (n_samples, n_features), got {x.ndim}-D input"
            )
        n_samples, n_features = x.shape
        if n_samples < 2:
            raise ValueError(
                "at least two samples are required to estimate the covariance"
            )

        if self.no_of_components is None:
            self.no_of_components = n_features
        elif self.no_of_components < 0:
            # A negative count would silently drop trailing components through
            # slice semantics instead of selecting the leading ones.
            raise ValueError("no_of_components must be non-negative or None")

        self.mean = np.mean(x, axis=0)
        # np.cov collapses a single-feature input to a 0-d array; keep it 2-D
        # so the eigen-solver accepts it.
        cov_matrix = np.atleast_2d(np.cov(x - self.mean, rowvar=False))

        # The covariance matrix is symmetric, so use the symmetric solver: it
        # guarantees real eigenvalues and orthonormal eigenvectors, whereas the
        # general solver can return complex values for rank-deficient data.
        eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)
        self.eigen_values = eigen_values
        # The solver returns eigenvectors as columns; store them as rows.
        self.eigen_vectors = eigen_vectors.T

        self.sorted_components = np.argsort(self.eigen_values)[::-1]
        kept = self.sorted_components[:self.no_of_components]
        self.projection_matrix = self.eigen_vectors[kept]
        self.explained_variance = self.eigen_values[self.sorted_components]
        self.explained_variance_ratio = (
            self.explained_variance / self.eigen_values.sum()
        )
"""**Standardization**"""
# Fit the scaler on the features and standardize them in one step. Using the
# named instance (rather than a second, throw-away scaler) keeps the fitted
# statistics available on `std` for later reuse.
std = StandardScaler()
transformed = std.fit_transform(x)
"""**PCA with Component = 2**"""
# Fit the PCA on the standardized features, keeping two components.
pca = convers_pca(no_of_components=2)
pca.fit(transformed)

# **Plotting**
# Project the data onto the fitted components and scatter the two resulting
# coordinates against each other, coloured by class label.
x_std = pca.transform(transformed)
plt.figure()
plt.scatter(*x_std[:, :2].T, c=y)