-
Notifications
You must be signed in to change notification settings - Fork 2
/
kernelpca.py
64 lines (46 loc) · 1.66 KB
/
kernelpca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
from copy import deepcopy
import random, math, sys
from scipy.spatial.distance import pdist, squareform
from scipy import exp
from scipy.linalg import eigh
import matplotlib.pyplot as plt
# Number of feature columns allocated per sample in the input matrix
# (second dimension of the array created in the script entry point).
numoffeatures = 10000
# Number of eigenvector columns returned by kernelpca().
newnumoffeatures = 100
# Number of samples handled: rows read by populatedata(), label lines read by
# getlabels() in the script entry point, and the size of the centering matrix
# in kernelpca().
numofdata = 100
def populatedata(values, filename='arcene_train.data', count=None):
    """Fill ``values`` in place with integers parsed from a text file.

    Line ``i`` of the file is split on whitespace, every token is converted
    with ``int`` and stored in ``values[i][0]``, ``values[i][1]``, ...
    A line without any tokens leaves the corresponding row untouched.

    Args:
        values: Mutable 2-D container supporting ``values[i][j] = x``
            (for example a NumPy array or a list of lists).
        filename: Path of the data file to read. Defaults to
            ``'arcene_train.data'``.
        count: Number of leading lines to load. When omitted, the
            module-level ``numofdata`` is used.

    Raises:
        IndexError: If the file has fewer than ``count`` lines, or a line
            holds more tokens than ``values[i]`` has slots.
        ValueError: If a token is not a valid integer literal.
    """
    if count is None:
        count = numofdata

    # The context manager guarantees the handle is closed, even when parsing
    # fails further down.
    with open(filename) as fp:
        rows = fp.readlines()

    for i in range(count):
        # split() without an argument ignores the trailing space/newline and
        # tolerates repeated separators, so the last value of a line is never
        # dropped when the trailing space happens to be missing.
        numbers = [int(token) for token in rows[i].split()]
        for j, number in enumerate(numbers):
            values[i][j] = number
def getlabels(filename, ranges):
    """Read the first ``ranges`` integer labels from a text file.

    Args:
        filename: Path of a file holding one integer label per line.
        ranges: Number of leading lines to parse.

    Returns:
        A list with ``ranges`` integers, in file order.

    Raises:
        IndexError: If the file has fewer than ``ranges`` lines.
        ValueError: If one of the requested lines is not an integer.
    """
    # The context manager closes the handle as soon as the lines are read.
    with open(filename) as fp:
        data = fp.readlines()

    # Index explicitly (rather than slicing) so that a file that is too short
    # is reported with an IndexError instead of being silently truncated.
    return [int(data[i]) for i in range(ranges)]
def kernelpca(data, gamma, n_components=None):
    """Return the leading eigenvectors of the centered RBF kernel matrix.

    The kernel ``exp(-gamma * ||x_i - x_j||^2)`` is computed for every pair
    of rows of ``data``, centered, and eigendecomposed.

    Args:
        data: 2-D array-like with one sample per row.
        gamma: Coefficient of the RBF kernel.
        n_components: Number of eigenvectors to return. When omitted, the
            module-level ``newnumoffeatures`` is used.

    Returns:
        Array of shape ``(n_samples, n_components)`` whose column ``k`` is
        the eigenvector belonging to the ``k``-th largest eigenvalue.

    Raises:
        IndexError: If ``n_components`` exceeds the number of samples.
    """
    if n_components is None:
        n_components = newnumoffeatures

    # Condensed pairwise squared distances expanded to a full square matrix.
    sqdistmatrix = squareform(pdist(data, 'sqeuclidean'))

    # np.exp replaces the deprecated scipy.exp alias.
    kernel = np.exp(-gamma * sqdistmatrix)

    # Size the centering matrix from the kernel itself so the function works
    # for any number of samples, not only for the module-level default.
    nsamples = kernel.shape[0]
    onen = np.full((nsamples, nsamples), 1.0 / nsamples)
    onen_kernel = onen.dot(kernel)  # reused in two terms below
    kernel = kernel - onen_kernel - kernel.dot(onen) + onen_kernel.dot(onen)

    # eigh returns eigenvalues in ascending order, so the wanted columns are
    # the last ones, taken back to front. Index arrays are used because
    # np.column_stack needs a real sequence and does not accept a generator.
    _, eigvecs = eigh(kernel)
    leading = -np.arange(1, n_components + 1)
    return eigvecs[:, leading]
if __name__ == '__main__':
    # Script entry point: load the training matrix and its labels, then
    # compute the kernel PCA representation of the samples.
    random.seed()

    samples = np.zeros((numofdata, numoffeatures))
    populatedata(samples)  # fills `samples` in place
    labels = getlabels('arcene_train.labels', numofdata)

    gamma = 0.2
    newdata = kernelpca(samples, gamma)

    # Optional scatter plot of the first two components, one colour per label
    # (kept disabled, as in the original script):
    # plt.scatter([newdata[i][0] for i in range(len(newdata)) if labels[i] == 1], [newdata[i][1] for i in range(len(newdata)) if labels[i] == 1], color='red', alpha=0.5)
    # plt.scatter([newdata[i][0] for i in range(len(newdata)) if labels[i] == -1], [newdata[i][1] for i in range(len(newdata)) if labels[i] == -1], color='blue', alpha=0.5)
    # plt.show()