In [1]:
import create_data
import kmeans
import voltage
import os
import importlib
import time
import bpf
import numpy as np


from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml

In [2]:
importlib.reload(create_data)
importlib.reload(kmeans)
importlib.reload(voltage)
importlib.reload(bpf)

<module 'bpf' from 'C:\\Users\\avigh\\Documents\\python\\VoltageDimentionalReduction\\code\\bpf.py'>

In [3]:
print("Loading Data...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(np.int64)

# data = create_data.Data(np.array(X))

Loading Data...


In [4]:
# MNIST Pre-processing

subDivision = {}
summation = {}
count = {}

print("Sorting and averaging...")

for xi, yi in zip(X, y):
    if yi in summation:
        subDivision[yi].append(np.array(xi))
        summation[yi] += np.array(xi)
        count[yi] += 1
    else:
        subDivision[yi] = [np.array(xi)]
        summation[yi] = np.array(xi)
        count[yi] = 1

Sorting and averaging...


In [5]:
# Kmeans sampling equal points from each of the 10 digits

print("Kmeans...")

points = 100
k = points // 10
data = []

for yi in range(10):
    print(yi)
    
    partitions = kmeans.Partitions(subDivision[yi])
    partitions.k_means(k, seed=time.time())

    data += list(partitions.centers)

Kmeans...
0
1
2
3
4
5
6
7
8
9


In [6]:
# Create the landmarks

data = create_data.Data(data)

landmarks = []
for yi in range(10):
    landmark = voltage.Landmark(-1, 1)

    ignore = []
    while (landmark.index // k != yi):
        ignore.append(landmark.index)

        landmark = voltage.Landmark.createLandmarkClosestTo(data, summation[yi] / count[yi], 1, ignore=ignore)
 
    landmarks.append(landmark)

print(len(data))
print([l.index for l in landmarks])

100
[6, 16, 29, 33, 48, 58, 65, 79, 84, 93]


In [7]:
print(type(data))
print(isinstance(data, create_data.Data))

<class 'create_data.Data'>
True


In [8]:
print("Parameter Finding...")

cs = []
pgs = []

param_finder = bpf.BestParameterFinder()

for landmark in landmarks:
    c, p_g = param_finder.bestParameterFinder([landmark], data, minBound=-10, maxBound=20, granularity=3, epsilon=0.5, approx=10)
    print(c, p_g)
    cs.append(c)
    pgs.append(p_g)

print(cs)
print(pgs)

Parameter Finding...
15.0 148.4131591025766 148.4131591025766
5.0 3269017.3724721107 0.006737946999085467
1.6666666666666667 91635868.75882752 0.00024036947641951407
0.5555555555555556 278365190.4891118 7.912794612036177e-05
403148208.29230106 5.46361495394182e-05
15.0 148.4131591025766 148.4131591025766
5.0 3269017.3724721107 0.006737946999085467
1.6666666666666667 91635868.75882752 0.00024036947641951407
0.5555555555555556 278365190.4891118 7.912794612036177e-05
403148208.29230106 5.46361495394182e-05
15.0 148.4131591025766 148.4131591025766
5.0 22026.465794806718 0.006737946999085467
1.6666666666666667 785.7719942274168 0.00024036947641951407
0.5555555555555556 785.7719942274168 7.912794612036177e-05
945.6304537275181 5.46361495394182e-05
15.0 148.4131591025766 148.4131591025766
5.0 22026.465794806718 0.006737946999085467
1.6666666666666667 785.7719942274168 0.00024036947641951407
0.5555555555555556 1369.529079613122 7.912794612036177e-05
945.6304537275181 5.46361495394182e-05
15.0 

In [9]:
c, p_g = param_finder.bestParameterFinder(landmarks, data, minBound=-10, maxBound=10, granularity=10, epsilon=0.5, approx=10)

10.0 1.0 1.0
1.0 403.4287934927351 0.00012340980408667956


In [20]:
voltages = []

for index in range(0, len(landmarks)):
    problem = voltage.Problem(data)
    problem.setKernel(problem.gaussiankernel)
    problem.setWeights(c)
    problem.addLandmark(landmarks[index])
    problem.addUniversalGround(p_g)
    voltages.append(voltage.Solver(problem).approximate_voltages(max_iters=10))

In [13]:
param_finder.visualizations(voltages, "../inputoutput/matplotfigures/MNIST")

<Figure size 640x480 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [14]:
print(voltages[0])

[1.26306156e-01 2.39128288e-02 2.40488238e-02 8.35614357e-02
 1.95382105e-01 9.08818360e-02 1.00000000e+00 7.43354687e-02
 7.15647034e-02 2.52067993e-01 4.33245434e-04 4.93038348e-04
 3.87584089e-04 4.23213659e-04 3.75362182e-04 5.43256224e-04
 4.47276590e-04 7.02367280e-04 6.88584956e-04 3.49791148e-04
 1.38718652e-03 1.79103596e-03 4.25058064e-03 4.35354487e-03
 1.73708982e-03 2.31053828e-03 3.03902550e-03 1.94809800e-03
 1.12411872e-02 3.06529106e-03 8.30054360e-03 5.61742996e-03
 3.04551144e-03 3.66728155e-03 3.86487723e-03 5.59375945e-03
 7.13010234e-03 2.41241121e-03 3.50763605e-03 5.91064369e-03
 1.15587301e-03 1.58165456e-03 2.95669731e-03 7.98231301e-04
 1.44631492e-03 1.44945514e-03 1.24216414e-03 1.09865311e-03
 1.34344628e-03 1.47232355e-03 6.21230858e-03 5.44795690e-03
 1.13649130e-02 1.33933748e-02 6.21426345e-03 2.10673441e-02
 2.00128017e-03 6.26663140e-03 7.01151177e-03 4.65016462e-03
 5.75833978e-03 1.01866636e-03 3.42222237e-03 4.35357070e-03
 4.62776747e-03 7.490872

In [15]:
print(voltages[1])

[4.13812440e-03 2.01195393e-04 6.71292982e-04 1.65446520e-04
 1.20240713e-03 6.64310963e-03 1.12404598e-03 5.69935988e-05
 1.30751614e-03 3.23059678e-04 2.81990922e-01 4.38608002e-01
 3.47433107e-01 5.14213968e-01 1.67379832e-01 2.85864115e-01
 1.00000000e+00 1.68176016e-01 3.41528625e-01 6.23668986e-02
 2.50364360e-02 6.57443521e-02 3.75973821e-03 1.32433326e-02
 1.10664178e-02 2.40422065e-02 6.76954298e-03 3.88710731e-02
 5.68967767e-03 1.80948640e-02 1.63138166e-02 2.73579912e-02
 4.48200742e-02 1.52710495e-02 1.03370311e-02 1.42431761e-02
 1.06683785e-02 3.11129531e-02 7.07947231e-03 8.41947745e-03
 1.07328742e-02 2.59176269e-02 1.67672179e-02 2.84829845e-03
 1.40338506e-02 7.22229703e-03 3.97006170e-03 7.98958133e-03
 1.04648743e-02 2.35151400e-02 4.86779776e-03 2.40236282e-02
 1.04278612e-02 8.41297459e-03 9.56379705e-03 1.28205294e-02
 5.08146719e-02 2.68565624e-02 2.11922399e-02 2.07603540e-02
 5.40942957e-03 2.28373734e-03 6.00581123e-02 1.89230348e-02
 2.27256046e-03 1.575922

In [21]:
predicted = np.argmax(voltages, axis=0)
correct = np.repeat(np.arange(10), k)

num_incorrect = np.sum(predicted != correct)

accuracy = np.mean(predicted == correct)
error_rate = 1 - accuracy

print(predicted)
print(f"Incorrect predictions: {num_incorrect}/100")
print(f"Accuracy: {accuracy:.2%}")
print(f"Error Rate: {error_rate:.2%}")

[0 0 0 0 0 5 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 5 5 5 3 3 5 3
 5 3 3 4 9 4 4 4 4 4 4 4 4 3 5 5 5 5 5 1 5 5 5 6 2 6 6 6 6 6 6 6 6 7 7 7 7
 7 7 7 8 7 7 8 8 8 8 8 3 8 8 8 8 4 4 9 9 9 9 4 9 4 9]
Incorrect predictions: 16/100
Accuracy: 84.00%
Error Rate: 16.00%
