In [19]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD, NMF
import math
from PIL import Image

In [2]:
# Load the bars dataset. header=None tells pandas the file has no header
# row, so the columns receive integer labels instead of names.
df = pd.read_csv(
    filepath_or_buffer='../Datasets/bars.csv',
    header=None,
)

In [3]:
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0,0,0,0,0,255,0,0,0,0,...,255,255,255,255,255,255,255,255,255,255
1,0,0,255,0,0,0,0,0,0,0,...,255,255,0,0,255,0,0,0,0,0
2,0,255,0,255,0,0,255,0,0,255,...,255,255,0,255,0,255,0,0,255,0
3,255,255,255,255,255,255,255,255,0,0,...,0,0,0,0,0,0,0,255,0,0
4,0,255,0,0,255,0,0,0,0,255,...,255,255,255,255,255,255,255,255,255,255


In [4]:
def reduce(selector, k, data=None, max_grid=100):
    """Fit a decomposition, report its reconstruction error and save images.

    The selector is fitted on the data, the reduced representation is mapped
    back to the original space, and the Frobenius norm of the difference is
    printed together with the selector's class name and ``k``. Two PNG files
    are then written through ``make_image_matrix``:

    * ``"<k>-<name>.png"``      -- a square grid of reconstructed rows;
    * ``"base-<k>-<name>.png"`` -- one row holding the first ``k`` components
      (scaled by 10).

    Parameters
    ----------
    selector : object
        Estimator exposing ``fit_transform``, ``inverse_transform`` and,
        after fitting, ``components_``.
    k : int
        Number of components; used for the printed label, the file names and
        the number of basis images drawn.
    data : array-like, optional
        Matrix to decompose, one flattened image per row. Defaults to the
        notebook-level ``df`` so existing ``reduce(selector, k)`` calls keep
        working.
    max_grid : int, optional
        Upper bound for the side of the reconstruction grid (default 100,
        i.e. at most 100 x 100 images).
    """
    if data is None:
        # Backward-compatible fallback to the dataset loaded by the notebook.
        data = df

    reduced = selector.fit_transform(data)
    original = selector.inverse_transform(reduced)

    delta = data - original
    norm = np.linalg.norm(delta, ord='fro')
    name = type(selector).__name__
    print(name)
    print('K = {}'.format(k))
    print('Norm = {}'.format(norm))

    # The grid used to be fixed at 100 x 100, which indexes past the end of
    # `original` when there are fewer than 10,000 rows. Use the largest
    # square that fits instead, capped at max_grid so that datasets with at
    # least max_grid**2 rows produce exactly the same picture as before.
    side = min(max_grid, int(math.sqrt(len(original))))
    filename = "{}-{}.png".format(k, name)
    make_image_matrix(side, side, original, filename)

    # Components are multiplied by 10 before drawing, as in the original.
    basis_filename = "base-{}-{}.png".format(k, name)
    make_image_matrix(1, k, selector.components_ * 10, basis_filename)

In [23]:
def make_image_matrix(rows, cols, images, filename):
    """Tile flattened square images into a single grid image and save it.

    The side length of every tile is the truncated square root of the length
    of ``images[0]``, so each entry is treated as a flattened square image.
    Tiles are taken from ``images`` in order and laid out row by row on a
    ``rows`` x ``cols`` grid drawn on an 8-bit greyscale ('L') canvas.

    Parameters
    ----------
    rows, cols : int
        Grid dimensions; entries ``0 .. rows*cols - 1`` of ``images`` are used.
    images : indexable of 1-D numpy arrays
        Flattened images; each must support ``astype`` and ``reshape``.
    filename : str
        Path the assembled image is saved to.
    """
    side = int(math.sqrt(len(images[0])))
    canvas = Image.new('L', (cols * side, rows * side))

    for r in range(rows):
        top = r * side
        for c in range(cols):
            flat = images[r * cols + c]
            # Cast to 32-bit ints for PIL mode 'I', then unflatten in
            # column-major (Fortran) order.
            pixels = flat.astype(np.int32).reshape((side, side), order='F')
            tile = Image.fromarray(pixels, mode='I')
            canvas.paste(tile, box=(c * side, top))

    canvas.save(filename)

In [24]:
# Fixed seed passed to both estimators so that the reported norms and the
# saved images are reproducible on a fresh Restart & Run All. Without it the
# randomized parts of the solvers draw from the global NumPy RNG and results
# can differ between runs.
RANDOM_STATE = 0

# Compare rank-k reconstructions from truncated SVD and NMF for several k.
for k in [5, 10, 16, 20, 50, 63]:
    svd = TruncatedSVD(n_components=k, random_state=RANDOM_STATE)
    reduce(svd, k)

    nmf = NMF(n_components=k, random_state=RANDOM_STATE)
    reduce(nmf, k)

TruncatedSVD
K = 5
Norm = 85409.48396501168
NMF
K = 5
Norm = 85670.8244701357
TruncatedSVD
K = 10
Norm = 63793.62904147377
NMF
K = 10
Norm = 65256.44606934091
TruncatedSVD
K = 16
Norm = 30995.723028985707
NMF
K = 16
Norm = 31348.308501825828
TruncatedSVD
K = 20
Norm = 29569.04253409124
NMF
K = 20
Norm = 30034.56608883381
TruncatedSVD
K = 50
Norm = 16125.300506277628
NMF
K = 50
Norm = 19087.081368182513
TruncatedSVD
K = 63
Norm = 4141.906185137058
NMF
K = 63
Norm = 12528.682182470886
