## In this notebook I will run experiments with pictures.

In [34]:
import mnist
import numpy as np
import random as rd
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF
from desummation import Desummation

# Load the MNIST training images and their digit labels.
train_images = mnist.train_images()
train_labels = mnist.train_labels()

# Upper bound on how many sample indices are kept per digit when the
# per-digit index lists are built.
max_points = 1000

In [208]:
# For every digit 0-9, collect the indices of the first `max_points` training
# samples carrying that label.
# NOTE: the name `all` shadows the Python builtin; it is kept because other
# cells look the per-digit lists up through it.
all = [
    list(np.where(np.array(train_labels) == digit)[0][:max_points])
    for digit in range(10)
]
# Individual names: each one refers to the very same list object stored in `all`.
zeroes, ones, twos, threes, fours, fives, sixs, sevens, eights, nines = all

## Quick demonstration of what the pictures look like:

In [36]:
def take_example(**kwargs):
    '''
    Return one training picture as a 28x28 array.

    Keyword arguments:
        index (int): Position of the picture in ``train_images``. When omitted,
            a random position between 0 and 10000 (both ends included) is drawn.

    Any other keyword arguments are ignored.
    '''
    # Draw a random index only when none was supplied, so that a call with an
    # explicit ``index`` does not consume a value from the random generator.
    if 'index' in kwargs:
        index = kwargs['index']
    else:
        index = rd.randint(0, 10000)
    return np.resize(train_images[index], (28, 28))

In [37]:
#image = take_example()
#plt.imshow(image, cmap='gray')
#plt.axis('off')
#plt.show()

![example picture](picture/example_number.png)

In [38]:
# Desummation instance used by the single-picture fit/predict experiments below.
dsm_pictures = Desummation()

In [39]:
# Pick one random training picture (index drawn from 10000..40000, both ends
# included) and keep its label so it can be printed next to the result.
index1 = rd.randint(10000, 40000)
label1 = train_labels[index1]
example1 = take_example(index=index1)

### First, let's create, for example, 200 weights (the best result is achieved with $\geq$ 784) and see how well they approximate a random picture

In [40]:
# Number of weights requested from the model for this first experiment.
dim = 200
dsm_pictures.fit(example1, dim)
# Disabled experiment: overwrite the basis matrices with the first `dim` training pictures.
#for j in range(dim):
    #dsm_pictures.basis.matrices[j] = np.resize(train_images[j], (28, 28))
# Presumably the model's approximation of `example1` (it is what the disabled
# plotting code below would display) -- TODO confirm against Desummation.predict.
picture_one = dsm_pictures.predict(example1)
print("Answer", label1)
print('Index', index1)
# Plotting is disabled; a saved image is embedded in the notebook instead.
#plt.imshow(picture_one, cmap='gray')
#plt.axis('off')
#plt.show()

Answer 0
Index 37510


![noisy number](picture/noise.png)

## Well... not very good. But now let us replace the random matrices with some actual pictures from the *zeroes* and *ones* lists `and leave only 2 weights`

In [195]:
# The two digit classes used in this experiment.
list1 = zeroes
list2 = ones

# Target picture: a random sample taken from list1, with its label kept for printing.
index2 = rd.choice(list1)
label2 = train_labels[index2]
example2 = take_example(index=index2)

# NOTE(review): the surrounding text speaks of leaving only 2 weights, but 100
# are requested here and only the first two basis matrices are replaced below --
# confirm which is intended.
dsm_pictures.fit(example2, 100)

# Replace the first two basis matrices with real pictures, one drawn at random
# from each of the two lists.
dsm_pictures.basis.matrices[0] = np.resize(train_images[rd.choice(list1)], (28, 28))
dsm_pictures.basis.matrices[1] = np.resize(train_images[rd.choice(list2)], (28, 28))

### Now let's look at the prediction

In [42]:
# Run the model with the modified basis on the target picture and print the
# picture's true label and dataset index.
predicted_picture1 = dsm_pictures.predict(example2)
print("Answer", label2)
print('Index', index2)
# Plotting is disabled; a saved image is embedded in the notebook instead.
#plt.imshow(predicted_picture1, cmap='gray')
#plt.axis('off')
#plt.show()

Answer 0
Index 8515


![zero_picture](picture/zero.png)

## With only 2 weights and only 2 training samples, we can now differentiate zeroes from ones, and moreover with brilliant accuracy!
### Let us visualise this with a plot:

In [165]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [211]:
def mnist_classificator(m=2, dim=1, avrg_number=4):
    '''
    Classify MNIST digits from Desummation weights and plot the first two weights.

    Arguments:
        m (int): How many digit classes to classify (the digits 0 .. m-1)
        dim (int): How many basis matrices (weights) to build for each digit
        avrg_number (int): How many pictures are averaged into one basis matrix (recommended 1-20)

    This is a very simple example architecture for predicting on the MNIST digits dataset.
    For each of the "m" digits it builds "dim" basis matrices; every basis matrix is
    (the sum of "avrg_number" pictures of that digit) // "avrg_number".
    One extra all-zero matrix is appended, so the model has m*dim + 1 weights in total:
    m=2 and dim=3 give 3 base matrices for zero, 3 for one and 7 weights overall;
    m=10 and dim=10 give 101 weights.

    The pictures of the chosen digits that were not used for the basis are turned into
    weight vectors, a LogisticRegression model is trained and scored on a random split
    of them, and the accuracy is printed.

    The plot will always be for the distribution of the first two weights.

    Raises:
        ValueError: if an argument is out of range, or a digit has fewer than
            avrg_number*dim samples available.
    '''
    # NOTE: `all` here is the notebook-level list of per-digit index lists,
    # which shadows the Python builtin of the same name.
    if not 1 <= m <= len(all):
        raise ValueError('m must be between 1 and %d, got %r' % (len(all), m))
    if dim < 1 or avrg_number < 1:
        raise ValueError('dim and avrg_number must be positive integers')

    per_digit = avrg_number * dim   # pictures consumed by the basis, per digit
    zero_slot = m * dim             # position of the extra all-zero matrix

    dsm_pictures = Desummation()
    dsm_pictures.fit(np.zeros((28, 28)), zero_slot + 1)

    basis_pool = []   # per digit: indices of the pictures averaged into the basis
    held_out = []     # (picture index, digit) pairs left over for classification
    for digit in range(m):
        if len(all[digit]) < per_digit:
            raise ValueError(
                'digit %d has only %d samples, %d are needed for the basis'
                % (digit, len(all[digit]), per_digit))
        # Shuffle a copy so the notebook-level lists are not reordered as a side effect.
        shuffled = list(all[digit])
        rd.shuffle(shuffled)
        basis_pool.append(shuffled[:per_digit])
        # The digit is recorded together with the index, so every held-out
        # picture gets exactly one label (including digit 9).
        held_out.extend((index, digit) for index in shuffled[per_digit:])

    for i in range(dim):
        for j in range(m):
            chunk = basis_pool[j][avrg_number*i:avrg_number*(i+1)]
            stack = np.array([np.resize(train_images[el], (28, 28)) for el in chunk])
            # Accumulate in a dtype at least as wide as int64: train_images is
            # presumably uint8 (TODO confirm), and summing in uint8 would wrap around.
            wide = np.promote_types(stack.dtype, np.int64)
            dsm_pictures.basis.matrices[m*i+j] = stack.sum(axis=0, dtype=wide)//avrg_number

    # For better weights stability
    dsm_pictures.basis.matrices[zero_slot] = np.zeros((28, 28))

    X = []
    labels = []
    x_coords = []
    y_coords = []
    for index, digit in held_out:
        picture = np.resize(train_images[index], (28, 28))
        dsm_pictures.predict(picture)

        weights = dsm_pictures.weights()
        X.append(weights)
        x_coords.append(weights[0])
        y_coords.append(weights[1])
        labels.append(digit)

    x_coords = np.array(x_coords)
    y_coords = np.array(y_coords)

    # A simple scikit-learn LogisticRegression model
    model = LogisticRegression()
    X_train, X_test, y_train, y_test = train_test_split(X, labels)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print('Accuracy of a classifcation:', score)
    plt.scatter(x_coords, y_coords, c=labels)
    plt.xlabel('X Coordinate')
    plt.ylabel('Y Coordinate')
    plt.title('Plot of Predictions')
    plt.show()


    # Author's observation: nines are separated very badly from sevens, fives and fours

In [236]:
#mnist_classificator(m=2, dim=1, avrg_number=1)

![zeroes with ones differentiation](picture/zeroes_ones.png)

### After experimenting a little with the whole dataset, I got 87% accuracy with a LogisticRegression model and 100 parameters with this architecture.
### I am ending my project here, as I now see many possible research directions that I am not ready to pursue on my own.

### For some further intuition, I refer you to [this page](https://blog.acolyer.org/2019/02/18/the-why-and-how-of-nonnegative-matrix-factorization/) about Non-Negative Matrix Factorization; for even more, search for the TensorTrain decomposition.

### Personally, I think these research areas are close to my work, and one may find them interesting.