# Verification of Thesis Results

## Imports

In [2]:
import sys
import os
import pathlib
from pathlib import Path
import sklearn
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar100, cifar10, mnist

from ...src.models.baseline.helper import *

## Directory Information

In [6]:
# Locate the project directories relative to where the notebook is running:
# the root is two levels above the working directory, and the data folder
# sits directly under the root.
cwd = Path.cwd()
root = cwd.parent.parent
data = pathlib.PurePath(root).joinpath('data')

## Parameter Grid

### First Test (Variable K)
* K = [50, 100, 200, 500, 1000]
* $\epsilon$ = 0.01

### Second Test (Variable $\epsilon$)
* K = 1000
* $\epsilon$ = [0.01, 0.05, 0.1, 1.0]


In [None]:
# Experiment sweeps, keyed by test number:
#   1 -> K varies while epsilon stays fixed
#   2 -> epsilon varies while K stays fixed
test_param_grid = {
    1: dict(K=[50, 100, 200, 500, 1000], epsilon=0.01),
    2: dict(K=1000, epsilon=[0.01, 0.05, 0.1, 1.0]),
}

### Model Parameters

Complete Information uses the same model as CIFAR.

In [None]:
# Training hyper-parameters per dataset family (batch size and epoch count).
model_param_grid = {
    'MNIST': dict(batch_size=1000, epochs=15),
    'CIFAR': dict(batch_size=64, epochs=30),
}

## Load Data

* MNIST
* CIFAR 10
* CIFAR 100

In [None]:
# Fetch each benchmark through its Keras loader and hand the raw result to the
# project's dataset_normalize helper. Entries are built in the order
# MNIST, CIFAR10, CIFAR100.
datasets = {
    name: dataset_normalize(loader.load_data())
    for name, loader in (
        ('MNIST', mnist),
        ('CIFAR10', cifar10),
        ('CIFAR100', cifar100),
    )
}

## Constants

In [None]:
# Shape tuples for the MNIST and CIFAR inputs.
mnist_shape, cifar_shape = (28, 28, 1), (32, 32, 3)

# Seed value reused by later cells (passed as SEED when partitioning).
seed = 8008

## Strategy 1

## MNIST

In [None]:
# Empty frame that will collect the MNIST results.
# (pandas spells the class `DataFrame`; `pd.Dataframe` raises AttributeError.)
mnist_results = pd.DataFrame()

### K-Means Baseline


In [None]:
# Pull the MNIST splits out of `datasets`. The image arrays get one extra
# trailing axis (presumably the channel axis of mnist_shape; confirm), while
# the label arrays are used as stored.
x_train, x_test = (
    np.expand_dims(datasets['MNIST'][split], -1)
    for split in ('x_train', 'x_test')
)
y_train, y_test = (
    datasets['MNIST'][split] for split in ('y_train', 'y_test')
)

In [None]:
# Collapse every training sample into a single row: (n_samples, everything else).
# NOTE(review): only x_train is reshaped here; x_test keeps its previous
# shape. Confirm that runKmeans1 expects this asymmetry.
x_train = np.reshape(x_train, (len(x_train), -1))

In [None]:
# Run the K-Means baseline once for every K of the first sweep.
# The loop used to rebind `result` on each pass, so only the outcome for the
# last K survived. Every outcome is now kept, keyed by K, in the same way the
# partitioning loop stores its per-K output. `result` is still bound to the
# most recent run for any later cell that reads it.
mnist_kmeans_results = {}
for K in test_param_grid[1]['K']:
    result = runKmeans1(K, (x_train, x_test), (y_train, y_test), mnist_shape)
    mnist_kmeans_results[K] = result

### Neighbourhood Intermediate Calculations

In [None]:
# Rebind the training data for the neighbourhood calculations: the MNIST
# training images go through the project's `flatten` helper, and the labels
# are taken as stored.
x_train, y_train = (
    flatten(datasets['MNIST']['x_train']),
    datasets['MNIST']['y_train'],
)

In [None]:
# Partition the training data once per K of the first sweep and keep each
# outcome keyed by K. Every call is handed its own TSV path under
# <data>/interim through write_path.
interim_store = pathlib.PurePath(data, 'interim')
partitions = {}

for K in test_param_grid[1]['K']:
    store = interim_store / f'{K}_MNISTpartitions.tsv'
    partitions[K] = partition(x_train, K, SEED=seed, write_path=store)

### Gaussian Neighbourhood

### $\epsilon$ Neighbourhood

### Compilation

## CIFAR 10

In [None]:
# Empty frame that will collect the CIFAR-10 results.
# (pandas spells the class `DataFrame`; `pd.Dataframe` raises AttributeError.)
cifar10_results = pd.DataFrame()

In [None]:
# Rebind the training data to CIFAR-10: the training images go through the
# project's `flatten` helper, and the labels are taken as stored.
x_train, y_train = (
    flatten(datasets['CIFAR10']['x_train']),
    datasets['CIFAR10']['y_train'],
)


### K-Means Baseline


In [None]:
# Pull the CIFAR-10 splits out of `datasets`, adding one trailing axis to the
# image arrays and using the label arrays as stored.
# NOTE(review): Keras documents CIFAR images as (N, 32, 32, 3). Unless
# dataset_normalize changes that layout, the extra axis will not match
# cifar_shape; this looks carried over from the MNIST cell, so confirm.
x_train, x_test = (
    np.expand_dims(datasets['CIFAR10'][split], -1)
    for split in ('x_train', 'x_test')
)
y_train, y_test = (
    datasets['CIFAR10'][split] for split in ('y_train', 'y_test')
)

### Gaussian Neighbourhood

### $\epsilon$ Neighbourhood

## CIFAR 100

In [None]:
# Empty frame that will collect the CIFAR-100 results.
# This cell sits under the CIFAR 100 section but used to rebind
# `cifar10_results`, which would discard the CIFAR-10 frame. It also used the
# non-existent `pd.Dataframe`; pandas spells the class `DataFrame`.
cifar100_results = pd.DataFrame()

In [None]:
# Rebind the training data to CIFAR-100: the training images go through the
# project's `flatten` helper, and the labels are taken as stored.
x_train, y_train = (
    flatten(datasets['CIFAR100']['x_train']),
    datasets['CIFAR100']['y_train'],
)


### K-Means Baseline


In [None]:
# Pull the CIFAR-100 splits out of `datasets`, adding one trailing axis to the
# image arrays and using the label arrays as stored.
# NOTE(review): Keras documents CIFAR images as (N, 32, 32, 3). Unless
# dataset_normalize changes that layout, the extra axis will not match
# cifar_shape; this looks carried over from the MNIST cell, so confirm.
x_train, x_test = (
    np.expand_dims(datasets['CIFAR100'][split], -1)
    for split in ('x_train', 'x_test')
)
y_train, y_test = (
    datasets['CIFAR100'][split] for split in ('y_train', 'y_test')
)

### Gaussian Neighbourhood

### $\epsilon$ Neighbourhood

## Strategy 2

## MNIST

## CIFAR 10

## CIFAR 100