# Training and testing neural networks with simplified, synthetic data

This notebook presents my findings from training a number of neural networks on simplified, synthetic data while varying several parameters.

The convolutional neural networks (CNNs) used in these experiments rely on labelled training and testing data to train the network and to determine its loss and accuracy on unseen data. However, it is not certain how much a CNN relies on the _quality_ of the data, i.e. the specific problem at hand: a CNN needs data to function, but should it be useful for solving only one problem? How do we identify whether a CNN is good at solving many problems?

One way to check this is to simplify the problem. If a CNN is designed in such a way that its accuracy is directly related to the complexity of the problem, a simpler dataset should therefore produce more accurate results. By training multiple CNNs with different problem spaces on the same type of dataset, it should be possible to infer the inherent quality of a network architecture.

The problem of image classification can be simplified to something that can easily have its complexity increased—polygons. A shape generation program in Python has been created which returns a set of NumPy arrays of training and test images and labels that can be used by CNNs implemented in TensorFlow/Keras. This shape generation tool is called directly by the CNN.

## Variables
- Image quantity (20k, 40k, 60k, 80k, 100k)
- Noise applications (0-2; 3 levels)
- Label count (2-5) (circle, triangle, square, pentagon, hexagon)
- Epochs (100)

## Choice of Neural Networks
- Fashion MNIST by [Margaret Maynard-Reid](https://github.com/margaretmz/deep-learning/blob/master/fashion_mnist_keras.ipynb)
- CIFAR-10 by the Keras team [Keras](https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py)

In [1]:
import os
import re
import sys

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from IPython.display import Image
from ipywidgets import interact

from IPython.core.display import display, HTML
# Inject a CSS rule that sets the `.container` element to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Location of the results table (a path relative to the working directory).
url = 'results.csv'

# Load the results into a DataFrame; the cells below read columns from it
# such as 'CNN', 'Loss' and 'Accuracy'.
data = pd.read_csv(url)

# Widget choices shared by every variant of the 2D graph.
_CNN_CHOICES = ['Fashion-MNIST', 'CIFAR-10']
_X_AXIS_CHOICES = ['Training image count', 'Noise addition', 'Label count', 'Epoch count']
_Y_AXIS_CHOICES = ['Loss', 'Accuracy']

# option -> (plot the best result instead of the mean, plot both CNNs together)
_GRAPH_MODES = {
    'Average': (False, False),
    'Average both': (False, True),
    'Best': (True, False),
    'Best both': (True, True),
}


def _summarise(cnn, x_axis, y_axis, best):
    """Aggregate ``y_axis`` per distinct ``x_axis`` value for one CNN.

    Returns a Series whose index holds the ``x_axis`` values and whose values
    are the mean of ``y_axis`` (``best`` false) or the best result (``best``
    true): the minimum for 'Loss', the maximum for anything else.
    """
    grouped = data.loc[data['CNN'] == cnn].groupby(x_axis)[y_axis]
    if not best:
        return grouped.mean()
    # A lower loss is better; for every other metric higher is better.
    return grouped.min() if y_axis == 'Loss' else grouped.max()


def _scatter(summary, **scatter_kwargs):
    """Build a Scatter trace from an aggregated Series.

    The x values are taken from the Series index rather than from
    ``data[x_axis].unique()``: ``unique()`` keeps order of appearance and
    covers every CNN, whereas ``groupby`` sorts its keys and only contains
    the selected CNN, so the two could be misaligned or differ in length.
    """
    return go.Scatter(x=summary.index.tolist(), y=summary.tolist(), **scatter_kwargs)


def _show(traces, title, x_axis, y_axis):
    """Render ``traces`` in one figure with the given title and axis labels."""
    fig = go.Figure(data=traces)
    fig.update_layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title=y_axis,
    )
    fig.show()


@interact(option=['Average', 'Average both', 'Best', 'Best both'])
def choose_graph(option):
    """Show a 2D line graph of Loss or Accuracy against one experiment variable.

    ``option`` selects between the average and the best result per x value,
    and between a single selectable CNN and both CNNs overlaid. A nested
    ``interact`` then offers the CNN (single mode only) and the two axes.
    Unknown options display nothing.
    """
    if option not in _GRAPH_MODES:
        return
    best, both = _GRAPH_MODES[option]
    headline = 'The best results of' if best else 'The average effect of'

    if both:
        @interact(x_axis=_X_AXIS_CHOICES, y_axis=_Y_AXIS_CHOICES)
        def show_comparison_graph(x_axis, y_axis):
            traces = [
                _scatter(_summarise(cnn, x_axis, y_axis, best), mode='lines', name=cnn)
                for cnn in _CNN_CHOICES
            ]
            _show(traces, f'{headline} {x_axis} on {y_axis}', x_axis, y_axis)
    else:
        @interact(cnn=_CNN_CHOICES, x_axis=_X_AXIS_CHOICES, y_axis=_Y_AXIS_CHOICES)
        def show_single_graph(cnn, x_axis, y_axis):
            trace = _scatter(_summarise(cnn, x_axis, y_axis, best))
            _show([trace], f'{headline} {x_axis} on {y_axis} for {cnn}', x_axis, y_axis)

interactive(children=(Dropdown(description='option', options=('Average', 'Average both', 'Best', 'Best both'),…

In [2]:
def scale(v):
    """Return ``1 / (1 - v)``.

    Note that ``v == 1`` makes the denominator zero.
    """
    denominator = 1 - v
    return 1 / denominator


# Element-wise wrapper around ``scale`` for array-like input.
scale_values = np.vectorize(scale)

@interact(cnn=['Fashion-MNIST', 'CIFAR-10'],
          x_axis=['Training image count','Noise addition','Label count','Epoch count'],
          y_axis=['Training image count','Noise addition','Label count','Epoch count'],
          z_axis=['Loss', 'Accuracy'])
def show_graph_3d(cnn, x_axis, y_axis, z_axis):
    """Show a 3D mesh of ``z_axis`` against two experiment variables for one CNN.

    ``cnn`` selects which network's rows are plotted; ``x_axis`` and
    ``y_axis`` name the columns used for the horizontal axes and ``z_axis``
    the result column ('Loss' or 'Accuracy') used for the vertical axis.
    """
    # Plot only the selected network's rows. Previously ``cnn`` appeared in
    # the title only, so the mesh mixed the results of both networks.
    selected = data.loc[data['CNN'] == cnn]

    layout = go.Layout(
        title=f'{cnn} effect of {x_axis} and {y_axis} on {z_axis}',
        scene=dict(
            xaxis=dict(title=x_axis),
            yaxis=dict(title=y_axis),
            zaxis=dict(title=z_axis),
        ),
        margin=dict(r=0, b=30, l=0, t=30),
    )

    mesh = go.Mesh3d(
        x=selected[x_axis].tolist(),
        y=selected[y_axis].tolist(),
        z=selected[z_axis].tolist(),
        opacity=0.5,
        color='rgba(22,100,244,1)',
    )
    fig = go.Figure(data=[mesh], layout=layout)
    fig.show()


interactive(children=(Dropdown(description='cnn', options=('Fashion-MNIST', 'CIFAR-10'), value='Fashion-MNIST'…

In [3]:
# Directory holding the images offered by the viewer below.
fdir = os.path.abspath(os.path.join('.', 'failures'))

@interact(cnn=['cifar', 'fashion'],
          train=list(range(20000, 100001, 20000)),
          noise=list(range(3)),
          label=list(range(2, 6)))
def define_img_list(cnn,
                    train,
                    noise,
                    label,):
    """List the images in ``fdir`` for one configuration and let the user view one.

    Files are selected when their name starts with
    ``{cnn}-{train}-{noise}n-{label}s-``. The matches feed a nested
    ``interact`` dropdown; choosing a file displays it.
    """
    pattern = re.compile(rf"{cnn}-{train}-{noise}n-{label}s-.*")
    # os.listdir returns entries in arbitrary order; sort so the dropdown
    # is stable between runs.
    img_list = sorted(name for name in os.listdir(fdir) if pattern.match(name))

    @interact(file=img_list)
    def show_images(file):
        # ``file`` is None when no image matched the selected configuration.
        if file is not None:
            display(Image(os.path.join(fdir, file)))

interactive(children=(Dropdown(description='cnn', options=('cifar', 'fashion'), value='cifar'), Dropdown(descr…