With gpu acceleration a full run takes around 1 hour. You need not rerun this as the results are included.
Link to hosted Google Colab in case of any trouble: https://colab.research.google.com/drive/1tTG1YImE60EBbeVhmLMeZG2W0gEef3Az?usp=sharing 

## Imports

In [None]:
# Upgrade plotly inside the notebook's environment before it is imported below
% pip install -U plotly

Collecting plotly
[?25l  Downloading https://files.pythonhosted.org/packages/c9/09/315462259ab7b60a3d4b7159233ed700733c87d889755bdc00a9fb46d692/plotly-4.14.1-py2.py3-none-any.whl (13.2MB)
[K     |████████████████████████████████| 13.2MB 255kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.14.1


In [None]:
import random
import time

import numpy as np
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import gc

## Constants

In [None]:
# Mini-batch size passed to every `fit` call
BATCH_SIZE = 128
# Number of training epochs per `fit` call
epochs = 10
# Number of label-randomness levels sampled (r = step / STEPS)
STEPS = 8
MNIST_CLASSES = 10 # mnist, fashion_mnist, cifar10
CIFAR100_CLASSES = 100

## Dataset setup

### Load datasets

In [None]:
# Loads the four datasets through keras
mnist = keras.datasets.mnist.load_data()
fashion_mnist = keras.datasets.fashion_mnist.load_data()
cifar10 = keras.datasets.cifar10.load_data()
cifar100 = keras.datasets.cifar100.load_data()

# Raw tuples exactly as returned by keras:
#   [x]       = dataset
#   [x][0]    = train, [x][1] = test
#   [x][n][0] = data,  [x][n][1] = labels
immutable_datasets = [mnist,fashion_mnist,cifar10,cifar100]

# Same layout as above, but built from lists so entries can be reassigned later:
# [[train data, train labels], [test data, test labels]] per dataset
datasets = [
  [[train_x, train_y], [test_x, test_y]]
  for ((train_x, train_y), (test_x, test_y)) in immutable_datasets
]

# Display names, in the same order as `datasets`
namelist = ["mnist","fashion_mnist","cifar10","cifar100"]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


### Format datasets

In [None]:
# Formats datasets.
# Everything is rebuilt from the untouched `immutable_datasets`, so this cell can be
# re-run safely: pixels are never rescaled twice and labels are never re-encoded.
datasets = [] # [[train data, one-hot train labels], [test data, one-hot test labels]]
labels = [] # [(train labels, test labels)] (this prevents needing to use `np.argmax()` so much)

for name, ((train_x, train_y), (test_x, test_y)) in zip(namelist, immutable_datasets):
  print(name+":")

  classes = CIFAR100_CLASSES if name == "cifar100" else MNIST_CLASSES

  # Scale pixel values from [0, 255] to [0, 1]
  train_x = train_x.astype("float32") / 255
  test_x = test_x.astype("float32") / 255

  # Greyscale images come without a channel axis; add one to conform with keras
  # For MNIST: (60,000,28,28) -> (60,000,28,28,1) and (10,000,28,28) -> (10,000,28,28,1)
  if train_x.ndim == 3:
    train_x = np.expand_dims(train_x, -1)
    test_x = np.expand_dims(test_x, -1)

  # Check shapes
  print("train data:\t", train_x.shape)
  print("test data:\t", test_x.shape)

  # The cifar datasets store each label as a list of a single int (`[4]`)
  #  instead of a single int (4); `flatten()` gives a 1-D copy in both cases.
  labels.append([train_y.flatten(), test_y.flatten()])

  # Checks shape of just added labels
  print("labels:\t\t",len(labels[-1]),labels[-1][0].shape,labels[-1][1].shape)

  # One-hot encoding (e.g. with 4 classes: 2 -> [0,0,1,0])
  train_y_onehot = keras.utils.to_categorical(labels[-1][0], classes)
  test_y_onehot = keras.utils.to_categorical(labels[-1][1], classes)

  datasets.append([
    [train_x, train_y_onehot],
    [test_x, test_y_onehot]
  ])

  print("train labels:\t", train_y_onehot.shape)
  print("test labels:\t", test_y_onehot.shape)
  print()

mnist:
train data:	 (60000, 28, 28, 1)
test data:	 (10000, 28, 28, 1)
labels:		 2 (60000,) (10000,)
train labels:	 (60000, 10)
test labels:	 (10000, 10)

fashion_mnist:
train data:	 (60000, 28, 28, 1)
test data:	 (10000, 28, 28, 1)
labels:		 2 (60000,) (10000,)
train labels:	 (60000, 10)
test labels:	 (10000, 10)

cifar10:
train data:	 (50000, 32, 32, 3)
test data:	 (10000, 32, 32, 3)
labels:		 2 (50000,) (10000,)
train labels:	 (50000, 10)
test labels:	 (10000, 10)

cifar100:
train data:	 (50000, 32, 32, 3)
test data:	 (10000, 32, 32, 3)
labels:		 2 (50000,) (10000,)
train labels:	 (50000, 100)
test labels:	 (10000, 100)



## Build models

In [None]:
# Small two-stage conv net for the 28x28 greyscale datasets
mnist_conv_model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(MNIST_CLASSES, activation="softmax"),
])

# Same architecture for fashion_mnist
fashion_mnist_conv_model = keras.models.clone_model(mnist_conv_model)


def _build_cifar_conv_model(num_classes):
  """Builds the conv net shared by both CIFAR datasets.

  Three stages of (conv, dropout, conv, max-pool) with 32, 64 and 128 filters,
  followed by a 1024-unit dense layer and a `num_classes`-way softmax.
  """
  stack = [keras.Input(shape=(32, 32, 3))]
  for filters in (32, 64, 128):
    stack += [
      layers.Conv2D(filters, (3, 3), padding='same', activation='relu'),
      layers.Dropout(0.2),
      layers.Conv2D(filters, (3, 3), padding='same', activation='relu'),
      layers.MaxPooling2D(pool_size=(2, 2)),
    ]
  stack += [
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax'),
  ]
  return keras.Sequential(stack)


# cifar10 has the same number of classes as the MNIST datasets
cifar10_conv_model = _build_cifar_conv_model(MNIST_CLASSES)
cifar100_conv_model = _build_cifar_conv_model(CIFAR100_CLASSES)

# One conv model per dataset, in `namelist` order
conv_models = [
  mnist_conv_model,
  fashion_mnist_conv_model,
  cifar10_conv_model,
  cifar100_conv_model
]

In [None]:
def _build_dense_model(input_shape, num_classes):
  """Builds the fully connected net used for every dataset.

  Flattens the image, then applies five (dense + 50% dropout) layers of
  2500, 2000, 1500, 1000 and 500 units before a `num_classes`-way softmax.
  """
  stack = [keras.Input(shape=input_shape), layers.Flatten()]
  for units in (2500, 2000, 1500, 1000, 500):
    stack.append(layers.Dense(units, activation="relu"))
    stack.append(layers.Dropout(0.5))
  stack.append(layers.Dense(num_classes, activation="softmax"))
  return keras.Sequential(stack)


mnist_dense_model = _build_dense_model((28, 28, 1), MNIST_CLASSES)

# Same architecture for fashion_mnist
fashion_mnist_dense_model = keras.models.clone_model(mnist_dense_model)

# cifar10 has the same number of classes as the MNIST datasets
cifar10_dense_model = _build_dense_model((32, 32, 3), MNIST_CLASSES)

cifar100_dense_model = _build_dense_model((32, 32, 3), CIFAR100_CLASSES)

# One dense model per dataset, in `namelist` order
dense_models = [
  mnist_dense_model, 
  fashion_mnist_dense_model, 
  cifar10_dense_model, 
  cifar100_dense_model
]

In [None]:
# models[0] = convolutional nets, models[1] = dense nets; each inner list holds one model per dataset
models = [conv_models,dense_models]

## Randomly wrong test labels

In [None]:
# One array of test labels per dataset, starting as a copy of the true test labels
# wrong_labels = [[int],[int],[int],[int]]
wrong_labels = []
for (_, test_labels) in labels:
  wrong_labels.append(test_labels.copy())

# Double checking label sizes
print(str(len(wrong_labels))+":", *[wrong_set.shape for wrong_set in wrong_labels])

# For each dataset
for (i, wrong_set) in enumerate(wrong_labels):
  classes = datasets[i][1][1].shape[1]

  # For each label
  for (j, true_label) in enumerate(wrong_set):
    # Randomly pick a replacement from every possible label except the correct one
    candidates = [k for k in range(classes) if k != true_label]
    wrong_set[j] = random.choice(candidates)

# `wrong_labels` now contains semi-random labels that are guaranteed to be incorrect

4: (10000,) (10000,) (10000,) (10000,)


## 0% Random (benchmark)

In [None]:
# Test-set loss / accuracy of each conv model trained on correctly labelled data
conv_los_benchmarks, conv_acc_benchmarks = [], []

start = time.time()

print("[",end =" ")
for (done,(conv_model,dataset)) in enumerate(zip(conv_models,datasets), start=1):
  (train_x, train_y), (test_x, test_y) = dataset

  # Train on the accurate training set, then score on the accurate test set
  conv_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  conv_model.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=epochs, verbose=0)
  los, acc = conv_model.evaluate(test_x, test_y, verbose=0)

  conv_los_benchmarks.append(los)
  conv_acc_benchmarks.append(acc)

  # Progress check
  print("{}/{}".format(done, len(datasets)),end=" ")

# Minutes taken
elapsed_minutes = (time.time() - start) / 60
print("] {:.2f}m".format(elapsed_minutes))

[ 1/4 2/4 3/4 4/4 ] 2.22m


In [None]:
# Test-set loss / accuracy of each dense model trained on correctly labelled data
dense_los_benchmarks, dense_acc_benchmarks = [], []

start = time.time()

print("[",end =" ")
for (done,(dense_model,dataset)) in enumerate(zip(dense_models,datasets), start=1):
  (train_x, train_y), (test_x, test_y) = dataset

  # Train on the accurate training set, then score on the accurate test set
  dense_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  dense_model.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=epochs, verbose=0)
  los, acc = dense_model.evaluate(test_x, test_y, verbose=0)

  dense_los_benchmarks.append(los)
  dense_acc_benchmarks.append(acc)

  # Progress check
  print("{}/{}".format(done, len(datasets)),end=' ')

# Minutes taken
elapsed_minutes = (time.time() - start) / 60
print("] {:.2f}m".format(elapsed_minutes))

[ 1/4 2/4 3/4 4/4 ] 1.66m


In [None]:
# Row 0 = conv nets, row 1 = dense nets; one column per dataset
los_benchmarks = np.stack([conv_los_benchmarks, dense_los_benchmarks])
acc_benchmarks = np.stack([conv_acc_benchmarks, dense_acc_benchmarks])

for (label, table) in (("los_benchmarks", los_benchmarks), ("acc_benchmarks", acc_benchmarks)):
  print(label + " shape:", table.shape)

# np.set_printoptions(precision=3)
# print("los_benchmarks:\n",los_benchmarks)
# print("acc_benchmarks:\n",acc_benchmarks)

los_benchmarks shape: (2, 4)
acc_benchmarks shape: (2, 4)


In [None]:
# Round to 3 decimals for display
los_benchmarks = los_benchmarks.round(3)
acc_benchmarks = acc_benchmarks.round(3)

# One table per model family; `row` selects the matching row of the benchmark arrays
for (row, title) in enumerate(["Conv benchmarks", "Dense benchmarks"]):
  table = go.Table(
      header=dict(values=['Dataset','Loss','Accuracy']),
      cells=dict(values=[namelist, los_benchmarks[row], acc_benchmarks[row]])
  )
  fig = go.Figure(data=[table])
  fig.update_layout(height=300, width=500, title_text=title)
  fig.show()

## Randomly wrong labelled training data

*   Conv nets are very resilient to random data.
*   Dense nets are very susceptible to random data.

My best guess for the reason is that the more rigid structure of conv nets leads them to focus their learning on spatial patterns, while dense nets can learn sparser, more abstract patterns.

In this circumstance the rigidity of the conv net prevents it from learning any strong spatial patterns in the randomly labelled data. This minimises the effect randomly labelled data can have.

Meanwhile, the dense net's ability to recognize more abstract patterns between seemingly spatially disconnected points unfortunately gives it a larger degree of freedom to discover and learn patterns in the randomly labelled data.

In [None]:
gc.collect()

# conv_loss[m][s] / conv_accuracy[m][s]: result of model m on the clean test set
# after training with the label randomness of step s
conv_loss = [[] for i in conv_models]
conv_accuracy = [[] for i in conv_models]

rs = [] # Fraction of training labels re-drawn at random, one entry per step
outer_start = time.time()

for step in range(STEPS):
  r = step * 1 / STEPS
  rs.append(r)
  start = time.time()
  print("[",end =" ")
  for (model_indx,(conv_model,dataset)) in enumerate(zip(conv_models,datasets)):
    # Start from the accurate one-hot training labels
    random_training_labels = dataset[0][1].copy()

    # Randomise labels: re-draw the class of a random `r` fraction of the examples.
    # The new class is drawn from all classes, so it may coincide with the true one.
    classes = dataset[0][1].shape[1]
    number = int(r*len(random_training_labels))
    rows = np.array(random.sample(range(0, len(random_training_labels)), number), dtype=np.intp)
    new_classes = np.array([random.randint(0, classes-1) for _ in rows], dtype=np.intp)
    random_training_labels[rows] = 0
    random_training_labels[rows, new_classes] = 1

    # Train a freshly initialised copy of the architecture.
    # `compile` does not reset weights, so re-fitting `conv_model` itself would
    # continue from weights already trained on clean data (benchmark cell) and on
    # every previous randomness level, which would contaminate the measurement.
    fresh_model = keras.models.clone_model(conv_model)
    fresh_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    fresh_model.fit(dataset[0][0], random_training_labels, batch_size=BATCH_SIZE, epochs=epochs, verbose=0)

    # Evaluate on the accurate test set
    conv_los, conv_acc = fresh_model.evaluate(dataset[1][0], dataset[1][1], verbose=0)

    # Append evaluations
    conv_loss[model_indx].append(conv_los)
    conv_accuracy[model_indx].append(conv_acc)

    # Progress check
    print(str(model_indx+1)+"/"+str(len(datasets)),end =" ")
  # Model minutes
  print("]",str(step+1)+"/"+str(STEPS),"{:.2f}".format((time.time() - start)/60)+"m")

print()
# Total minutes
print("{:.2f}".format((time.time() - outer_start) / 60)+"m")

[ 1/4 2/4 3/4 4/4 ] 1/8 2.10m
[ 1/4 2/4 3/4 4/4 ] 2/8 2.11m
[ 1/4 2/4 3/4 4/4 ] 3/8 2.12m
[ 1/4 2/4 3/4 4/4 ] 4/8 2.11m
[ 1/4 2/4 3/4 4/4 ] 5/8 2.11m
[ 1/4 2/4 3/4 4/4 ] 6/8 2.13m
[ 1/4 2/4 3/4 4/4 ] 7/8 2.10m
[ 1/4 2/4 3/4 4/4 ] 8/8 2.08m

16.86m


In [None]:
# Rows = datasets, columns = randomness levels; transposed below so each dataset is one line
conv_accuracy_df = pd.DataFrame(conv_accuracy, index=namelist, columns=rs)

axis_titles = dict(value="Accuracy", index="Randomness", variable="Dataset")
fig = px.line(conv_accuracy_df.T, labels=axis_titles)
fig.update_layout(height=400, width=800, title_text="Accuracy Vs Randomness")
fig.show()

## Checking by difference

### Difference curve

In difference checking, we care far more about differences near 0 than differences near 1, so we want many more samples near 0 than near 1. Here `d_curve(step) = difference`: in the lower range of steps the values of `d_curve(step)` are closely grouped, while as `step` increases the gap between consecutive values grows, until the curve eventually reaches 1 at `DC_STEPS`.

In [None]:
# Difference checking steps do not involve fitting models and as such are much cheaper,
# so 100 times as many of them are used as training steps.
DC_STEPS = 100*STEPS

In [None]:
b = 5 # Exponent of the curve; higher = more samples close to 0
a = 1 / DC_STEPS**b # Scale factor so that `d_curve(DC_STEPS) == 1`
def d_curve(step):
  """Allowed difference at `step`: the power curve `a * step**b` (reads the module-level `a` and `b`)."""
  return a * step**b

In [None]:
# Evaluate the curve at every difference-checking step
x = np.arange(DC_STEPS)
curve = go.Scatter(x=x, y=d_curve(x))

fig = go.Figure(data=curve)
fig.update_layout(height=400, width=800, title_text="Difference at each step")
fig.show()

### Average prediction outputs

In [None]:
gc.collect()

# Sets start time
start = time.time()

averages = [] # averages[i][c] = mean prediction vector over training examples of true class c
test_predictions = [] # Prediction results of test sets

# Fit a model to each dataset, then average the predicted outputs for each class
for (i,(conv_model,dataset,label_set)) in enumerate(zip(conv_models,datasets,labels)):
  # Train a freshly initialised copy so the result does not depend on whatever
  # training the shared `conv_model` received in earlier cells
  # (`compile` alone does not reset weights).
  model = keras.models.clone_model(conv_model)
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  model.fit(dataset[0][0], dataset[0][1], batch_size=BATCH_SIZE, epochs=epochs, verbose=0)

  # Gets training set predictions
  train_predictions = model.predict(dataset[0][0])

  # totals = NxN (where N=number of classes)
  classes = dataset[0][1].shape[1]
  totals = np.zeros((classes, classes))
  counts = np.zeros(classes)

  print(totals.shape,counts.shape,end=' ') # Check

  # Accumulate predictions by true class
  # label = actual label
  # p = predicted class probabilities
  for label,p in zip(label_set[0],train_predictions):
    totals[label] += p
    counts[label] += 1

  # A class with no training examples keeps an all-zero average instead of dividing by zero
  averages.append([t/max(c, 1) for (t,c) in zip(totals,counts)])

  test_predictions.append(model.predict(dataset[1][0]))

  print(str(i+1)+"/"+str(len(datasets)))

# Minutes taken
print()
print("{:.2f}".format((time.time() - start) / 60)+"m")

(10, 10) (10,) 1/4
(10, 10) (10,) 2/4
(10, 10) (10,) 3/4
(100, 100) (100,) 4/4

2.19m


In [None]:
# Number of datasets, followed by the (classes, classes) shape of each average table
print(len(averages),end=' ')
print(*[np.array(class_means).shape for class_means in averages])

4 (10, 10) (10, 10) (10, 10) (100, 100)


### Running difference checking

In [None]:
gc.collect()

# Sets start time
start = time.time()

# difference allowed at each step
ds = [d_curve(step) for step in range(DC_STEPS)]
thresholds = np.array(ds)

# Per dataset, an (examples, classes) array of absolute differences between each
# test prediction and the average prediction of the example's ACCURATE class
correct_differences = [
    np.abs(test_predictions[i] - np.asarray(averages[i])[np.asarray(labels[i][1])])
    for i in range(len(datasets))
]
# Same, but against the average prediction of the example's INACCURATE class
incorrect_differences = [
    np.abs(test_predictions[i] - np.asarray(averages[i])[np.asarray(wrong_labels[i])])
    for i in range(len(datasets))
]

# check (the datasets have different class counts, so the arrays cannot be
# stacked into one ndarray; print the shape of each instead)
print([d.shape for d in correct_differences],[d.shape for d in incorrect_differences])

# Number of correctly labelled examples, discarded
# (When an example is correctly labelled, yet the systems check discards it)
falsely_thrown_out = np.zeros((len(datasets),DC_STEPS))

# Number of wrongly labelled examples, accepted
# (When an example is incorrectly labelled, yet the systems check fails to discard it)
falsely_let_in = np.zeros((len(datasets),DC_STEPS))

for i in range(len(datasets)):
  # "every class difference > d" is the same as "smallest class difference > d"
  smallest_correct = correct_differences[i].min(axis=1)
  # "every class difference < d" is the same as "largest class difference < d"
  largest_incorrect = incorrect_differences[i].max(axis=1)

  # Compare every example against every threshold at once: (DC_STEPS, examples)
  falsely_thrown_out[i] = (smallest_correct[None, :] > thresholds[:, None]).sum(axis=1)
  falsely_let_in[i] = (largest_incorrect[None, :] < thresholds[:, None]).sum(axis=1)

# Prints time taken
print()
print("{:.2f}".format((time.time() - start) / 60)+"m")


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray



(4, 10000) (4, 10000)
  10   20   30   40   50   60   70   80   90  100 
 110  120  130  140  150  160  170  180  190  200 
 210  220  230  240  250  260  270  280  290  300 
 310  320  330  340  350  360  370  380  390  400 
 410  420  430  440  450  460  470  480  490  500 
 510  520  530  540  550  560  570  580  590  600 
 610  620  630  640  650  660  670  680  690  700 
 710  720  730  740  750  760  770  780  790  800 

2.19m


### Graphs

In [None]:
# Rows = datasets, columns = allowed difference
falsely_thrown_out_df = pd.DataFrame(falsely_thrown_out, index=namelist, columns=ds)
# Only the first 250 thresholds are plotted for the discarded counts
fig = px.line(
    falsely_thrown_out_df.T.head(250),
    labels=dict(value="Discarded", index="Difference", variable="Dataset")
)
fig.update_layout(height=400, width=800, title_text="Accurate examples discarded")
fig.show()

# Rows = datasets, columns = allowed difference
falsely_let_in_df = pd.DataFrame(falsely_let_in, index=namelist, columns=ds)
fig = px.line(
    falsely_let_in_df.T,
    labels=dict(value="Accepted", index="Difference", variable="Dataset")
)
fig.update_layout(height=400, width=800, title_text="Inaccurate examples accepted")
fig.show()

### Crossing points

In [None]:
# Per dataset: (difference, accepted, discarded) at the smallest difference
# where `falsely_let_in >= falsely_thrown_out`; stays all-zero if they never cross
crossing_points = np.zeros((len(datasets),3))

for i in range(len(datasets)):
  crossed = np.flatnonzero(falsely_let_in[i][:len(ds)] >= falsely_thrown_out[i][:len(ds)])
  if crossed.size > 0:
    first = crossed[0]
    crossing_points[i] = (ds[first], falsely_let_in[i][first], falsely_thrown_out[i][first])

# print(crossing_points)

In [None]:
# Round to 7 decimals for display
crossing_points = crossing_points.round(7)

# Dataset names followed by the three columns of `crossing_points`
table_columns = [namelist] + [crossing_points[:, col] for col in range(3)]

fig = go.Figure(data=[go.Table(
    header=dict(values=["Dataset","Difference","Accepted","Discarded"]),
    cells=dict(values=table_columns)
)])
fig.update_layout(height=300, width=600, title_text="Crossing points")
fig.show()

## Checking by committee

### Checking function

In [None]:
def committee_check(
    committee, # models (only its length is used)
    predictions, # models * examples (predicted class per test example)
    right_labels, # 2 * examples (train and test labels respectively, of ONE dataset)
    wrong_labels # examples (deliberately wrong test labels of the same dataset)
):
  """Counts committee-vote mistakes for every possible vote threshold.

  For each threshold `v` in `1..len(committee)`:
    * a correctly labelled test example is "falsely thrown out" when fewer than
      `v` members predicted its true label;
    * a wrongly labelled test example is "falsely let in" when at least `v`
      members predicted its wrong label.

  Returns:
    `(falsely_thrown_out, falsely_let_in)`: two float arrays of length
    `len(committee)`; index `v-1` holds the count for threshold `v`.
  """
  num_models = len(committee)
  # Number of correctly labelled examples, discarded
  falsely_thrown_out = np.zeros(num_models)
  # Number of wrongly labelled examples, accepted
  falsely_let_in = np.zeros(num_models)

  if num_models == 0:
    return (falsely_thrown_out,falsely_let_in)

  test_labels = np.asarray(right_labels[1])
  num_examples = test_labels.shape[0]
  preds = np.asarray(predictions)[:num_models, :num_examples]
  wrong = np.asarray(wrong_labels)[:num_examples]

  # Votes per example. These do not depend on the threshold, so they are
  # computed once instead of once per threshold.
  hits_right = preds == test_labels
  # A prediction that matches the true label never counts as a wrong-label vote
  hits_wrong = (preds == wrong) & ~hits_right
  correct_votes = hits_right.sum(axis=0)
  wrong_votes = hits_wrong.sum(axis=0)

  for votes_required in range(1,num_models+1):
    falsely_thrown_out[votes_required-1] = np.count_nonzero(correct_votes < votes_required)
    falsely_let_in[votes_required-1] = np.count_nonzero(wrong_votes >= votes_required)

  return (falsely_thrown_out,falsely_let_in)

### Running checking by committee


In [None]:
gc.collect()

# Sets start time
outer_start = time.time()

max_committee_size =  9 # committee_size % 2 == 1

# Odd committee sizes 1, 3, ..., max_committee_size
committee_sizes = range(1, max_committee_size+1, 2)

# Total number of nets trained: every committee member, for each of the 4 datasets
nets = 4 * np.sum(list(committee_sizes))
print("nets:",nets)

# total_stats[c] = array (datasets, 2, committee size) of `committee_check` results
total_stats = []

for committee_size in committee_sizes:
  start = time.time()
  print("[",end =" ")

  stats = []
  for (conv_model,dataset,l,wl) in zip(conv_models,datasets,labels,wrong_labels):
    (train_x, train_y), (test_x, _) = dataset

    # Fresh, independently initialised copies of the dataset's conv architecture
    committee = []
    for _ in range(committee_size):
      committee.append(keras.models.clone_model(conv_model))
    gc.collect()

    # Predicted class of every test example, one array per member
    test_prediction = []

    print("{",end =" ")

    for (member_number, member) in enumerate(committee, start=1):
      member.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
      member.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=epochs, verbose=0)
      class_scores = member.predict(test_x)
      test_prediction.append(np.argmax(class_scores,1))
      print(str(member_number),end=" ")

    stats.append(committee_check(committee,test_prediction,l,wl))

    print("}",end=" ")
  total_stats.append(np.array(stats))

  elapsed_minutes = (time.time() - start)/60
  print("]","{}/{}".format(committee_size, max_committee_size),"{:.2f}m".format(elapsed_minutes))
# prints time taken
print()
print("{:.2f}m".format((time.time() - outer_start) / 60))

nets: 100
[ { 1 } { 1 } { 1 } { 1 } ] 1/9 2.11m
[ { 1 2 3 } { 1 2 3 } { 1 2 3 } { 1 2 3 } ] 3/9 6.29m
[ { 1 2 3 4 5 } { 1 2 3 4 5 } { 1 2 3 4 5 } { 1 2 3 4 5 } ] 5/9 10.45m
[ { 1 2 3 4 5 6 7 } { 1 2 3 4 5 6 7 } { 1 2 3 4 5 6 7 } { 1 2 3 4 5 6 7 } ] 7/9 14.67m
[ { 1 2 3 4 5 6 7 8 9 } { 1 2 3 4 5 6 7 8 9 } { 1 2 3 4 5 6 7 8 9 } { 1 2 3 4 5 6 7 8 9 } ] 9/9 18.95m

52.47m


In [None]:
for i in range(len(total_stats)):
  print(np.array(total_stats[i]).shape)

(4, 2, 1)
(4, 2, 3)
(4, 2, 5)
(4, 2, 7)
(4, 2, 9)


### Graphs

In [None]:
gc.collect()

# Possible vote thresholds: 1..max_committee_size
votes = list(range(1, max_committee_size+1))

import plotly.graph_objects as go


def _committee_subplot_row():
  """One row of subplots with a shared y axis: one column per committee size."""
  return make_subplots(
      rows=1,
      cols=len(total_stats),
      horizontal_spacing = 0.01,
      shared_yaxes=True
  )


# Accurate examples discarded
fig_out = _committee_subplot_row()
# Inaccurate examples accepted
fig_in = _committee_subplot_row()
# Sum of both
fig_com = _committee_subplot_row()

# Default plotly colors
colors = [
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf'   # blue-teal
]

for (i,stat) in enumerate(total_stats):
  subplot_col = i + 1
  # The i-th committee has 2*i+1 members, hence that many vote thresholds
  thresholds = votes[0:2*i+1]
  # Only the first column contributes legend entries, to avoid duplicates
  in_legend = (i == 0)

  for j in range(stat.shape[0]):
    discarded = stat[j, 0, :]
    accepted = stat[j, 1, :]

    per_figure = (
        (fig_out, discarded),
        (fig_in, accepted),
        (fig_com, np.array(discarded) + np.array(accepted)),
    )
    for (figure, y_values) in per_figure:
      figure.add_trace(
          go.Scatter(
              x=thresholds,
              y=y_values,
              name=namelist[j],
              line_color=colors[j],
              showlegend=in_legend
          ),
          row=1,col=subplot_col
      )

titled_figures = (
    (fig_out, "Accurate examples discarded"),
    (fig_in, "Inaccurate examples accepted"),
    (fig_com, "Combined"),
)
for (figure, title) in titled_figures:
  figure.update_layout(height=400, width=1100, title_text=title)
  figure.update_xaxes(type='category')
  figure.show()

### Min points

In [None]:
# For each dataset find the (votes, committee size) pair that minimises
# `falsely_thrown_out + falsely_let_in`.
# Columns of `mins`: [votes required, committee size, falsely thrown out, falsely let in]
# `100000` is a sentinel in place of `f32::MAX`; it exceeds any possible count.
mins = np.full((len(datasets), 4), 100000)

for i in range(len(datasets)):
  for c in range(len(total_stats)):
    committee_size = 2*c+1
    # Scan EVERY vote threshold of every committee size. Stopping at the first
    # improvement would skip thresholds that may be better still.
    for v in range(committee_size):
      thrown_out = total_stats[c][i][0][v]
      let_in = total_stats[c][i][1][v]
      # Strict `<` keeps the smallest committee / fewest votes on ties
      if thrown_out + let_in < mins[i][2] + mins[i][3]:
        mins[i][0] = v+1
        mins[i][1] = committee_size
        mins[i][2] = thrown_out
        mins[i][3] = let_in

# print(mins)

In [None]:
# Columns of `mins`: [votes required, committee size, falsely thrown out, falsely let in]
discarded = mins[:,2] # accurate examples falsely thrown out
accepted = mins[:,3] # inaccurate examples falsely let in
total = discarded + accepted

# The committee check runs over the TEST set, and every test example is checked
# twice (once with its true label, once with a wrong label), hence `2 * lengths`.
lengths = np.array([len(dataset[1][1]) for dataset in datasets])
percent = 100 * total / (2 * lengths)

fig = go.Figure(data=[go.Table(
    header=dict(values=["Dataset","Votes","Committee","Accepted","Discarded","Total","Total %"]),
    cells=dict(values=[
                       namelist,
                       mins[:,0],
                       mins[:,1],
                       accepted,
                       discarded,
                       total,percent
  ])
)])
fig.update_layout(height=300, width=800, title_text="Min points")
fig.show()