<a href="https://colab.research.google.com/github/GlassesNoGlasses/CalCount/blob/neil%2Fgoogle_colab/CalCount_Focused.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Focused vs. Other CalCount Models

Because of Google Colab's hardware limitations, the `Focused` notebook tests whether training on a smaller, more targeted dataset of 10-20 image types (food classes) improves model accuracy.

In [None]:
# import github repo

!git clone https://github.com/GlassesNoGlasses/CalCount.git

In [None]:
# install kaggle

!pip install -q kaggle

In [None]:
# upload the Kaggle API token (kaggle.json) through the browser file picker

from google.colab import files

# files.upload() returns a dict mapping each uploaded file name to its raw
# contents. Bind it to a name instead of leaving the call as the cell's last
# expression; otherwise the token contents are rendered (and saved) in the
# notebook output.
uploaded = files.upload()

In [None]:
# add kaggle directory to google colab

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/

In [None]:
# change file permissions
# Mode 600 = read/write for the owner only, so the API token copied into
# ~/.kaggle/ is not readable by other users.

! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# download data zip file from kaggle
# -d selects the dataset by its "<owner>/<dataset>" slug (kmader/food41).
# The archive is expected to land in the working directory as food41.zip,
# which is the file name the unzip step reads.

!kaggle datasets download -d kmader/food41

In [None]:
#unzip kaggle data

!unzip food41.zip

In [None]:
# imports

import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# constants

# Number of food classes sampled for the focused experiment.
NUM_FOODS = 20
# Seed reserved for the notebook's random operations.
RANDOM_SEED = 101
# Number of images per batch when loading the dataset.
BATCH_SIZE = 32
# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels on load.
IMG_HEIGHT = 180
IMG_WIDTH = 180
# Root directory of the images.
# NOTE(review): presumably created by unzipping food41.zip in /content - confirm.
DATA_DIR = "/content/images"

In [None]:
# split images into training and testing/validation

# Both subsets must be built with the same seed and validation_split,
# otherwise the training and validation images can overlap. Keeping the shared
# arguments in one place guarantees the two calls cannot drift apart.
split_kwargs = dict(
  validation_split=0.2,
  seed=RANDOM_SEED,
  image_size=(IMG_HEIGHT, IMG_WIDTH),
  batch_size=BATCH_SIZE,
)

# 80% of the images, used for fitting the model
train_ds = tf.keras.utils.image_dataset_from_directory(
  DATA_DIR, subset="training", **split_kwargs)

# remaining 20%, used for validation
val_ds = tf.keras.utils.image_dataset_from_directory(
  DATA_DIR, subset="validation", **split_kwargs)

In [None]:
# get class labels
# Read the class names here, while train_ds is still the dataset returned by
# image_dataset_from_directory; later cells rebind train_ds to transformed
# datasets.

class_labels = train_ds.class_names
class_labels

In [None]:
# get NUM_FOODS random labels

# Draw from a generator seeded with RANDOM_SEED so the same subset of classes
# is selected on every run; replace=False guarantees NUM_FOODS distinct names.
rng = np.random.default_rng(RANDOM_SEED)
focused_labels = rng.choice(class_labels, size=NUM_FOODS, replace=False)
focused_labels

In [None]:
# filter train and validation datasets based on focused labels
#
# The datasets yield (image_batch, label_batch) pairs whose labels are integer
# positions in `class_labels`, not class-name strings. The filter therefore has
# to (1) work on single examples, (2) compare integer class ids, and (3) remap
# the kept ids to 0..len(focused_labels)-1 so that new label k corresponds to
# focused_labels[k].
#
# NOTE: this cell rebinds train_ds / val_ds. Re-run the split cell before
# re-running this one, otherwise already-remapped labels get filtered again.

# Original class id of every focused food, in the same order as focused_labels.
focused_class_ids = tf.constant(
  [class_labels.index(name) for name in focused_labels], dtype=tf.int32)


def _is_focused(image, label):
  """Return a scalar bool tensor: True when `label` is a focused class id."""
  return tf.reduce_any(tf.equal(tf.cast(label, tf.int32), focused_class_ids))


def _to_focused_label(image, label):
  """Replace the original class id by its position in `focused_labels`."""
  matches = tf.equal(tf.cast(label, tf.int32), focused_class_ids)
  return image, tf.argmax(tf.cast(matches, tf.int32), output_type=tf.int32)


def _keep_focused(ds):
  """Unbatch `ds`, keep and relabel the focused classes, then rebatch."""
  return (ds.unbatch()
            .filter(_is_focused)
            .map(_to_focused_label)
            .batch(BATCH_SIZE))


train_ds = _keep_focused(train_ds)
val_ds = _keep_focused(val_ds)

In [None]:
# visualization of data being used
# (matplotlib is already imported in the imports cell)

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  # Show up to 9 images; a batch can hold fewer than 9.
  for i in range(min(9, images.shape[0])):
    ax = plt.subplot(3, 3, i + 1)
    # Pixel values are floats at this point; cast to uint8 for display.
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(focused_labels[int(labels[i])])
    plt.axis("off")

In [None]:
# Cache the dataset and prefetch images

# Dataset.cache(): keeps the elements produced during the first pass so later
#   epochs do not read and decode the image files again
# Dataset.prefetch(): prepares upcoming elements while the current one is
#   being consumed by training

# AUTOTUNE lets tf.data choose the prefetch buffer size dynamically at runtime
AUTOTUNE = tf.data.AUTOTUNE

# shuffle(1000): shuffle buffer of 1000 dataset elements
# NOTE(review): if the dataset is batched at this point, the elements being
# shuffled are whole batches rather than single images - confirm this is intended.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Standardize RGB data

# AKA: Convert RGB [0, 255] to [0, 1] for easier processing
# NOTE(review): the model cell creates its own layers.Rescaling(1./255) and
# does not reference normalization_layer - confirm whether this is still needed.
normalization_layer = layers.Rescaling(1./255)

In [None]:
# data augmentation
# Random, label-preserving transformations applied to the input images.

data_augmentation = keras.Sequential(
  [
    # Declare the input shape with an explicit Input entry rather than the
    # deprecated `input_shape=` keyword on the first layer.
    layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    layers.RandomFlip("horizontal"),  # mirror left/right
    layers.RandomRotation(0.1),       # rotation factor 0.1
    layers.RandomZoom(0.1),           # zoom factor 0.1
  ]
)

In [None]:
# Fraction of activations dropped by the Dropout layer (20%)

DROP_RATE = 0.2

# Create model:
#   augmentation -> rescale to [0, 1] -> three Conv2D/MaxPooling2D stages
#   -> dropout -> dense classifier head
model = Sequential([
  data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(DROP_RATE),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # One output per focused food class (`num_classes` was never defined).
  # No activation: the layer emits raw logits.
  layers.Dense(NUM_FOODS, name="outputs")
])

In [None]:
# Compile model

# from_logits=True: the loss applies the softmax itself, so it expects the
# model to output raw logits; labels are integer class ids (sparse).
model.compile(
    optimizer='adam',
    metrics=['accuracy'],
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Train the model

# Number of passes over the training dataset
epochs = 30

# `history` holds the per-epoch metrics for training and validation data
history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

In [None]:
# Visualize Results
# Training vs. validation accuracy (left) and loss (right) per epoch.

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs actually recorded in `history` instead of the
# requested `epochs`, so the x and y series always have the same length.
epochs_range = range(len(acc))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()