<a href="https://colab.research.google.com/github/Aggraj/Deep-Learning-CS-6910/blob/main/Finetuning_pretrained_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# set SIZE to "TINY", "MEDIUM", or "LARGE"
# to select one of these three datasets
# TINY dataset: 100 images, 30MB
# MEDIUM dataset: 1000 images, 312MB
# LARGE datast: 12,000 images, 3.6GB

SIZE = "LARGE"

In [3]:
if SIZE == "TINY":
  src_url = "https://storage.googleapis.com/wandb_datasets/nature_100.zip"
  src_zip = "nature_100.zip"
  DATA_SRC = "nature_100"
  IMAGES_PER_LABEL = 10
  BALANCED_SPLITS = {"train" : 8, "val" : 1, "test": 1}
elif SIZE == "MEDIUM":
  src_url = "https://storage.googleapis.com/wandb_datasets/nature_1K.zip"
  src_zip = "nature_1K.zip"
  DATA_SRC = "nature_1K"
  IMAGES_PER_LABEL = 100
  BALANCED_SPLITS = {"train" : 80, "val" : 10, "test": 10}
elif SIZE == "LARGE":
  src_url = "https://storage.googleapis.com/wandb_datasets/nature_12K.zip"
  src_zip = "nature_12K.zip"
  DATA_SRC = "inaturalist_12K/train" # (technically a subset of only 10K images)
  IMAGES_PER_LABEL = 1000
  BALANCED_SPLITS = {"train" : 800, "val" : 100, "test": 100}

In [4]:
%%capture
!curl -SL $src_url > $src_zip
!unzip $src_zip

# Step 0: Setup

Start out by installing the experiment tracking library and setting up your free W&B account:


*   **pip install wandb** – Install the W&B library
*   **import wandb** – Import the wandb library
*   **wandb login** – Login to your W&B account so you can log all your metrics in one place

In [5]:
!pip install wandb -qq
import wandb
wandb.login()

[K     |████████████████████████████████| 2.1MB 8.3MB/s 
[K     |████████████████████████████████| 102kB 10.7MB/s 
[K     |████████████████████████████████| 163kB 34.6MB/s 
[K     |████████████████████████████████| 133kB 33.0MB/s 
[K     |████████████████████████████████| 71kB 8.5MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:
import os
from random import shuffle

# source directory for all raw data
SRC = DATA_SRC
# number of images per class label
# the total number of images is 10X this (10 classes)
TOTAL_IMAGES = IMAGES_PER_LABEL * 10
PROJECT_NAME = "artifacts_demo"
PREFIX = "inat" # convenient for tracking local data

# Step 1: Upload raw data

In [7]:
RAW_DATA_AT = "_".join([PREFIX, "raw_data", str(TOTAL_IMAGES)])
run = wandb.init(project=PROJECT_NAME, job_type="upload")

# create an artifact for all the raw data
raw_data_at = wandb.Artifact(RAW_DATA_AT, type="raw_data")

# SRC_DIR contains 10 folders, one for each of 10 class labels
# each folder contains images of the corresponding class
labels = os.listdir(SRC)
for l in labels:
  imgs_per_label = os.path.join(SRC, l)
  if os.path.isdir(imgs_per_label):
    imgs = os.listdir(imgs_per_label)
    # randomize the order
    shuffle(imgs)
    img_file_ids = imgs[:IMAGES_PER_LABEL]
    for f in img_file_ids:
      file_path = os.path.join(SRC, l, f)
      # add file to artifact by full path
      raw_data_at.add_file(file_path, name=l + "/" + f)

# save artifact to W&B
run.log_artifact(raw_data_at)
run.finish()

[34m[1mwandb[0m: Currently logged in as: [33mchaxin[0m (use `wandb login --relogin` to force relogin)


VBox(children=(Label(value=' 30.30MB of 30.30MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

# Step 2: Prepare a data split


In [8]:
run = wandb.init(project=PROJECT_NAME, job_type="data_split")

# find the most recent ("latest") version of the full raw data
# you can of course pass around programmatic aliases and not string literals
data_at = run.use_artifact(RAW_DATA_AT + ":latest")
# download it locally (for illustration purposes/across hardware; you can
# also sync/version artifacts by reference)
data_dir = data_at.download()

# create balanced train, val, test splits
# each count is the number of images per label
DATA_SPLITS = BALANCED_SPLITS

ats = {}
# wrap artifacts in dictionary for convenience
for split, count in DATA_SPLITS.items():
  ats[split] = wandb.Artifact("_".join([PREFIX, split, "data", str(count*10)]), 
                              "_".join([split, "data"]))

labels = os.listdir(data_dir)
for l in labels:
  if l.startswith("."): # skip non-label file
    continue
  imgs_per_label = os.listdir(os.path.join(data_dir, l))
  shuffle(imgs_per_label)
  start_id = 0
  for split, count in DATA_SPLITS.items():
    # take a subset
    split_imgs = imgs_per_label[start_id:start_id+count]
    for img_file in split_imgs:
      full_path = os.path.join(data_dir, l, img_file)
      # add file to artifact by full path
      # note: pass the label to the name parameter to retain it in
      # the data structure 
      ats[split].add_file(full_path, name = os.path.join(l, img_file))
    start_id += count

# save all three artifacts to W&B
# note: yes, in this example, we are cheating and have labels for the "test" data ;)
for split, artifact in ats.items():
  run.log_artifact(artifact)

run.finish()

VBox(children=(Label(value=' 30.30MB of 30.30MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

# Step 3: Train with artifacts and save model




In [25]:
# EXPERIMENT CONFIG
#---------------------------
# if you modify these, make sure the total count is less than or equal to
# the number of files uploaded for that split in the train/val data artifact
NUM_TRAIN = BALANCED_SPLITS["train"] * 10
NUM_VAL = BALANCED_SPLITS["val"] * 10
NUM_EPOCHS = 1 # set low for demo purposes; try 3, 5, or as many as you like

# model name
# if you want to train a sufficiently different model, give this a new name
# to start a new lineage for the model, instead of just incrementing the
# version of the old model
MODEL_NAME = "iv3_trained"

# folder in which to save initial, untrained model
INIT_MODEL_DIR = "init_model_keras_iv3"

# folder in which to save the final, trained model
# if you want to train a sufficiently different model, give this a new name
# to start a new lineage for the model, instead of just incrementing the
# version of the old model
FINAL_MODEL_DIR = "trained_keras_model_iv3"

import numpy as np

from sklearn.metrics import precision_recall_curve, roc_curve
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.xception import Xception


from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from wandb.keras import WandbCallback

# experiment configuration saved to W&B
config_defaults = {
  "num_train" : NUM_TRAIN,
  "num_val" : NUM_VAL,
  "epochs" : NUM_EPOCHS,
  "num_classes" : 10,
  "fc_size" : 1024,
  # inceptionV3 settings
  "img_width" : 299,
  "img_height": 299,
  "batch_size" : 32,
  "model_name" : 'InceptionResNetV2'
}

def finetune_model(fc_size, num_classes,model_name):
  """Load InceptionV3 with ImageNet weights, freeze it,
  and attach a finetuning top for this classification task"""
  # load InceptionV3 as base
  if model_name == 'InceptionV3' :
    base = InceptionV3(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers:
      layer.trainable = False
    x = base.get_layer('mixed10').output 
  if model_name == 'InceptionResNetV2' :
    base = InceptionResNetV2(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers:
      layer.trainable = False
    x = base.get_layer('conv_7b_ac').output
  if model_name == 'ResNet50' :
    base = ResNet50(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers:
      layer.trainable = False
    x = base.get_layer('conv5_block3_out').output
  if model_name == 'Xception' :
    base = Xception(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers:
      layer.trainable = False
    x = base.get_layer('block14_sepconv2_act').output  

  # attach a fine-tuning layer
  x = GlobalAveragePooling2D()(x)
  x = Dense(fc_size, activation='relu')(x)
  guesses = Dense(num_classes, activation='softmax')(x)

  model = Model(inputs=base.input, outputs=guesses)
  model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
  return model

def train():
  """ Main training loop. This is called pretrain because it freezes
  the InceptionV3 layers of the model and only trains the new top layers
  on the new data.   subsequent training phase would unfreeze all the layers
  and finetune the whole model on the new data""" 
  # track this experiment with wandb: all runs will be sent
  # to the given project name
  run = wandb.init(project=PROJECT_NAME, job_type="train", config=config_defaults)
  cfg = wandb.config

  # artifact names
  train_at = os.path.join(PROJECT_NAME, PREFIX + "_train_data_" + str(NUM_TRAIN)) + ":latest"
  val_at = os.path.join(PROJECT_NAME, PREFIX + "_val_data_" + str(NUM_VAL)) + ":latest"

  train_data = run.use_artifact(train_at, type='train_data')
  train_dir = train_data.download()
  val_data = run.use_artifact(val_at, type='val_data')
  val_dir = val_data.download()

  # create train and validation data generators
  train_datagen = ImageDataGenerator(
      rescale=1. / 255,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True)
  val_datagen = ImageDataGenerator(rescale=1. / 255)

  train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(cfg.img_width, cfg.img_height),
    batch_size=cfg.batch_size,
    class_mode='categorical')

  val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(cfg.img_width, cfg.img_height),
    batch_size=cfg.batch_size,
    class_mode='categorical')

  # instantiate model and callbacks
  model_name = cfg.model_name
  model = finetune_model(cfg.fc_size, cfg.num_classes,model_name)

  # log initial model before training
  model_artifact = wandb.Artifact(
            "iv3", type="model",
            description="unmodified inception v3",
            metadata=dict(cfg))

  model.save(INIT_MODEL_DIR)
  model_artifact.add_dir(INIT_MODEL_DIR)
  run.log_artifact(model_artifact)
  callbacks = [WandbCallback()]

  # train!
  model.fit(
    train_generator,
    steps_per_epoch = cfg.num_train // cfg.batch_size,
    epochs=cfg.epochs,
    validation_data=val_generator,
    callbacks = callbacks,
    validation_steps=cfg.num_val // cfg.batch_size)

  # save trained model as artifact
  trained_model_artifact = wandb.Artifact(
            MODEL_NAME, type="model",
            description="trained inception v3",
            metadata=dict(cfg))

  model.save(FINAL_MODEL_DIR)
  trained_model_artifact.add_dir(FINAL_MODEL_DIR)
  run.log_artifact(trained_model_artifact)
  run.finish()

In [24]:
train()



Found 80 images belonging to 10 classes.
Found 10 images belonging to 10 classes.


KeyboardInterrupt: ignored

In [26]:
sweep_config = {
    'method': 'random', #grid, random
    'metric': {
      'name': 'accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'model_name': {
            'values':['InceptionV3','InceptionResNetV2','Xception','ResNet50']
        },
        'epochs':{
            'values' :[2,5]
        }
    }
}

In [27]:
sweep_id = wandb.sweep(sweep_config, entity="chaxin", project="Assignment 1")



Create sweep with ID: 8pq8lxcd
Sweep URL: https://wandb.ai/chaxin/Assignment%201/sweeps/8pq8lxcd


In [None]:
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: rhl4r6ez with config:
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	model_name: InceptionV3


Found 80 images belonging to 10 classes.
Found 10 images belonging to 10 classes.
INFO:tensorflow:Assets written to: init_model_keras_iv3/assets


[34m[1mwandb[0m: Adding directory to artifact (./init_model_keras_iv3)... Done. 0.9s


Epoch 1/2
Epoch 2/2
INFO:tensorflow:Assets written to: trained_keras_model_iv3/assets


[34m[1mwandb[0m: Adding directory to artifact (./trained_keras_model_iv3)... Done. 1.0s


VBox(children=(Label(value=' 202.27MB of 202.27MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=…

0,1
epoch,1.0
loss,9.18381
accuracy,0.15625
_runtime,92.0
_timestamp,1619085669.0
_step,1.0


0,1
epoch,▁█
loss,█▁
accuracy,▁█
_runtime,▁█
_timestamp,▁█
_step,▁█


[34m[1mwandb[0m: Agent Starting Run: tj2xr43z with config:
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	model_name: InceptionV3


Found 80 images belonging to 10 classes.
Found 10 images belonging to 10 classes.
INFO:tensorflow:Assets written to: init_model_keras_iv3/assets


[34m[1mwandb[0m: Adding directory to artifact (./init_model_keras_iv3)... Done. 1.1s


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: trained_keras_model_iv3/assets


[34m[1mwandb[0m: Adding directory to artifact (./trained_keras_model_iv3)... Done. 1.3s


VBox(children=(Label(value=' 202.27MB of 202.27MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=…

0,1
epoch,4.0
loss,1.97254
accuracy,0.35938
_runtime,88.0
_timestamp,1619085822.0
_step,4.0


0,1
epoch,▁▃▅▆█
loss,██▇▂▁
accuracy,▁▃▅█▅
_runtime,▁▃▅▆█
_timestamp,▁▃▅▆█
_step,▁▃▅▆█


[34m[1mwandb[0m: Agent Starting Run: 1gwdyp5p with config:
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	model_name: InceptionResNetV2


Found 80 images belonging to 10 classes.
Found 10 images belonging to 10 classes.


In [None]:
base = ResNet50(weights="imagenet", include_top="False")

for layer in base.layers: print(layer.name)

# Step 4: Load model for inference


In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing import image
import numpy as np
import os
run = wandb.init(project=PROJECT_NAME, job_type="inference")
# use the latest version of the model
model_at = run.use_artifact(MODEL_NAME + ":latest")
# download the directory in which the model is saved
model_dir= model_at.download()
print("model: ", model_dir)
model = keras.models.load_model(model_dir)

TEST_DATA_AT = PREFIX + "_test_data_" + str(BALANCED_SPLITS["test"]*10) + ":latest"
test_data_at = run.use_artifact(TEST_DATA_AT)
test_dir = test_data_at.download()

imgs = []
class_labels = os.listdir(test_dir)
for l in class_labels:
  if l.startswith("."):
    continue
  imgs_per_class = os.listdir(os.path.join(test_dir, l))
  for img in imgs_per_class:
    img_path = os.path.join(test_dir, l, img)
    img = image.load_img(img_path, target_size=(299, 299))
    img = image.img_to_array(img)
    # don't forget to rescale test images to match the range of inputs
    # to the network
    img = np.expand_dims(img/255.0, axis=0)
    imgs.append(img)

preds = {}
imgs = np.vstack(imgs)
classes = model.predict(imgs, batch_size=32)
for c in classes:
  class_id = np.argmax(c)
  if class_id in preds:
    preds[class_id] += 1
  else:
    preds[class_id] = 1

# print the counts of predicted labels as a quick sanity check
# note that for tiny/medium datasets, this won't be very meaningful
print(preds)
run.finish()


In [None]:
# use this to freeze 200 layers or k layers instead of freezing all layers except last one

if model_name == 'InceptionV3' :
    base = InceptionV3(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers[200:]:
      layer.trainable = False
    x = base.get_layer('mixed10').output 
  if model_name == 'InceptionResNetV2' :
    base = InceptionResNetV2(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers[200:]:
      layer.trainable = False
    x = base.get_layer('conv_7b_ac').output
  if model_name == 'ResNet50' :
    base = ResNet50(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers[200:]:
      layer.trainable = False
    x = base.get_layer('conv5_block3_out').output
  if model_name == 'Xception' :
    base = Xception(weights="imagenet", include_top="False")
    # freeze base layers
    for layer in base.layers[200:]:
      layer.trainable = False
    x = base.get_layer('block14_sepconv2_act').output  
