## Neural Rock Train Model Notebook

The following cell sets up the entire repository from githubg and links to the google drive where the dataset it stored. After all the requirements get installed.

In [1]:
import os

if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    import os
    from getpass import getpass
    import urllib

    user = input('User name: ')
    password = getpass('Password: ')
    password = urllib.parse.quote(password) # your password is converted into url format

    cmd_string = 'git clone https://{0}:{1}@github.com/LukasMosser/neural_rock_typing.git'.format(user, password)

    os.system(cmd_string)
    cmd_string, password = "", "" # removing the password from the variable
    os.chdir("./neural_rock_typing")
    os.system('pip install -r requirements.txt')
    os.system('pip install -e .')

    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
else:
    print('Not running on CoLab')

Not running on CoLab


In [2]:
import os
os.chdir("./neural_rock_typing")

FileNotFoundError: [Errno 2] No such file or directory: './neural_rock_typing'

### A Hack needed to make Pytorch Lightning work with Colab again

In [2]:
!pip install wandb
!pip install git+https://github.com/PyTorchLightning/pytorch-lightning


Collecting git+https://github.com/PyTorchLightning/pytorch-lightning
  Cloning https://github.com/PyTorchLightning/pytorch-lightning to /tmp/pip-req-build-h5ygrrci
  Running command git clone -q https://github.com/PyTorchLightning/pytorch-lightning /tmp/pip-req-build-h5ygrrci
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: pytorch-lightning
  Building wheel for pytorch-lightning (PEP 517) ... [?25l[?25hdone
  Created wheel for pytorch-lightning: filename=pytorch_lightning-1.4.0.dev0-cp37-none-any.whl size=807126 sha256=e1b207239be660f81f0ac83f066b16656d58c59718bd0bf31c361478b61e9b9e
  Stored in directory: /tmp/pip-ephem-wheel-cache-5lfz_9my/wheels/e2/c6/88/caa5d4cfbfab631fc84b0107896a6f661a1caf589160c27e71
Successfully built pytorch-lightning


In [3]:
import pytorch_lightning as pl

## Login to Weights & Biases for Logging

In [3]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mlukas-mosser[0m (use `wandb login --relogin` to force relogin)


In [4]:
!git pull

Already up to date.


## Basic Imports

In [4]:
import sys
import os
import argparse
from pathlib import Path
import json
import wandb
from torchvision import transforms
from torch.utils.data import DataLoader, ConcatDataset
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from neural_rock.dataset import GPUThinSectionDataset
from neural_rock.model import NeuralRockModel, make_vgg11_model, make_resnet18_model
from neural_rock.plot import visualize_batch
from neural_rock.utils import MEAN_TRAIN, STD_TRAIN

## Hyperparameters

In [5]:
wandb_name = 'lukas-mosser'
learning_rate = 3e-4
batch_size = 16
weight_decay = 1e-5
dropout = 0.5

train_dataset_mult = 50
val_dataset_mult = 50

seed = 42

## Perform Training Sweep across 12 Models

We train a Resnet and a VGG network each with a frozen feature extractor for each labelset: Lucia, Dunham, and DominantPore Type. 

This leads to a total of 12 models.

In [7]:
os.chdir("..")

In [None]:
for labelset in ['Lucia', 'Dunham', 'DominantPore']:
  for model in ['vgg', 'resnet']:
    for frozen in [True, False]:

      # Set the base path for the models to be stored in the Google Drive
      path = Path("./data/models/{0:}/{1:}/{2:}".format(labelset, model, str(frozen)))
      path.mkdir(parents=True, exist_ok=True)

      # Set the Random Seed on Everything
      pl.seed_everything(seed)

      # Data Augmentation used for Training
      data_transforms = {
          'train': transforms.Compose([
              transforms.RandomHorizontalFlip(),
              transforms.RandomRotation(degrees=360),
              transforms.RandomCrop((512, 512)),
              transforms.ColorJitter(hue=0.5),
              transforms.Resize((224, 224)),
              transforms.Normalize(mean=MEAN_TRAIN, std=STD_TRAIN)
          ]),
          'val':
              transforms.Compose([
                  transforms.RandomCrop((512, 512)),
                  transforms.Resize((224, 224)),
                  transforms.Normalize(mean=MEAN_TRAIN, std=STD_TRAIN)
              ])
      }

      # Load the Datasets
      train_dataset_base = GPUThinSectionDataset(Path("."), labelset, preload_images=True,
                                          transform=data_transforms['train'], train=True, seed=seed)

      val_dataset_base = GPUThinSectionDataset(Path("."), labelset, preload_images=True,
                                          transform=data_transforms['train'], train=False, seed=seed)
      train_test_split = {'train': train_dataset_base.image_ids, 'test': val_dataset_base.image_ids}
      
      with open(path.joinpath('train_test_split.json'), 'w') as fp:
          json.dump(train_test_split, fp)
 
      # We multiply the validation dataset to randomly increase the number of images we evaluate against.
      val_dataset = ConcatDataset([val_dataset_base]*100)

      # Setup dataloaders
      train_loader = DataLoader(train_dataset_base, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
      val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
      print(len(train_loader), len(val_loader))
      break
      """
      # Setup Weights and Biases Logger
      wandb_logger = WandbLogger(name=wandb_name, project='neural-rock-finak-3', entity='ccg')
      wandb_logger.experiment.config.update({"labelset": labelset, "model": model, 'frozen': str(frozen)})
      tensorboard_logger = TensorBoardLogger("lightning_logs", name=labelset)
      
      # Checkpoint based on validation F1 score
      checkpointer = ModelCheckpoint(dirpath=path, filename='best', monitor="val/f1", verbose=True, mode="max")
      
      # Setup the Pytorch Lightning Dataloader
      trainer = pl.Trainer(gpus=-1, 
                           max_steps=15000, 
                           benchmark=True,
                          logger=[wandb_logger, tensorboard_logger],
                          callbacks=[checkpointer],
                          progress_bar_refresh_rate=20,
                          check_val_every_n_epoch=100)
      
      # Select which model to run
      if model == 'vgg':
        feature_extractor, classifier = make_vgg11_model(train_dataset_base.num_classes, dropout=dropout)
      elif model == 'resnet':
        feature_extractor, classifier = make_resnet18_model(train_dataset_base.num_classes)

      # Create the model itself, ready for training
      model_ = NeuralRockModel(feature_extractor, classifier, num_classes=train_dataset_base.num_classes, freeze_feature_extractor=frozen)

      # Train the model
      trainer.fit(model_, train_dataloader=train_loader, val_dataloaders=val_loader)

      # Clean up some images on GPU to avoid Out of Memory errors
      del train_dataset_base.images
      del val_dataset_base.images

      # Clean up Weights and Biases Logging
      wandb.finish()"""

Global seed set to 42
