## Simple CNN network

The original problem is:

    (P) Compute the fabrics from multiple sliced images per rev

We first tackle an intermediate objective:

    (P') Compute the fabrics from a single sliced image per rev

Of course, the accuracy will be much worse because the fabrics are computed in every direction. However, it is a good first attempt.

# Importing the dataframe

First, we initialize wandb. It is a tool that allows us to store the losses and retrieve the dataframe. Otherwise, you can directly access the dataframe locally on your computer.

In [46]:
# IPython shell escape: upgrade the wandb client inside the notebook environment.
!pip install wandb --upgrade

We import all the useful packages.

In [47]:
import sys
from pathlib import Path

# Detect the hosting environment from the modules already loaded in this
# interpreter, then point `repo_path` at the matching repository location.
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules

if not (IS_KAGGLE or IS_COLAB):
    # Neither hosted environment detected: use the local checkout.
    repo_path = Path("/home/matias/microstructure-reconstruction")
elif IS_KAGGLE:
    # Kaggle takes precedence over Colab when both flags are set.
    repo_path = Path("../input/microstructure-reconstruction")
else:
    # Colab: the repository is read from the mounted Google Drive.
    from google.colab import drive

    drive.mount("/content/gdrive")
    repo_path = Path("/content/gdrive/MyDrive/microstructure-reconstruction")

# Make the project-local packages importable.
sys.path.append(str(repo_path))

from copy import deepcopy
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.optim as optim
import torchmetrics
import torchvision.models as models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torchvision import transforms, utils
from tqdm import tqdm

import wandb
from custom_datasets import dataset
from tools import dataframe_reformat, inspect_code, plotting, training, wandb_api

# Master switch for Weights & Biases logging in this notebook.
log_wandb = True

# Choose the compute device and the matching DataLoader options in one place.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    kwargs = {"num_workers": 2, "pin_memory": True}
else:
    device = torch.device("cpu")
    kwargs = {"num_workers": 4}
print(f"[INFO]: Computation device: {device}")


We initialize a wandb run that will save our metrics.

In [48]:
if log_wandb:
    import wandb

    # Authenticate through the project helper before opening the run.
    wandb_api.login()

    # Identification of the run inside the wandb project.
    run_settings = {
        "project": "microstructure-reconstruction",
        "group": "Naive Network",
        "job_type": "train",
    }
    run = wandb.init(**run_settings)


Parameters of our run:

In [49]:
# Run configuration: stored in the wandb run config when logging is enabled,
# otherwise in a plain dictionary.
if log_wandb:
    config = wandb.config
else:
    config = {}

# `run` only exists when wandb logging is enabled; without it, fall back to the
# job type that is passed to `wandb.init` so the offline path does not raise a
# NameError.
config["job_type"] = run.job_type if log_wandb else "train"
config["train_val_split"] = 0.7
config["seed"] = 42
config["batch_size"] = 64
config["learning_rate"] = 0.0001
config["device"] = device
config["momentum"] = 0.9
config["architecture"] = "VGG"
config["input_width"] = 64
config["weight_decay"] = 0.0000
# Counter of completed training epochs; incremented by the model after each epoch.
config["epochs"] = 0
config["frac_sample"] = 1
config["log_wandb"] = log_wandb

# Seed the random number generators for reproducibility.
torch.manual_seed(config["seed"])
pl.seed_everything(config["seed"])


# Datasets

We create the training and validation dataset from our dataframe of descriptors. 

In [50]:
class DataModule(pl.LightningDataModule):
    """Lightning data module pairing single slice images with fabric descriptors.

    The descriptor table is read either from the ``raw_fabrics:3_images`` wandb
    artifact (when ``config["log_wandb"]`` is truthy) or from
    ``REV1_600/fabrics.txt`` under ``repo_path``. It is then expanded to one
    row per photo, min-max scaled and split into training and validation sets.

    Args:
        config: Mapping read for the keys ``log_wandb``, ``input_width``,
            ``frac_sample``, ``seed``, ``train_val_split`` and ``batch_size``.
        repo_path: Root directory of the repository; photo paths are resolved
            against it.
    """

    def __init__(
        self,
        config,
        repo_path,
    ):
        super().__init__()
        self.config = config
        # Keep the path on the instance so the methods below use the argument
        # that was passed in rather than a notebook-level global of the same name.
        self.repo_path = Path(repo_path)

        if self.config["log_wandb"]:
            self.data_at = wandb.Api().artifact(
                "matiasetcheverry/microstructure-reconstruction/"
                + "raw_fabrics"
                + ":3_images"
            )

        # Image pipeline: centre crop, resize to the network input size,
        # convert to a tensor, then apply a light Gaussian blur.
        self.transform = transforms.Compose(
            [
                transforms.CenterCrop(207),
                transforms.Resize(
                    (self.config["input_width"], self.config["input_width"])
                ),
                transforms.ToTensor(),
                transforms.GaussianBlur(kernel_size=3, sigma=0.5),
            ]
        )

    def prepare_data(self):
        """Download the wandb artifact when wandb logging is enabled."""
        if self.config["log_wandb"]:
            self.data_at.download()

    def init_fabrics_df(self):
        """Build ``fabrics_df`` (one row per REV) and ``single_fabrics_df``.

        ``single_fabrics_df`` is produced by
        ``dataframe_reformat.convert_into_single_entry_df`` on the ``photos``
        column.
        """
        if self.config["log_wandb"]:
            self.fabrics_df = wandb_api.convert_table_to_dataframe(
                self.data_at.get("fabrics")
            )
        else:
            self.fabrics_df = pd.read_csv(self.repo_path / "REV1_600/fabrics.txt")
            path_to_slices = self.repo_path / "REV1_600/REV1_600Slices"
            self.fabrics_df["photos"] = self.fabrics_df["id"].apply(
                func=dataframe_reformat.associate_rev_id_to_its_images,
                args=(path_to_slices, 3, self.repo_path),
            )

        # Resolve every photo path against the repository root.
        root = self.repo_path
        self.fabrics_df["photos"] = self.fabrics_df["photos"].apply(
            func=lambda photo_paths: [str(root / Path(x)) for x in photo_paths]
        )
        # Drop the rows that have no photo at all.
        self.fabrics_df = self.fabrics_df[self.fabrics_df.photos.str.len().gt(0)]
        # Optional sub-sampling; the sampling also shuffles the rows.
        self.fabrics_df = self.fabrics_df.sample(
            frac=self.config["frac_sample"], random_state=self.config["seed"]
        )
        self.single_fabrics_df = dataframe_reformat.convert_into_single_entry_df(
            self.fabrics_df, "photos"
        )

    def setup(self, stage=None):
        """Load the descriptors, scale them to [0, 1] and build both datasets.

        Args:
            stage: Stage name supplied by Lightning; it is not used here. It
                defaults to ``None`` so the method can also be called directly.
        """
        self.init_fabrics_df()
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        normalized_fabrics = deepcopy(self.single_fabrics_df)
        # Every column except the first and the last is scaled.
        # presumably these are the descriptor columns between the id and the
        # photo path — TODO confirm against the dataframe schema.
        # NOTE(review): the scaler is fitted before the split, so validation
        # rows contribute to the scaling range — confirm this is intended.
        normalized_fabrics.iloc[:, 1:-1] = self.scaler.fit_transform(
            self.single_fabrics_df.iloc[:, 1:-1]
        )

        train_df, test_df = train_test_split(
            normalized_fabrics,
            train_size=self.config["train_val_split"],
            random_state=self.config["seed"],
            shuffle=True,
        )

        self.train_dataset = dataset.SinglePhotoDataset(
            train_df, transform=self.transform
        )
        self.validation_dataset = dataset.SinglePhotoDataset(
            test_df,
            transform=self.transform,
        )
        # Scaled validation targets, kept as a NumPy array.
        self.targets = test_df.iloc[:, 1:-1].to_numpy()

    def _make_loader(self, data, shuffle):
        """Wrap ``data`` in a DataLoader using the configured batch size."""
        # `kwargs` is the notebook-level dictionary of DataLoader options.
        return DataLoader(
            data,
            batch_size=self.config["batch_size"],
            shuffle=shuffle,
            **kwargs,
        )

    def train_dataloader(self):
        """Return the shuffled loader over the training dataset."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the unshuffled loader over the validation dataset."""
        return self._make_loader(self.validation_dataset, shuffle=False)

    def test_dataloader(self):
        """Return the validation loader; no separate test set is built."""
        return self.val_dataloader()

    def predict_dataloader(self):
        """Return an unshuffled loader over the validation images only."""
        # The targets are dropped; all images are materialised in a list first.
        return self._make_loader(
            [image for image, _ in self.validation_dataset],
            shuffle=False,
        )


# Data module shared by the training cells below.
dm = DataModule(config=config, repo_path=repo_path)


# Model

We then create our model, with a forward method.

In [51]:
class VGG11(pl.LightningModule):
    """VGG-style CNN regressing 23 values from a single-channel image.

    Args:
        config: Mapping read for ``input_width``, ``device``,
            ``learning_rate`` and ``weight_decay``. The model also writes
            ``model_type``, ``loss_type``, ``optimizer_type`` and increments
            ``epochs`` in it.
        scaler: Optional scaler kept on the instance; the model itself never
            calls it.
    """

    def __init__(self, config, scaler=None):
        super().__init__()

        self.config = config
        self.config["model_type"] = type(self)
        self.scaler = scaler

        self.configure_model()
        self.configure_criterion()
        self.configure_metrics()

    def configure_model(self):
        """Create the convolutional feature extractor and the regression head."""
        # Five conv / ReLU / 2x2 max-pool stages; only the last one has batch norm.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Each max-pool floor-halves the spatial size, so after five of them the
        # feature map side is width // 32. Integer division keeps the flattened
        # size correct for widths that are not a multiple of 32.
        input_fc = (int(self.config["input_width"]) // (2 ** 5)) ** 2 * 512
        # Fully connected head. Plain Dropout is used because the activations
        # here are 2-D (batch, features); Dropout2d targets feature maps.
        self.linear_layers = nn.Sequential(
            nn.Linear(in_features=input_fc, out_features=512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=512, out_features=512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=512, out_features=23),
        )

    def forward(self, x):
        """Return the 23 predictions for a batch of images ``x``."""
        x = self.conv_layers(x)
        # Flatten everything but the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute the loss on one batch and log it once per epoch."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log(
            "train_loss",
            loss,
            on_step=False,
            on_epoch=True,
        )
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate every configured metric on one validation batch."""
        x, y = batch
        y_hat = self(x)
        # Predictions go first and targets second: MAPE and R2 are not
        # symmetric, so swapping the two arguments changes their value.
        metrics = {name: metric(y_hat, y) for name, metric in self.metrics.items()}
        self.log_dict(metrics, on_step=False, on_epoch=True)
        return metrics

    def predict_step(self, batch, batch_idx: int, dataloader_idx: int = None):
        """Run the forward pass on a batch that contains only images."""
        return self(batch)

    def training_epoch_end(self, outputs):
        """Increment the epoch counter stored in the config."""
        # NOTE(review): the name of this hook depends on the installed
        # pytorch_lightning version — confirm it is still invoked.
        self.config["epochs"] += 1

    def configure_criterion(self):
        """Use the L1 loss as criterion and record its type in the config."""
        self.criterion = nn.L1Loss()
        self.config["loss_type"] = type(self.criterion)

    def configure_metrics(self):
        """Build the validation metrics, each moved to the configured device."""
        # This is a plain dict, so the metrics are moved to the device explicitly.
        self.metrics = {
            "val_loss": self.criterion,
            "mae": torchmetrics.MeanAbsoluteError().to(self.config["device"]),
            "mape": torchmetrics.MeanAbsolutePercentageError().to(
                self.config["device"]
            ),
            "smape": torchmetrics.SymmetricMeanAbsolutePercentageError().to(
                self.config["device"]
            ),
            "r2_score": torchmetrics.R2Score(num_outputs=23).to(self.config["device"]),
            "cosine_similarity": torchmetrics.CosineSimilarity(reduction="mean").to(
                self.config["device"]
            ),
        }

    def configure_optimizers(self):
        """Return an Adam optimizer and record its type in the config."""
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.config["learning_rate"],
            weight_decay=self.config["weight_decay"],
        )
        self.config["optimizer_type"] = type(optimizer)
        return optimizer


# Build the network and report its size.
model = VGG11(config)
# Sum the element counts of all parameter tensors.
total_params = sum(map(torch.numel, model.parameters()))
print(f"[INFO]: {total_params:,} total parameters.")


# Checkpoint

We add 2 checkpoints to our training:

* one for saving our model every time the validation loss reaches a new minimum
* one for saving the scripts of the model and of the data module

In [52]:
# `run` only exists when wandb logging is enabled. Without it, write the
# checkpoints to a local folder and read the job type from the config, so this
# cell does not raise a NameError in offline mode.
if config["log_wandb"]:
    checkpoint_dir, job_type = run.dir, run.job_type
else:
    checkpoint_dir = str(Path.cwd() / "checkpoints")
    job_type = config.get("job_type", "train")

# Saves the model whenever the monitored validation loss reaches a new minimum,
# and always keeps the last checkpoint.
model_checkpoint = pl.callbacks.model_checkpoint.ModelCheckpoint(
    dirpath=checkpoint_dir,
    filename="{epoch}-{val_loss:.3f}",
    monitor="val_loss",
    mode="min",
    verbose=True,
    save_last=True,
)

# Project callback that stores the model and data module scripts.
script_checkpoint = training.ScriptCheckpoint(
    dirpath=checkpoint_dir,
)

callbacks = [script_checkpoint]
log = None
if job_type == "train":
    # Model checkpoints are only kept for real training runs.
    callbacks.append(model_checkpoint)
    print("[INFO]: saving models.")
elif job_type == "debug":
    # Value later passed as `log=` to the wandb logger's `watch` call.
    log = "all"


# Training

We then train our model.

In [None]:
# Attach a wandb logger only when logging is enabled.
wandb_logger = pl.loggers.WandbLogger() if config["log_wandb"] else None
if wandb_logger is not None:
    wandb_logger.watch(model, log=log, log_graph=True)

# Trainer options gathered in one place. Debugging knobs that can be added
# here when needed: limit_train_batches=0.1, limit_val_batches=0.1,
# log_every_n_steps=1.
trainer_options = dict(
    max_epochs=150,
    callbacks=callbacks,
    logger=wandb_logger,
    devices="auto",
    accelerator="auto",
)
trainer = pl.Trainer(**trainer_options)
trainer.fit(model, datamodule=dm)
