In [1]:
import sys
from pathlib import Path

# Detect the hosting platform from the modules already loaded in this kernel.
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules
if IS_KAGGLE:
    repo_path = Path("../input/microstructure-reconstruction")
elif IS_COLAB:
    from google.colab import drive

    # The repository is read from Google Drive, which has to be mounted first.
    drive.mount("/content/gdrive")
    repo_path = Path("/content/gdrive/MyDrive/microstructure-reconstruction")
else:
    import os

    # Local run: the checkout location can be overridden with the
    # MICROSTRUCTURE_REPO_PATH environment variable. The previous hard-coded
    # location stays as the default so existing setups behave exactly as before.
    repo_path = Path(
        os.environ.get(
            "MICROSTRUCTURE_REPO_PATH", "/home/matias/microstructure-reconstruction"
        )
    )
# Expose the repository to the import system. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

from copy import deepcopy
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.optim as optim
import torchmetrics
import wandb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torchvision import models as pretrained_models
from torchvision import transforms, utils
from tqdm import tqdm

from custom_datasets import dataset
from custom_models import models
from tools import dataframe_reformat, inspect_code, plotting, training, wandb_api

# Switch read by the next cell to decide where run settings are stored.
log_wandb = True

# Prefer the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # NOTE(review): presumably forwarded to DataLoader(...) — confirm at the call site.
    kwargs = {"num_workers": 2, "pin_memory": True}
else:
    device = torch.device("cpu")
    kwargs = {"num_workers": 4}
print(f"[INFO]: Computation device: {device}")


[INFO]: Computation device: cpu


## Export Fabrics Dataframe

In [2]:
# Open the W&B run used by the following cells.
run = wandb.init(
    project="microstructure-reconstruction",
    group="upload_artifact",
    job_type="upload_dataframe",
)

# `log_wandb` only selects where the settings live: the run's `wandb.config`
# or a plain local dict. The run itself is always created above.
config = wandb.config if log_wandb else {}

# `run` is always bound at this point, so its job type is read directly
# (the former `"run" in locals()` fallback could never be taken).
config["job_type"] = run.job_type
config["train_val_split"] = 0.7
config["seed"] = 42
config["log_wandb"] = log_wandb
# seed_everything already seeds PyTorch (together with Python's `random` and
# NumPy), so a separate torch.manual_seed call is not needed.
pl.seed_everything(config["seed"])


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmatiasetcheverry[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Global seed set to 42


42

In [4]:
# Locations of the REV1_600 data, all resolved relative to the repository root.
topLevelFolder = repo_path.joinpath("REV1_600")
path_to_revs = topLevelFolder.joinpath("REV1_6003D_model")
path_to_fabrics = topLevelFolder.joinpath("fabrics.txt")
path_to_slices = topLevelFolder.joinpath("REV1_600Slices")


In [9]:
nb_images = 10


def _slice_position(path):
    """Return, as a float, the text between the first "[" and the first "]" of `path`."""
    return float(path[path.find("[") + 1 : path.find("]")])


def _with_ordered_photos(split_df):
    """Return a re-indexed copy of `split_df` whose photo lists are ordered by slice position.

    `reset_index` hands back a new frame, so the column assignment below never
    writes into a slice of the frame the split was taken from.
    """
    split_df = split_df.reset_index(drop=True)
    split_df["photos"] = split_df["photos"].apply(func=sorted, key=_slice_position)
    return split_df


fabrics_df = pd.read_csv(path_to_fabrics)
# Not used further in this cell; kept because other cells may rely on it.
path_to_images = list(path_to_slices.glob(f"{nb_images}p*/"))
fabrics_df["photos"] = fabrics_df["id"].apply(
    func=dataframe_reformat.associate_rev_id_to_its_images,
    args=(path_to_slices, nb_images, repo_path),
)
# Keep only the rows that have at least one image. The explicit copy makes the
# filtered frame independent, so the assignment below is not a chained
# assignment on a slice (no SettingWithCopyWarning).
fabrics_df = fabrics_df[fabrics_df.photos.str.len().gt(0)].copy()
fabrics_df["photos"] = fabrics_df["photos"].apply(func=sorted)
train_df, test_df = train_test_split(
    fabrics_df,
    train_size=config["train_val_split"],
    random_state=config["seed"],
    shuffle=True,
)
# Address the column by name instead of by position (`iloc[:, -1]`), so the
# result no longer depends on "photos" being the last column.
train_df = _with_ordered_photos(train_df)
test_df = _with_ordered_photos(test_df)

In [10]:
# Preview the training split; the last column ("photos") holds the image paths of each row.
train_df

Unnamed: 0,id,orientation-0_mean,orientation-1_mean,orientation-2_mean,orientation-3_mean,orientation-4_mean,orientation-5_mean,orientation-0_std,orientation-1_std,orientation-2_std,...,aspectratio-0_std,aspectratio-1_std,size_mean,size_std,solidity_mean,solidity_std,roundness_mean,roundness_std,volume_fraction,photos
0,Spec-197,0.356348,0.323515,0.320137,-0.017216,0.003262,-0.001308,0.314627,0.306735,0.304501,...,0.143148,0.102656,10.741075,2.938481,0.873860,0.055687,0.212789,0.087993,0.217926,[REV1_600/REV1_600Slices/10pics/Spec-197_Imgs/...
1,Spec-344,0.347647,0.331023,0.321330,0.003530,0.002675,0.000964,0.322281,0.311201,0.305359,...,0.138008,0.124940,10.935470,2.712020,0.954486,0.083377,0.358212,0.143051,0.338746,[REV1_600/REV1_600Slices/10pics/Spec-344_Imgs/...
2,Spec-99,0.359718,0.331703,0.308579,-0.012734,0.000141,0.003148,0.313214,0.306073,0.296053,...,0.139168,0.100356,10.947315,2.989302,0.870407,0.058849,0.212515,0.086692,0.223764,[REV1_600/REV1_600Slices/10pics/Spec-99_Imgs/x...
3,Spec-530,0.374891,0.358770,0.266340,0.239456,0.009192,0.008185,0.282298,0.283344,0.167377,...,0.133047,0.120841,11.482441,2.195267,0.928704,0.077101,0.292550,0.127101,0.141834,[REV1_600/REV1_600Slices/10pics/Spec-530_Imgs/...
4,Spec-492,0.143142,0.139783,0.717074,0.247139,0.011478,0.001049,0.144587,0.143042,0.168408,...,0.136951,0.113757,10.953518,2.482555,0.919975,0.090867,0.277326,0.125126,0.296957,[REV1_600/REV1_600Slices/10pics/Spec-492_Imgs/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,Spec-163,0.310736,0.349786,0.339478,-0.005327,-0.000082,0.003391,0.297220,0.305787,0.309034,...,0.138733,0.105004,10.398206,2.797734,0.877064,0.054819,0.217739,0.089788,0.216830,[REV1_600/REV1_600Slices/10pics/Spec-163_Imgs/...
416,Spec-195,0.335551,0.339138,0.325312,-0.001358,0.011718,-0.014050,0.309818,0.312565,0.307158,...,0.143635,0.102686,13.459184,4.464352,0.847222,0.103073,0.206318,0.086632,0.385221,[REV1_600/REV1_600Slices/10pics/Spec-195_Imgs/...
417,Spec-342,0.334104,0.331458,0.334438,0.000891,0.002602,0.011794,0.313064,0.313294,0.313087,...,0.141975,0.124556,10.969777,2.483673,0.956011,0.078309,0.353728,0.148757,0.308272,[REV1_600/REV1_600Slices/10pics/Spec-342_Imgs/...
418,Spec-491,0.144886,0.140017,0.715096,0.251179,-0.011591,-0.003892,0.153152,0.142701,0.171072,...,0.138290,0.117119,11.570258,2.803110,0.920576,0.084933,0.278994,0.129881,0.174805,[REV1_600/REV1_600Slices/10pics/Spec-491_Imgs/...


In [11]:
# Upload each split as its own "Fabrics" artifact. The dataframe is stored as
# a W&B table named "fabrics", and the artifact version is tagged with an alias
# derived from `nb_images`.
for artifact_name, split_df in (("train_df", train_df), ("test_df", test_df)):
    raw_data = wandb.Artifact(name=artifact_name, type="Fabrics")
    table = wandb.Table(dataframe=split_df)
    raw_data.add(table, name="fabrics")
    run.log_artifact(raw_data, aliases=[f"{nb_images}_images"])

# Close the run once both artifacts have been logged.
run.finish()


VBox(children=(Label(value=' 1.38MB of 1.38MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

## Export Datasets

In [54]:
# Settings stored in the run configuration below.
SEED = 42
INPUT_WIDTH = 64
TRAIN_VAL_SPLIT = 0.7

# Start a new W&B run dedicated to the dataset upload.
run = wandb.init(
    job_type="upload_dataset",
    group="upload_artifact",
    project="microstructure-reconstruction",
)
config = wandb.config
config.seed = SEED
config.input_width = INPUT_WIDTH
config.train_val_split = TRAIN_VAL_SPLIT
# Seed PyTorch from the value just written to the run configuration.
torch.manual_seed(config.seed)


<torch._C.Generator at 0x7f133ed2b130>

In [55]:
# NOTE(review): the import cell visible in this notebook only has
# `from tools import ...` and `from custom_datasets import dataset`; neither
# `tools` nor `SinglePhotoDataset` is bound by those lines — confirm both names
# are available before running this cell.
# NOTE(review): this reads the "raw_fabrics" artifact with the "3_images" alias,
# while the upload cell earlier in the notebook logs "train_df"/"test_df" with a
# "10_images" alias — confirm this is the intended input.
data_at = run.use_artifact("raw_fabrics" + ":3_images")
data_dir = data_at.download()
fabrics_table = data_at.get("fabrics")
fabrics_df = tools.convert_table_to_dataframe(fabrics_table)
single_entry_df = tools.convert_into_single_entry_df(fabrics_df)

# Split with the ratio and seed stored in the run configuration.
train_df, test_df = train_test_split(
    single_entry_df,
    shuffle=True,
    random_state=config.seed,
    train_size=config.train_val_split,
)

# Image pipeline: centre crop, resize to a square of side `config.input_width`,
# convert to a tensor, then apply a small Gaussian blur.
target_size = (config.input_width, config.input_width)
transform = transforms.Compose(
    [
        transforms.CenterCrop(207),
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.GaussianBlur(kernel_size=3, sigma=0.5),
    ]
)

train_dataset = SinglePhotoDataset(train_df, normalization=True, transform=transform)
# The validation dataset receives the `max`/`min` attributes of the training
# dataset (presumably so both splits share one scaling — confirm).
train_bounds = [train_dataset.max, train_dataset.min]
validation_dataset = SinglePhotoDataset(
    test_df, normalization=train_bounds, transform=transform
)


In [56]:
# NOTE(review): `tools` is not bound by the import cell visible in this
# notebook (only `from tools import ...` appears) — confirm it is available.
def _dataset_artifact(artifact_name, dataset_obj, creation_name):
    """Build a "Datasets" artifact holding `dataset_obj` and its creation data.

    The dataset is added under `artifact_name`; a dict with the code of the
    dataset's class ("script") and the dataset's members ("members") is added
    under `creation_name`.
    """
    artifact = wandb.Artifact(name=artifact_name, type="Datasets")
    tools.add_torch_object(artifact, dataset_obj, artifact_name)
    creation_data = {
        "script": tools.get_cell_code(type(dataset_obj)),
        "members": tools.get_members(dataset_obj),
    }
    tools.add_pickle_object(artifact, creation_data, creation_name)
    return artifact


train_artifact = _dataset_artifact(
    "train_dataset", train_dataset, "creation_data_train_dataset"
)
run.log_artifact(train_artifact)

val_artifact = _dataset_artifact(
    "validation_dataset", validation_dataset, "creation_data_validation_dataset"
)
run.log_artifact(val_artifact)

# Close the run once both artifacts have been logged.
run.finish()
