In [24]:
import sys
from pathlib import Path

import os  # needed only for the local repo-path override below

# Detect the hosting environment by checking which platform modules are
# already present in sys.modules.
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules
if IS_KAGGLE:
    repo_path = Path("../input/microstructure-reconstruction")
elif IS_COLAB:
    from google.colab import drive

    drive.mount("/content/gdrive")
    repo_path = Path("/content/gdrive/MyDrive/microstructure-reconstruction")
else:
    # Local run: the checkout location can be overridden through the
    # MICROSTRUCTURE_REPO_PATH environment variable; the historical
    # hard-coded location remains the default so existing setups still work.
    repo_path = Path(
        os.environ.get(
            "MICROSTRUCTURE_REPO_PATH",
            "/home/matias/microstructure-reconstruction",
        )
    )

# Make the repository importable. Skip the append when the entry is already
# registered so that re-running this cell does not stack duplicates.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

from copy import deepcopy
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.optim as optim
import torchmetrics
import wandb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torchvision import models as pretrained_models
from torchvision import transforms, utils
from tqdm import tqdm

from custom_datasets import dataset
from custom_models import models
from tools import dataframe_reformat, inspect_code, plotting, training, wandb_api

# Flag read by the following cell to choose between the W&B config object and
# a plain dict.
log_wandb = True

# Pick the compute device and the matching worker settings.
# NOTE(review): `kwargs` is presumably meant to be forwarded to DataLoader;
# it is not used in the visible cells — confirm.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    kwargs = {"num_workers": 2, "pin_memory": True}
else:
    device = torch.device("cpu")
    kwargs = {"num_workers": 4}
print(f"[INFO]: Computation device: {device}")


[INFO]: Computation device: cpu


## Export Fabrics Dataframe

In [25]:
# Open the W&B run under which the fabrics dataframes are uploaded.
run = wandb.init(
    job_type="upload_dataframe",
    group="upload_artifact",
    project="microstructure-reconstruction",
)

# Settings are stored in the W&B config when logging is enabled, otherwise in
# a plain dict that supports the same item assignment.
config = wandb.config if log_wandb else {}

# NOTE(review): `run` is assigned just above, so the "test" fallback looks
# unreachable in this cell — confirm before relying on it.
_settings = (
    ("job_type", run.job_type if "run" in locals() else "test"),
    ("train_val_split", 0.7),
    ("seed", 42),
    ("log_wandb", log_wandb),
)
for _setting_name, _setting_value in _settings:
    config[_setting_name] = _setting_value

# Seed torch directly, then through Lightning; the last call's return value is
# what the cell displays.
torch.manual_seed(config["seed"])
pl.seed_everything(config["seed"])





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Global seed set to 42


42

In [26]:
# Locations of the REV1_600 inputs, all resolved under the repository checkout.
topLevelFolder = repo_path.joinpath("REV1_600")
path_to_revs, path_to_fabrics, path_to_slices = (
    topLevelFolder.joinpath(child)
    for child in ("REV1_6003D_model", "fabrics.txt", "REV1_600Slices")
)


In [34]:
nb_images = 5


def _bracketed_number(path):
    """Return the float written between the first "[" and the first "]" of ``path``.

    Used as the sort key for the image paths of one row. Paths without
    brackets are not handled specially.
    """
    return float(path[path.find("[") + 1 : path.find("]")])


fabrics_df = pd.read_csv(path_to_fabrics)
# NOTE(review): `path_to_images` is not read anywhere in the visible cells;
# kept in case a later cell depends on it.
path_to_images = list(path_to_slices.glob(f"{nb_images}p*/"))

# Attach to every row the images associated with its id.
fabrics_df["photos"] = fabrics_df["id"].apply(
    func=dataframe_reformat.associate_rev_id_to_its_images,
    args=(path_to_slices, nb_images, repo_path),
)

# Keep only rows that have at least one image. The explicit copy makes the
# column assignment below write to an independent frame instead of to a
# slice of the unfiltered one (avoids pandas' chained-assignment warning).
fabrics_df = fabrics_df[fabrics_df["photos"].str.len().gt(0)].copy()
fabrics_df["photos"] = fabrics_df["photos"].apply(func=sorted)

train_df, test_df = train_test_split(
    fabrics_df,
    train_size=config["train_val_split"],
    random_state=config["seed"],
    shuffle=True,
)
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Re-order each row's images by the number embedded in the path. The column
# is addressed by name rather than by position so the result does not depend
# on "photos" being the last column.
train_df["photos"] = train_df["photos"].apply(func=sorted, key=_bracketed_number)
test_df["photos"] = test_df["photos"].apply(func=sorted, key=_bracketed_number)

In [35]:
# Upload the train and test dataframes as two "Fabrics" artifacts. Each one
# wraps its frame in a W&B table stored under the name "fabrics" and is
# tagged with an alias recording how many images each row carries.
for _artifact_name, _frame in (("train_df", train_df), ("test_df", test_df)):
    raw_data = wandb.Artifact(
        name=_artifact_name,
        type="Fabrics",
    )
    table = wandb.Table(dataframe=_frame)
    raw_data.add(table, name="fabrics")
    run.log_artifact(raw_data, aliases=[f"{nb_images}_images"])


<wandb.sdk.wandb_artifacts.Artifact at 0x7f2e1f953eb0>

In [36]:
# Close the run used for the dataframe upload; the next section opens its own
# run with wandb.init.
run.finish()




VBox(children=(Label(value='3.246 MB of 3.246 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

## Export Datasets

In [54]:
# Start a separate W&B run for the dataset upload.
run = wandb.init(
    job_type="upload_dataset",
    group="upload_artifact",
    project="microstructure-reconstruction",
)

# Record the settings of this run on the W&B config object.
config = wandb.config
for _option, _setting in (
    ("seed", 42),
    ("input_width", 64),
    ("train_val_split", 0.7),
):
    setattr(config, _option, _setting)

# Seed torch; the returned generator is what the cell displays.
torch.manual_seed(config.seed)


<torch._C.Generator at 0x7f133ed2b130>

In [55]:
# NOTE(review): `tools` and `SinglePhotoDataset` are not bound by the import
# cell at the top of this notebook (it imports submodules *from* `tools` and
# only `dataset` from `custom_datasets`) — confirm where they are defined
# before relying on a fresh Restart & Run All.
# NOTE(review): this reads "raw_fabrics:3_images", while the section above
# logs artifacts named "train_df" / "test_df" — presumably an artifact
# produced elsewhere; verify.
data_at = run.use_artifact("raw_fabrics" + ":3_images")
data_dir = data_at.download()

# Turn the stored "fabrics" table back into a dataframe, then into the
# single-entry form consumed by the dataset class.
fabrics_table = data_at.get("fabrics")
fabrics_df = tools.convert_table_to_dataframe(fabrics_table)
single_entry_df = tools.convert_into_single_entry_df(fabrics_df)

train_df, test_df = train_test_split(
    single_entry_df,
    shuffle=True,
    random_state=config.seed,
    train_size=config.train_val_split,
)

# Image pipeline: centre crop, resize to a square of side `input_width`,
# convert to tensor, then a light Gaussian blur.
image_steps = [
    transforms.CenterCrop(207),
    transforms.Resize((config.input_width, config.input_width)),
    transforms.ToTensor(),
    transforms.GaussianBlur(kernel_size=3, sigma=0.5),
]
transform = transforms.Compose(image_steps)

train_dataset = SinglePhotoDataset(
    train_df,
    normalization=True,
    transform=transform,
)
# The validation dataset is given the `max` / `min` attributes read from the
# training dataset as its normalization argument.
training_bounds = [train_dataset.max, train_dataset.min]
validation_dataset = SinglePhotoDataset(
    test_df,
    normalization=training_bounds,
    transform=transform,
)


In [56]:
def _log_dataset_artifact(active_run, dataset_obj, name):
    """Package ``dataset_obj`` into a "Datasets" artifact and log it on ``active_run``.

    The artifact is named ``name`` and receives two entries: the dataset
    itself (stored under ``name``) and a pickled dict holding the dataset
    class's cell code and the dataset's members (stored under
    ``"creation_data_" + name``).

    Returns the artifact that was logged.

    NOTE(review): `tools` is not bound by the import cell visible at the top
    of this notebook — confirm where it comes from.
    """
    artifact = wandb.Artifact(name=name, type="Datasets")
    tools.add_torch_object(artifact, dataset_obj, name)
    tools.add_pickle_object(
        artifact,
        {
            "script": tools.get_cell_code(type(dataset_obj)),
            "members": tools.get_members(dataset_obj),
        },
        f"creation_data_{name}",
    )
    active_run.log_artifact(artifact)
    return artifact


# Upload the training dataset first, then the validation dataset, and close
# the run once both are logged.
train_artifact = _log_dataset_artifact(run, train_dataset, "train_dataset")
val_artifact = _log_dataset_artifact(run, validation_dataset, "validation_dataset")
run.finish()
