In [1]:
from importlib import reload
from pathlib import Path
import pandas as pd
import torch
from custom_datasets.single_photo_dataset import SinglePhotoDataset

import wandb
from sklearn.model_selection import train_test_split
from torchvision import transforms
import tools

# Log in to Weights & Biases through the project helper. Its return value is
# kept because the data folders in the cells below are built relative to it.
repoPath = tools.wandb_login()

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # Extra loader options, only set when CUDA is available
    # (presumably forwarded to a DataLoader later -- confirm at the call site).
    kwargs = {"num_workers": 1, "pin_memory": True}
else:
    device = torch.device("cpu")
    kwargs = {}
print(f"[INFO]: Computation device: {device}")


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmatiasetcheverry[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/matias/.netrc


[INFO]: Computation device: cpu


## Export Fabrics Dataframe

In [2]:
# Open the W&B run that will carry the fabrics dataframe artifact.
_dataframe_run_options = {
    "project": "microstructure-reconstruction",
    "group": "upload_artifact",
    "job_type": "upload_dataframe",
}
run = wandb.init(**_dataframe_run_options)

# Store the seed in the run configuration, then seed torch's RNG with it.
config = wandb.config
config.seed = 42
torch.manual_seed(config.seed)


<torch._C.Generator at 0x7fe62a791710>

In [3]:
# Every input lives under the "REV1_600" folder at the root returned by
# tools.wandb_login().
topLevelFolder = repoPath.joinpath("REV1_600")

path_to_revs = topLevelFolder.joinpath("REV1_6003D_model")
path_to_fabrics = topLevelFolder.joinpath("fabrics.txt")  # read with pd.read_csv below
path_to_slices = topLevelFolder.joinpath("REV1_600Slices")  # globbed for image folders below


In [5]:
# Number of images per sample; it selects the "<nb_images>p*" slice folders
# and is also handed to the association helper below.
nb_images = 3

# NOTE(review): the displayed frame has an "Unnamed: 0" column, which looks
# like a saved index read back as data -- confirm whether index_col=0 is wanted.
fabrics_df = pd.read_csv(path_to_fabrics)
path_to_images = list(path_to_slices.glob(f"{nb_images}p*/"))

# Add a "photos" column: for each id, the helper returns the image paths
# associated with it (one list per row).
fabrics_df["photos"] = fabrics_df["id"].apply(
    tools.associate_rev_id_to_its_images,
    args=(path_to_slices, nb_images, repoPath),
)

# Drop the rows for which no image was found.
has_photos = fabrics_df["photos"].str.len() > 0
fabrics_df = fabrics_df[has_photos]
fabrics_df

Unnamed: 0,id,orientation-0_mean,orientation-0_std,orientation-1_mean,orientation-1_std,orientation-2_mean,orientation-2_std,orientation-3_mean,orientation-3_std,orientation-4_mean,...,aspectratio-1_mean,aspectratio-1_std,size_mean,size_std,solidity_mean,solidity_std,roundness_mean,roundness_std,volume_fraction,photos
1,Spec-1,0.324161,0.354717,0.321122,0.023721,0.022712,0.017081,0.307251,0.314178,0.307708,...,0.138574,0.104945,13.688080,4.475980,0.851367,0.097545,0.210755,0.086931,0.386223,[REV1_600/REV1_600Slices/3pics/Spec-1_Imgs/x-y...
2,Spec-10,0.342926,0.329461,0.327613,0.003943,-0.008105,-0.007804,0.309317,0.301727,0.307888,...,0.141167,0.097689,12.133247,3.346814,0.875065,0.051042,0.209093,0.082589,0.337122,[REV1_600/REV1_600Slices/3pics/Spec-10_Imgs/x-...
3,Spec-100,0.343094,0.346566,0.310340,-0.000564,0.023228,-0.008377,0.316436,0.317267,0.300596,...,0.140470,0.097848,13.673221,4.078330,0.865932,0.071402,0.208290,0.084531,0.339295,[REV1_600/REV1_600Slices/3pics/Spec-100_Imgs/x...
4,Spec-101,0.342479,0.330274,0.327246,-0.004556,-0.004594,0.001348,0.319543,0.311533,0.311565,...,0.141739,0.104199,9.809217,2.654313,0.874050,0.054507,0.212802,0.090636,0.160133,[REV1_600/REV1_600Slices/3pics/Spec-101_Imgs/y...
5,Spec-102,0.331517,0.347090,0.321393,-0.003134,0.006752,-0.003562,0.311683,0.312832,0.300369,...,0.137749,0.101959,11.310906,3.005233,0.875842,0.057680,0.215132,0.086989,0.278125,[REV1_600/REV1_600Slices/3pics/Spec-102_Imgs/y...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Spec-95,0.357703,0.326197,0.316100,-0.001715,-0.004538,0.005818,0.312065,0.310956,0.297012,...,0.144185,0.100711,9.118604,2.326626,0.875007,0.054824,0.209903,0.087050,0.103977,[REV1_600/REV1_600Slices/3pics/Spec-95_Imgs/y-...
597,Spec-96,0.331500,0.347982,0.320518,-0.003447,0.009624,-0.006870,0.301939,0.303962,0.297789,...,0.140956,0.100593,11.723485,3.124647,0.877883,0.050196,0.210634,0.088092,0.278101,[REV1_600/REV1_600Slices/3pics/Spec-96_Imgs/y-...
598,Spec-97,0.334720,0.348321,0.316959,-0.006397,0.020998,-0.007150,0.309714,0.313304,0.294836,...,0.134415,0.097372,13.329017,3.869939,0.873163,0.066183,0.211562,0.081947,0.343699,[REV1_600/REV1_600Slices/3pics/Spec-97_Imgs/y-...
599,Spec-98,0.328898,0.346652,0.324450,0.001370,0.014416,0.008813,0.308190,0.317739,0.307033,...,0.136231,0.100775,13.390852,3.959654,0.867728,0.075259,0.209870,0.083762,0.396682,[REV1_600/REV1_600Slices/3pics/Spec-98_Imgs/x-...


In [6]:
# Wrap the dataframe in a W&B table so it can be stored inside an artifact.
table = wandb.Table(dataframe=fabrics_df)

# NOTE(review): the description mentions 100 fabrics, while the frame shown
# above appears to hold roughly 600 rows -- confirm the wording.
raw_data = wandb.Artifact(
    type="Fabrics",
    name="raw_fabrics",
    description="original 100 fabrics with paths to images",
)
raw_data.add(table, name="fabrics")

# The alias records how many images per sample this version was built with.
artifact_aliases = [f"{nb_images}_images"]
run.log_artifact(raw_data, aliases=artifact_aliases)
run.finish()


## Export Datasets

In [54]:
# Open a separate W&B run dedicated to building and uploading the datasets.
_dataset_run_options = {
    "project": "microstructure-reconstruction",
    "group": "upload_artifact",
    "job_type": "upload_dataset",
}
run = wandb.init(**_dataset_run_options)

config = wandb.config
config.seed = 42  # seeds torch here and the train/test split below
config.input_width = 64  # side length the images are resized to
config.train_val_split = 0.7  # passed as train_size to train_test_split
torch.manual_seed(config.seed)


<torch._C.Generator at 0x7f133ed2b130>

In [55]:
# Pull the fabrics table that was logged under the "3_images" alias and turn
# it back into dataframes through the project helpers.
data_at = run.use_artifact("raw_fabrics" + ":3_images")
data_dir = data_at.download()
fabrics_table = data_at.get("fabrics")
fabrics_df = tools.convert_table_to_dataframe(fabrics_table)
single_entry_df = tools.convert_into_single_entry_df(fabrics_df)

# Seeded, shuffled split driven by the run configuration.
train_df, test_df = train_test_split(
    single_entry_df,
    shuffle=True,
    random_state=config.seed,
    train_size=config.train_val_split,
)

# Image pipeline: centre crop, resize to a square, convert to tensor, then a
# light Gaussian blur.
side = config.input_width
preprocessing_steps = [
    transforms.CenterCrop(207),
    transforms.Resize((side, side)),
    transforms.ToTensor(),
    transforms.GaussianBlur(kernel_size=3, sigma=0.5),
]
transform = transforms.Compose(preprocessing_steps)

train_dataset = SinglePhotoDataset(train_df, normalization=True, transform=transform)

# The validation set receives the training set's max/min (presumably so both
# splits share the same normalization -- confirm in SinglePhotoDataset).
train_bounds = [train_dataset.max, train_dataset.min]
validation_dataset = SinglePhotoDataset(
    test_df, transform=transform, normalization=train_bounds
)


In [56]:
def _log_dataset_artifact(run, dataset, name):
    """Package ``dataset`` into a "Datasets" artifact named ``name`` and log it.

    Two entries are added through the project's ``tools`` helpers:

    * the dataset object itself, stored under ``name``;
    * a dict stored under ``"creation_data_<name>"`` with the result of
      ``tools.get_cell_code`` for the dataset's class (key ``"script"``) and
      the result of ``tools.get_members`` for the instance (key ``"members"``).

    Args:
        run: the active W&B run on which the artifact is logged.
        dataset: the dataset object to store.
        name: artifact name, also used as the entry name for the dataset.

    Returns:
        The ``wandb.Artifact`` that was logged.
    """
    artifact = wandb.Artifact(name=name, type="Datasets")
    tools.add_torch_object(artifact, dataset, name)
    tools.add_pickle_object(
        artifact,
        {
            "script": tools.get_cell_code(type(dataset)),
            "members": tools.get_members(dataset),
        },
        f"creation_data_{name}",
    )
    run.log_artifact(artifact)
    return artifact


# Both splits go through the same packaging steps; only the dataset and the
# artifact name differ. The train artifact is logged first, as before.
train_artifact = _log_dataset_artifact(run, train_dataset, "train_dataset")
val_artifact = _log_dataset_artifact(run, validation_dataset, "validation_dataset")
run.finish()
