## About the notebook
This notebook creates the folder structure for a deep learning dataset that will be used with the 'datasets' library. There should be a train/ folder and a test/ folder under a dataset folder. Each subfolder inside train/ and test/ must be named according to the class label, and all images for that class should be placed in the corresponding folder.


In [0]:
!pip install geopandas

In [0]:
import zipfile
import geopandas as gpd
import os
from PIL import Image

In [0]:
# Folder the archive is extracted into.
OUTPUT_PATH = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/'
# Zip archive containing the image tiles.
ZIP_PATH = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/tiles_VCT.zip'

In [0]:
# Unpack every member of the tile archive into the output folder.
with zipfile.ZipFile(ZIP_PATH, mode='r') as tile_archive:
  tile_archive.extractall(path=OUTPUT_PATH)

In [0]:
# Read the tile labels, keep only the rows assigned to the train or test
# split, and reduce the table to the columns used by the rest of the notebook.
labels_path = "/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/tiles_VCT.geojson"
kept_columns = ["filename", "roof_material", "roof_pitch", "roof_material_dataset"]

labels = gpd.read_file(labels_path)
labels = labels[labels["roof_material_dataset"].isin(["train", "test"])]
labels = labels[kept_columns]

# File names of all tiles that carry a train/test assignment.
labelled_files = labels["filename"].tolist()

In [0]:
# Roof attribute the dataset folders are built for: "material" or "pitch".
# Switch by swapping which of the two assignments below is commented out.
target = "material"
# target = "pitch"

In [0]:
# Separate the labelled tiles by their assigned split and keep the row labels
# of each split for the copy loops further down.
split_of_row = labels["roof_material_dataset"]
train = labels[split_of_row == "train"]
test = labels[split_of_row == "test"]
train_index = train.index.tolist()
test_index = test.index.tolist()

In [0]:
# Create dataset_<target>/<split>/<class>/ for every split and class of the
# selected target.
DIRECTORY_PATH = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids'

# Class folder names per target. 'concrete_cement' is the folder name used for
# the "concrete/cement" label, which cannot be a folder name because of the "/".
CLASS_FOLDERS = {
  'material': ['healthy_metal', 'concrete_cement', 'incomplete', 'irregular_metal'],
  'pitch': ['gable', 'hip', 'flat', 'no_roof'],
}

for split in ['train', 'test']:
  # An unrecognised target creates nothing, matching the previous behaviour.
  for class_folder in CLASS_FOLDERS.get(target, []):
    class_dir = os.path.join(DIRECTORY_PATH, 'dataset_' + target, split, class_folder)
    # exist_ok=True makes the cell safe to re-run; the message is printed in
    # both cases so that it is accurate whether or not the folder was new.
    os.makedirs(class_dir, exist_ok=True)
    print(f"Directory '{class_dir}' created or already exists.")

In [0]:
# Save every training tile as TIFF under dataset_<target>/train/<class>/<filename>.
tiles_dir = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/tiles_VCT/tiles_VCT/'
label_column = {'material': 'roof_material', 'pitch': 'roof_pitch'}.get(target)

if label_column is not None:  # an unrecognised target is skipped, as before
  train_root = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/dataset_' + target + '/train/'
  for i in train_index:
    filename = train.loc[i, "filename"]
    # Labels such as "concrete/cement" contain a path separator; the class
    # folders use "_" in its place (concrete_cement).
    class_folder = train.loc[i, label_column].replace("/", "_")
    # A local name is used for the destination so the OUTPUT_PATH constant
    # (the extraction folder) is not overwritten by this loop.
    destination = train_root + class_folder + "/" + filename
    # The context manager closes the source file after each tile instead of
    # leaving one open handle per image.
    with Image.open(tiles_dir + filename) as image:
      image.save(destination, format="tiff")

In [0]:
# Save every test tile as TIFF under dataset_<target>/test/<class>/<filename>.
tiles_dir = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/tiles_VCT/tiles_VCT/'
label_column = {'material': 'roof_material', 'pitch': 'roof_pitch'}.get(target)

if label_column is not None:  # an unrecognised target is skipped, as before
  test_root = '/Volumes/prd_datascience_depcaribbeansids/volumes/depcaribbeansids/dataset_' + target + '/test/'
  for i in test_index:
    filename = test.loc[i, "filename"]
    # Labels such as "concrete/cement" contain a path separator; the class
    # folders use "_" in its place (concrete_cement).
    class_folder = test.loc[i, label_column].replace("/", "_")
    # A local name is used for the destination so the OUTPUT_PATH constant
    # (the extraction folder) is not overwritten by this loop.
    destination = test_root + class_folder + "/" + filename
    # The context manager closes the source file after each tile instead of
    # leaving one open handle per image.
    with Image.open(tiles_dir + filename) as image:
      image.save(destination, format="tiff")