In [None]:
import pandas as pd
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
#from tensorflow.keras.layers import
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Mount Google Drive so the dataset paths under /content/drive used below are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset root; the cells below read its "train", "valid" and "test" subfolders.
image_folder = '/content/drive/MyDrive/Proyecto Cultivos/deepglobe/'
# Destination of the training tiles (this name is reassigned later for the valid/test tiles).
output_folder = '/content/drive/MyDrive/Proyecto Cultivos/deepglobe/tiles-128/'

Creamos la función **generate_tiles** para generar tiles de un tamaño predeterminado a partir de cada imagen:



In [None]:
# Split every image into 128x128 tiles: this yields more samples, and smaller
# inputs are easier to process.

# Listing of the raw training folder (it holds both the satellite images and their masks).
filenames = os.listdir(os.path.join(image_folder, "train"))

def generate_tiles(image_folder, output_folder, tile_size=256, overlap=0):
  """Cut every training satellite image and its mask into square tiles.

  The function scans ``<image_folder>/train`` for files whose name contains
  ``"_sat"``; for each one it opens the matching ``*_mask.png`` from the same
  folder and crops both on the same grid, so tile ``(i, j)`` of the image and
  tile ``(i, j)`` of the mask cover the same pixels.

  Args:
    image_folder: Dataset root that contains the ``train`` subfolder.
    output_folder: Folder that receives the tiles (created if missing).
    tile_size: Side length of each square tile, in pixels.
    overlap: Number of pixels shared by neighbouring tiles. Must be smaller
      than ``tile_size``.

  Raises:
    ValueError: If ``tile_size`` is not positive or ``overlap >= tile_size``
      (the tiling stride would be zero or negative).

  Output files are named ``<image stem>_tile_<i>_<j>.jpg`` and
  ``<mask stem>_tile_<i>_<j>.png``, where ``i`` indexes columns (x) and ``j``
  indexes rows (y). Partial tiles at the right/bottom edges are dropped.
  """
  # Validate first: a non-positive stride would divide by zero or silently
  # produce no tiles at all.
  if tile_size <= 0:
    raise ValueError(f"tile_size must be positive, got {tile_size}")
  if overlap >= tile_size:
    raise ValueError(
        f"overlap ({overlap}) must be smaller than tile_size ({tile_size})")

  stride = tile_size - overlap
  train_folder = os.path.join(image_folder, "train")
  os.makedirs(output_folder, exist_ok=True)

  # List the folder here instead of relying on a module-level variable that
  # other cells reassign; sorting makes the processing order reproducible.
  image_filenames = sorted(
      filename for filename in os.listdir(train_folder) if "_sat" in filename)

  for image_filename in tqdm(image_filenames, desc="Processing images..."):
    image_stem = os.path.splitext(image_filename)[0]
    # Only the "_sat" marker and the extension differ between image and mask.
    mask_stem = image_stem.replace("_sat", "_mask")
    mask_filename = mask_stem + ".png"

    image_path = os.path.join(train_folder, image_filename)
    mask_path = os.path.join(train_folder, mask_filename)

    # Context managers release both file handles once the image is tiled.
    with Image.open(image_path) as image, Image.open(mask_path) as mask:
      width, height = image.size
      num_tiles_x = (width - overlap) // stride
      num_tiles_y = (height - overlap) // stride

      for i in range(num_tiles_x):
        for j in range(num_tiles_y):
          x = i * stride
          y = j * stride
          box = (x, y, x + tile_size, y + tile_size)

          # Save the image tile and the mask tile cut from the same box.
          image_tile_filename = f"{image_stem}_tile_{i}_{j}.jpg"
          image.crop(box).save(os.path.join(output_folder, image_tile_filename))

          mask_tile_filename = f"{mask_stem}_tile_{i}_{j}.png"
          mask.crop(box).save(os.path.join(output_folder, mask_tile_filename))

In [None]:
# Tile the training images and masks into 128x128 tiles (overlap left at its default of 0).
generate_tiles(image_folder, output_folder, tile_size=128)

Processing images...: 100%|██████████| 803/803 [04:13<00:00,  3.17it/s]


In [None]:
# Index the generated tiles: a DataFrame/CSV with the id and path of each image tile and its mask.

def save_tiles_df(folder, csv_name='tiles-512.csv'):
    """Build an index of the satellite tiles in ``folder`` and save it as CSV.

    Every file whose name contains ``"_sat"`` becomes one row. The mask path
    is derived from the tile name (``_sat_tile`` -> ``_mask_tile``, ``.jpg``
    -> ``.png``); the mask file itself is not checked for existence.

    Args:
        folder: Folder holding the image and mask tiles.
        csv_name: File name of the CSV written inside ``folder``. The default
            keeps the name this notebook has always used; pass ``None`` to
            skip writing the file.

    Returns:
        pandas.DataFrame with columns ``image_id`` (the text before the first
        underscore of the tile name), ``sat_image_path`` and ``mask_path``,
        ordered by tile file name.
    """
    img_ids = []
    sat_image_paths = []
    mask_paths = []

    # os.listdir returns entries in arbitrary order; sort so the index (and the
    # CSV) is identical from run to run.
    tile_filenames = sorted(os.listdir(folder))

    for tile_filename in tqdm(tile_filenames, desc="Processing..."):
        if "_sat" not in tile_filename:
            continue

        mask_filename = (
            tile_filename.replace("_sat_tile", "_mask_tile").replace(".jpg", ".png")
        )

        img_ids.append(tile_filename.split("_")[0])
        sat_image_paths.append(os.path.join(folder, tile_filename))
        mask_paths.append(os.path.join(folder, mask_filename))

    df_tiles = pd.DataFrame({
        'image_id': img_ids,
        'sat_image_path': sat_image_paths,
        'mask_path': mask_paths,
    })

    # The CSV lives inside the tiles folder itself, next to the tiles.
    if csv_name is not None:
        df_tiles.to_csv(os.path.join(folder, csv_name), index=False)

    return df_tiles



In [None]:
# Index the training tiles: one row per satellite tile, with the path of its mask tile.
df_tiles = save_tiles_df('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/tiles-128/')

In [None]:
# Save the training tile index as a CSV next to the tiles folder.
df_tiles.to_csv('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/tiles-128.csv', index=False)

Ahora replicamos una función similar para las imágenes que no tienen máscara:

In [None]:
# Split the images that have no mask (validation and test sets) into tiles, for the
# same reasons as the training set: more samples that are easier to process.

def generate_tiles_nomask(image_folder, output_folder, images_set, tile_size=256, overlap=0):
  """Cut every satellite image of one dataset split into square tiles.

  The function scans ``<image_folder>/<images_set>`` for files whose name
  contains ``"_sat"`` and writes the tiles of each one to ``output_folder``.

  Args:
    image_folder: Dataset root that contains the split subfolders.
    output_folder: Folder that receives the tiles (created if missing).
    images_set: Name of the split subfolder to read (the notebook passes
      ``"valid"`` and ``"test"``).
    tile_size: Side length of each square tile, in pixels.
    overlap: Number of pixels shared by neighbouring tiles. Must be smaller
      than ``tile_size``.

  Raises:
    ValueError: If ``tile_size`` is not positive or ``overlap >= tile_size``
      (the tiling stride would be zero or negative).

  Output files are named ``<image stem>_tile_<i>_<j>.jpg``, where ``i``
  indexes columns (x) and ``j`` indexes rows (y). Partial tiles at the
  right/bottom edges are dropped.
  """
  # Validate first: a non-positive stride would divide by zero or silently
  # produce no tiles at all.
  if tile_size <= 0:
    raise ValueError(f"tile_size must be positive, got {tile_size}")
  if overlap >= tile_size:
    raise ValueError(
        f"overlap ({overlap}) must be smaller than tile_size ({tile_size})")

  stride = tile_size - overlap
  set_folder = os.path.join(image_folder, images_set)
  os.makedirs(output_folder, exist_ok=True)

  # List the requested split directly so the result depends only on the
  # arguments; sorting makes the processing order reproducible.
  image_filenames = sorted(
      filename for filename in os.listdir(set_folder) if "_sat" in filename)

  for image_filename in tqdm(image_filenames, desc="Processing images..."):
    image_stem = os.path.splitext(image_filename)[0]
    image_path = os.path.join(set_folder, image_filename)

    # Context manager releases the file handle once the image is tiled.
    with Image.open(image_path) as image:
      width, height = image.size
      num_tiles_x = (width - overlap) // stride
      num_tiles_y = (height - overlap) // stride

      for i in range(num_tiles_x):
        for j in range(num_tiles_y):
          x = i * stride
          y = j * stride

          # Extract the tile and save it.
          image_tile = image.crop((x, y, x + tile_size, y + tile_size))
          image_tile_filename = f"{image_stem}_tile_{i}_{j}.jpg"
          image_tile.save(os.path.join(output_folder, image_tile_filename))

In [None]:
# Index the tiles that have no mask.
def save_tiles_nomask_df(folder):
  """Build an index of the satellite tiles stored in ``folder``.

  Every file whose name contains ``"_sat"`` becomes one row. The number of
  rows is printed before returning. Nothing is written to disk.

  Args:
    folder: Folder holding the image tiles.

  Returns:
    pandas.DataFrame with columns ``image_id`` (the text before the first
    underscore of the tile name) and ``sat_image_path``, ordered by tile
    file name.
  """
  # os.listdir returns entries in arbitrary order; sort so the index is
  # identical from run to run.
  tile_filenames = sorted(os.listdir(folder))

  img_ids = []
  sat_image_paths = []

  for tile_filename in tqdm(tile_filenames, desc="Processing..."):
    if "_sat" not in tile_filename:
      continue

    img_ids.append(tile_filename.split("_")[0])
    sat_image_paths.append(os.path.join(folder, tile_filename))

  df_nomask_tiles = pd.DataFrame(
      {'image_id': img_ids, 'sat_image_path': sat_image_paths})
  print(len(df_nomask_tiles))

  return df_nomask_tiles


In [None]:
# Tile the validation images (no masks) into their own folder.
# NOTE: this reassigns the module-level `output_folder` and `filenames` names.
output_folder = '/content/drive/MyDrive/Proyecto Cultivos/deepglobe/val_tiles-128/'
filenames = os.listdir(os.path.join(image_folder, "valid"))
generate_tiles_nomask(image_folder, output_folder, "valid", tile_size=128)

In [None]:
# Index the validation tiles (image id and path only; there are no masks).
df_val_tiles = save_tiles_nomask_df('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/val_tiles-128')

In [None]:
# Save the validation tile index as a CSV next to the tiles folder.
df_val_tiles.to_csv('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/val_tiles-128.csv', index=False)

In [None]:
# Tile the test images (no masks) into their own folder, then index the tiles.
# NOTE: this reassigns the module-level `output_folder` and `filenames` names.
output_folder = '/content/drive/MyDrive/Proyecto Cultivos/deepglobe/test_tiles-128/'
filenames = os.listdir(os.path.join(image_folder, "test"))

generate_tiles_nomask(image_folder, output_folder, "test", tile_size=128)
df_test_tiles = save_tiles_nomask_df('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/test_tiles-128')
# Show the first rows as a quick sanity check of the index.
df_test_tiles.head()

In [None]:
# Save the test tile index as a CSV next to the tiles folder.
df_test_tiles.to_csv('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/test_tiles-128.csv', index=False)

In [None]:
# Count the regular files in the training tiles folder (mask tiles included).

with os.scandir('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/tiles-128') as entries:
    # Sub-directories are ignored; only plain files are tallied.
    num_files = sum(1 for item in entries if item.is_file())

# Report the total
print(f"Number of files in the directory: {num_files}")

Number of files in the directory: 579767


In [None]:
# Count the regular files in the validation tiles folder (val_tiles-128, no masks).

with os.scandir('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/val_tiles-128') as entries:
    # Sub-directories are ignored; only plain files are tallied.
    num_files = sum(1 for item in entries if item.is_file())

# Report the total
print(f"Number of files in the directory: {num_files}")

Number of files in the directory: 61731


In [None]:
# Count the regular files in the test tiles folder (test_tiles-128, no masks).

with os.scandir('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/test_tiles-128') as entries:
    # Sub-directories are ignored; only plain files are tallied.
    num_files = sum(1 for item in entries if item.is_file())

# Report the total
print(f"Number of files in the directory: {num_files}")

Number of files in the directory: 62092


In [None]:
!pip install gdown
# Zip and download to local save (security copy)
!zip -r 'Path_selected/tiles-128.zip' 'Path_selected/tiles-128'

# Download the ZIP file
files.download('Path_selected/tiles-128.zip')