In [3]:
!pip install geopandas
!pip install rasterio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 6.6 MB/s 
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.8.21-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 434 kB/s 
[?25hCollecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 43.9 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.21 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.

In [7]:
import torch
from torch.utils.data import DataLoader, Dataset
import cv2
from rasterio.features import rasterize
import numpy as np
import geopandas as gpd
from glob import glob
import rasterio
import matplotlib.pyplot as plt
from torch import nn

In [8]:
class ImageUtils:
  def get_random_crop(image, crop_size):
    crop_height, crop_width = crop_size
    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height

    x = np.random.randint(0, max_x)
    y = np.random.randint(0, max_y)

    crop = image[y: y + crop_height, x: x + crop_width]

    return crop


class CustomData(Dataset):
  def __init__(self, gdf, size, path, no_crop_per_img, cropped_size = (64,64)):
    super().__init__()
    self.gdf = gdf
    self.size = size
    self.path = path
    self.no_crop_per_img = no_crop_per_img
    self.cropped_size = cropped_size
    img_profile = rasterio.open(path[0][0]).profile
    label = gdf.to_crs(img_profile['crs'])
    labels = rasterize(
        shapes=[(row.geometry, int(row.crop_type)) for i, row in label.iterrows()], # Construct polygon and value tuples
        out_shape=(img_profile['width'], img_profile['height']),
        transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
    )
    self.labels = cv2.resize(labels, self.size)

  def __len__(self):
    return len(path)

  def __getitem__(self, index):
    arr_img = []
    arr_labels = []
    for date in self.path[index]:
      img = rasterio.open(date)
      img = img.read()[0]

      reshaped_img = cv2.resize(img, self.size)
      cropped_labels = []
      cropped_images = []
      for i in range(self.no_crop_per_img):
        seed = np.random.randint(0,2**32)
        np.random.seed(seed)
        cropped_label = ImageUtils.get_random_crop(self.labels, self.cropped_size)
        cropped_img = ImageUtils.get_random_crop(reshaped_img, self.cropped_size)
        cropped_labels.append(cropped_label)
        cropped_images.append(cropped_img)

      labels = np.stack(cropped_labels, axis = -1)
      croppped_arr = np.stack(cropped_images, axis = -1)

      arr_img.append(croppped_arr)
      arr_labels.append(labels)

    arr_img = np.stack(arr_img, axis=-1)
    # arr_img = arr_img.reshape(arr_img.shape[0:-2] + (-1,))

    arr_labels = np.stack(arr_labels, axis=-1)
    # arr_labels = arr_labels.reshape(arr_labels.shape[0:-2] + (-1,))

    return {'img':arr_img, 'labels':arr_labels}

In [9]:
class ImageUtils:
  def get_random_crop(image, crop_size):
    crop_height, crop_width = crop_size
    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height

    x = np.random.randint(0, max_x)
    y = np.random.randint(0, max_y)

    crop = image[y: y + crop_height, x: x + crop_width]

    return crop


class CustomData(Dataset):
  def __init__(self, gdf, size, path, no_crop_per_img, cropped_size = (64,64), num_train = 10):
    super().__init__()
    self.gdf = gdf
    self.size = size
    self.path = path
    self.no_crop_per_img = no_crop_per_img
    self.cropped_size = cropped_size
    self.num_train = num_train
    img_profile = rasterio.open(path['20210101'][0]).profile
    label = gdf.to_crs(img_profile['crs'])
    labels = rasterize(
        shapes=[(row.geometry, int(row.crop_type)) for i, row in label.iterrows()], # Construct polygon and value tuples
        out_shape=(img_profile['width'], img_profile['height']),
        transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
    )
    self.labels = cv2.resize(labels, self.size)

  def __len__(self):
    return self.num_train

  def __getitem__(self, index):

    arr_img = []
    seed = np.random.randint(0,2**32)
    np.random.seed(seed)

    for k,v in self.path.items():
      for img_path in v:
        img = rasterio.open(img_path)
        img = img.read()[0]
        reshaped_img = cv2.resize(img, self.size)
        
        cropped_img = ImageUtils.get_random_crop(reshaped_img, self.cropped_size)
        arr_img.append(cropped_img)
    
    labels = ImageUtils.get_random_crop(self.labels, self.cropped_size)
    images = np.stack(arr_img, axis = -1)
    return {'images':images, 'labels':labels}

In [10]:
from tqdm import tqdm
def create_dataset(path):
  img_profile = rasterio.open(path['20210101'][0]).profile
  label = gdf.to_crs(img_profile['crs'])
  labels = rasterize(
      shapes=[(row.geometry, int(row.crop_type)) for i, row in label.iterrows()], # Construct polygon and value tuples
      out_shape=(img_profile['width'], img_profile['height']),
      transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
  )
  labels = cv2.resize(labels, (2051,2051))

  arr_img = []
  seed = np.random.randint(0,2**32)
  np.random.seed(seed)
  for i in tqdm(range(10)):
    label = []
    images = []
    for k,v in path.items():
      for img_path in v:
        img = rasterio.open(img_path)
        img = img.read()[0]
        reshaped_img = cv2.resize(img, (2051,2051))
        
        cropped_img = ImageUtils.get_random_crop(reshaped_img, (64,64))
        arr_img.append(cropped_img)
  
    label = ImageUtils.get_random_crop(labels, (64,64))
    images = np.stack(arr_img, axis = -1)
    np.save(f'labels{i}.npy',label)
    np.save(f'images{i}.npy',images)
    print(i)

In [None]:
create_dataset(img_path_dict)

 10%|█         | 1/10 [04:14<38:08, 254.28s/it]

0


In [12]:
gdf = gpd.read_file('/content/drive/MyDrive/VarunaHackathon2022/training_area/traindata.shp')
path = '/content/VarunaHackathon2022/sentinel-2-image/2021/*'
day = glob(path)
date = [i.split('/')[-1] for i in day]

def imagelist(x): 
  return glob('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2021/' + x + '/IMG_DATA/*.jp2' )

img_path_dict = {k:imagelist(k) for k in date}
size = (2051,2051)
train_dataset = CustomData(gdf, size, img_path_dict, 16)

In [101]:
for i in train_dataset:
  input = i
  break

In [89]:
input['labels']

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [87]:
input['images'].shape

(64, 64, 1065)

In [90]:
!cp /content/drive/MyDrive/VarunaHackathon2022 -d /content/

In [None]:
class Model1(nn.Modules):
  def __init__(self):
    pass
  def forward(self, image):
    pass