In [2]:
import requests

# Define the URL of the ZIP file
zip_url = "https://github.com/thsant/wgisd/archive/refs/heads/master.zip"

# Define the path to save the ZIP file in Google Drive
save_path = "/content/drive/MyDrive/q2.zip"

# Download the ZIP file.
# - stream=True + iter_content writes the archive chunk by chunk instead of
#   holding the whole body in memory.
# - timeout stops a stalled connection from hanging the cell forever.
# - raise_for_status() aborts on an HTTP error so an error page is never
#   saved under the name q2.zip.
with requests.get(zip_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(save_path, "wb") as file:
        for chunk in response.iter_content(chunk_size=1 << 20):
            file.write(chunk)

print("ZIP file downloaded and saved to Google Drive.")


ZIP file downloaded and saved to Google Drive.


In [4]:
# Make the Drive folder that holds q2.zip the notebook's working directory.
%cd "/content/drive/MyDrive"
# Unpack the archive into the current directory (the captured output shows it
# creating wgisd-master/).
!unzip "q2.zip"

/content/drive/MyDrive
Archive:  q2.zip
6910edc5ae3aae8c20062941b1641821f0c30127
   creating: wgisd-master/
  inflating: wgisd-master/CODE_OF_CONDUCT.md  
  inflating: wgisd-master/LICENSE    
  inflating: wgisd-master/README.md  
  inflating: wgisd-master/WGISD.ipynb  
 extracting: wgisd-master/classes.txt  
   creating: wgisd-master/coco_annotations/
  inflating: wgisd-master/coco_annotations/test_bbox_instances.json  
  inflating: wgisd-master/coco_annotations/test_polygons_instances.json  
  inflating: wgisd-master/coco_annotations/train_bbox_instances.json  
  inflating: wgisd-master/coco_annotations/train_polygons_instances.json  
   creating: wgisd-master/contrib/
   creating: wgisd-master/contrib/berries/
  inflating: wgisd-master/contrib/berries/CDY_2015-berries.txt  
  inflating: wgisd-master/contrib/berries/CDY_2016-berries.txt  
  inflating: wgisd-master/contrib/berries/CDY_2017-berries.txt  
  inflating: wgisd-master/contrib/berries/CDY_2018-berries.txt  
  inflating: wgis

In [6]:
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import shutil
import random

# Varietal codes; each one is used as a key of `instances` below.
varietals = ['CDY', 'CFR', 'CSV', 'SVB', 'SYH']

# Per-varietal list of file stems (joined with '.jpg' / '.txt' by the dataset).
# Every code starts out with its own, separate empty list.
instances = dict((code, []) for code in varietals)

class CustomDataset(Dataset):
    """Map-style dataset with one item per varietal.

    Item ``i`` is the pair ``(images, annotations)`` for ``varietals[i]``:
    every ``<stem>.jpg`` under ``data_dir`` loaded as a NumPy array, and the
    matching ``<stem>.txt`` loaded with ``np.loadtxt``.

    Args:
        varietals: Sequence of varietal keys; its length is the dataset length.
        data_dir: Directory containing the ``.jpg`` / ``.txt`` files.
        instances_by_varietal: Optional mapping from varietal key to a list of
            file stems (file names without extension). When omitted, the
            module-level ``instances`` dict is looked up at access time, which
            is the original behaviour.
    """

    def __init__(self, varietals, data_dir, instances_by_varietal=None):
        self.varietals = varietals
        self.data_dir = data_dir
        self.instances_by_varietal = instances_by_varietal

    def __len__(self):
        """Return the number of varietals (not the number of images)."""
        return len(self.varietals)

    def __getitem__(self, index):
        """Load every image and annotation array listed for one varietal.

        Returns:
            ``(images, annotations)``: two equally long lists. Both are empty
            when no file stems are registered for the varietal.

        Raises:
            IndexError: If ``index`` is outside ``varietals``.
            KeyError: If the varietal has no entry in the stem mapping.
        """
        varietal = self.varietals[index]
        # Fall back to the module-level mapping only when none was injected.
        lookup = instances if self.instances_by_varietal is None else self.instances_by_varietal
        filenames = lookup[varietal]

        images = []
        annotations = []
        for filename in filenames:
            image_path = os.path.join(self.data_dir, filename + '.jpg')
            annotation_path = os.path.join(self.data_dir, filename + '.txt')

            # The context manager releases the file handle deterministically.
            with Image.open(image_path) as handle:
                image = np.array(handle)
            # ndmin=2 keeps a single-row file as shape (1, k) rather than (k,),
            # so a later np.savetxt writes it back as one row.
            # Presumably each row describes one annotated object - confirm.
            annotation = np.loadtxt(annotation_path, ndmin=2)

            images.append(image)
            annotations.append(annotation)

        return images, annotations

# Directory that is scanned for <stem>.jpg / <stem>.txt pairs.
# NOTE(review): the archive is unpacked to /content/drive/MyDrive/wgisd-master/
# and the batches below are written under wgisd-master/data/ - confirm that
# this path (without "wgisd-master") really holds the images.
data_dir = '/content/drive/MyDrive/data/'

# Fill the per-varietal index. Without this every entry of `instances` stays
# empty, every dataset item is ([], []) and only empty batch folders appear.
available_files = set(os.listdir(data_dir))
for varietal in varietals:
    instances[varietal] = sorted(
        stem
        for stem, extension in map(os.path.splitext, available_files)
        if extension == '.jpg'
        and stem.startswith(varietal)
        and stem + '.txt' in available_files
    )
if not any(instances.values()):
    raise FileNotFoundError(
        f'No .jpg/.txt pairs for varietals {varietals} found in {data_dir}'
    )

dataset = CustomDataset(varietals, data_dir)


def _keep_samples(samples):
    """Collate function that returns the list of dataset items unchanged.

    The default collate cannot stack per-varietal lists of different lengths
    and would convert the image arrays to tensors, which Image.fromarray
    does not accept.
    """
    return samples


def _endless_batches(loader):
    """Yield batches forever, starting a new (reshuffled) pass when one ends."""
    while len(loader) > 0:
        yield from loader


batch_size = 5
data_loader = DataLoader(
    dataset, batch_size=batch_size, shuffle=True, collate_fn=_keep_samples
)

num_iterations = 10
batch_folder_dir = '/content/drive/MyDrive/wgisd-master/data/batched'
os.makedirs(batch_folder_dir, exist_ok=True)
# A single iterator is advanced here instead of calling next(iter(data_loader))
# on every pass, which would rebuild the loader iterator each time.
for iteration, batch in zip(range(num_iterations), _endless_batches(data_loader)):
    # Each dataset item is (images, annotations) for one varietal; flatten the
    # batch so that one image/annotation pair is written per index.
    batch_images = [image for images, _ in batch for image in images]
    batch_annotations = [
        annotation for _, annotations in batch for annotation in annotations
    ]

    batch_folder = f'batch_{iteration + 1}'
    os.makedirs(os.path.join(batch_folder_dir, batch_folder), exist_ok=True)

    for i, (image, annotation) in enumerate(zip(batch_images, batch_annotations)):
        image_path = os.path.join(batch_folder_dir, batch_folder, f'image_{i}.jpg')
        annotation_path = os.path.join(batch_folder_dir, batch_folder, f'annotation_{i}.txt')

        Image.fromarray(image).save(image_path)
        # atleast_2d keeps a single-row annotation on one line; savetxt would
        # otherwise write a 1-D array as one value per line.
        np.savetxt(annotation_path, np.atleast_2d(annotation))
