In [None]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.3.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.8 snuggs-1.4.7


In [None]:
# Mount Google Drive at /content/drive so the cells below can read the dataset
# archive from, and write it back to, /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Download Dataset from Drive

In [None]:
import shutil
import zipfile

# Copy the dataset archive from the mounted Drive onto the Colab VM's local disk.
shutil.copy('/content/drive/MyDrive/2023_dissertation/dataset_archives/Jasansky_2023_images.zip', '/content/Jasansky_2023_images.zip')

with zipfile.ZipFile('/content/Jasansky_2023_images.zip', 'r') as zip_ref:
    # The export cell at the end of this notebook overwrites the same Drive
    # archive, storing members relative to the dataset folder (i.e. without a
    # top-level 'Jasansky_2023_images/' directory). Handle both layouts so the
    # notebook can be re-run and the files always end up in
    # /content/Jasansky_2023_images.
    has_top_level_dir = any(
        name.startswith('Jasansky_2023_images/') for name in zip_ref.namelist()
    )
    extract_root = '/content/' if has_top_level_dir else '/content/Jasansky_2023_images/'
    zip_ref.extractall(extract_root)

# Create a mapping file that groups the images into CD triplets (imageA, imageB, mask)

In [None]:
import os
import csv
from datetime import datetime
import re

source_dir = '/content/Jasansky_2023_images'  # replace with your source directory
mapping_file_path = os.path.join(source_dir, 'mapping.csv')

# Collect the s2_*.tif files. Sorting by filename places all files that share a
# name prefix next to each other, ordered by their trailing YYYY-MM-DD date.
s2_files = sorted(
    f for f in os.listdir(source_dir)
    if f.startswith('s2_') and f.endswith('.tif')
)

# State carried from one file to the next so consecutive files can be paired.
prev_facility = None
prev_filename = None
counter = 1

# Write one row per pair of consecutive files that belong to the same facility:
# (id, earlier image, later image, facility mask).
with open(mapping_file_path, 'w', newline='') as mapping_file:
    writer = csv.writer(mapping_file)
    writer.writerow(['id', 'imageA', 'imageB', 'mask'])  # Write the header

    for s2_file in s2_files:
        # Filenames look like s2_<facility>_<number>_<number>_<YYYY-MM-DD>.tif.
        # splitext removes only the extension (str.replace would also strip a
        # '.tif' occurring in the middle of a name).
        stem = os.path.splitext(s2_file)[0]
        s2_prefix_and_rest, date_str = stem.rsplit('_', 1)
        _, rest = s2_prefix_and_rest.split('_', 1)
        # The facility name is everything before the first digit or hyphen.
        # NOTE(review): a facility name that itself contains a digit or '-'
        # would be truncated here - confirm against the dataset's naming.
        facility = re.split(r'[\d-]', rest)[0].strip('_')

        # Fail fast (ValueError) on a filename whose date suffix is malformed.
        datetime.strptime(date_str, '%Y-%m-%d')

        mask_file = f'{facility}.tif'
        if facility == prev_facility:
            # Same facility as the previous file: pair previous (imageA) with
            # current (imageB). Pairs overlap, i.e. (t0, t1), (t1, t2), ...
            writer.writerow([f'{counter:04d}', prev_filename, s2_file, mask_file])
            print(f'Wrote {counter:04d} {prev_filename}, {s2_file}, {mask_file} to mapping file')
            counter += 1
        elif not os.path.exists(os.path.join(source_dir, 'masks', mask_file)):
            # First file of a new facility: only warn if its mask is missing;
            # rows for this facility are still written.
            print(f'No mask file found for {facility}')

        # Remember this file so the next iteration can pair against it.
        prev_filename = s2_file
        prev_facility = facility


Wrote 0001 s2_Artemyevsky_81.7826042963516_50.600017274045_2018-12-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-01-01.tif, Artemyevsky.tif to mapping file
Wrote 0002 s2_Artemyevsky_81.7826042963516_50.600017274045_2019-01-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-02-01.tif, Artemyevsky.tif to mapping file
Wrote 0003 s2_Artemyevsky_81.7826042963516_50.600017274045_2019-02-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-03-01.tif, Artemyevsky.tif to mapping file
Wrote 0004 s2_Artemyevsky_81.7826042963516_50.600017274045_2019-03-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-04-01.tif, Artemyevsky.tif to mapping file
Wrote 0005 s2_Artemyevsky_81.7826042963516_50.600017274045_2019-04-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-05-01.tif, Artemyevsky.tif to mapping file
Wrote 0006 s2_Artemyevsky_81.7826042963516_50.600017274045_2019-05-01.tif, s2_Artemyevsky_81.7826042963516_50.600017274045_2019-06-01.tif, Artemy

# Compress and Export to Drive

In [None]:
import zipfile

directory_to_compress = '/content/Jasansky_2023_images/'
zip_filename = '/content/Jasansky_2023_images.zip'

# Re-pack the dataset folder (including the mapping file written into it) as a
# deflate-compressed zip. Member names are stored relative to the dataset
# folder, so the archive has no top-level directory.
with zipfile.ZipFile(zip_filename, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for folder, _subdirs, filenames in os.walk(directory_to_compress):
        for filename in filenames:
            absolute_path = os.path.join(folder, filename)
            member_name = os.path.relpath(absolute_path, directory_to_compress)
            zipf.write(absolute_path, member_name)

In [None]:
# Copy the zip archive from the Colab VM to Google Drive. The destination is
# the same Drive path the dataset was originally downloaded from, so this
# overwrites the source archive with the re-packed one. (Despite its name,
# `destination_colab` is a path on the mounted Drive.)
destination_colab = '/content/drive/MyDrive/2023_dissertation/dataset_archives/Jasansky_2023_images.zip'
shutil.copyfile(zip_filename, destination_colab)

'/content/drive/MyDrive/2023_dissertation/dataset_archives/Jasansky_2023_images.zip'