In [1]:
!pip install GDAL

Defaulting to user installation because normal site-packages is not writeable


In [2]:
!pip freeze

affine==2.4.0
alembic==1.12.1
annotated-types==0.6.0
anyio==3.7.1
appdirs==1.4.4
apturl==0.5.2
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asgiref==3.7.2
asttokens==2.4.0
async-lru==2.0.4
attrs==23.1.0
Automat==22.10.0
Babel==2.13.0
backcall==0.2.0
beautifulsoup4==4.10.0
beniget==0.4.1
bleach==6.1.0
blessed==1.20.0
blinker==1.4
blis==0.7.11
Brlapi==0.8.3
Brotli==1.0.9
catalogue==2.0.10
certifi==2020.6.20
cffi==1.16.0
chardet==4.0.0
charset-normalizer==3.3.0
click==8.0.3
click-plugins==1.1.1
cligj==0.7.2
cloudpathlib==0.16.0
cmdstanpy==1.2.0
colorama==0.4.4
comm==0.1.4
command-not-found==0.3
confection==0.1.3
constantly==15.1.0
coverage==7.3.2
cryptography==41.0.4
cssselect==1.2.0
cupshelpers==1.0
cycler==0.11.0
cymem==2.0.8
dbus-python==1.2.18
debugpy==1.8.0
decorator==4.4.2
defer==1.0.6
defusedxml==0.7.1
distro==1.7.0
distro-info==1.1+ubuntu0.2
Django==5.0.2
djangorestframework==3.14.0
exceptiongroup==1.1.3
executing==2.0.0
fastapi==0.104.1
fastjsonschema==2.18.1
fil

In [7]:
# PREPARING OUR KERNEL SESSION TO LOAD SERIALIZED MODELS

import torch
import joblib
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import os
import json
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import joblib
import rasterio
from json import JSONEncoder
from rasterio.crs import CRS
from osgeo import gdal

# custom definitions
from src.classes.VGGUdeaSpectral import VGGUdeaSpectral
from src.classes.MultipleRegressionModel import MultipleRegressionModel


def rmse_score(net, X, y):
    y_pred = net.predict(X)
    rmse = (mean_squared_error(y_true=y, y_pred=y_pred)) ** 0.5
    return -rmse  # Skorch tries to maximize the score, so negate the RMSE

In [3]:
from src.classes.VGGUdeaSpectral import VGGUdeaSpectral


vgg_model = joblib.load("src/trained_models/VGGUdeaSpectral1/VGGUdeaSpectral_model1.joblib")

vgg_model

In [4]:
def read_tif_file(filepath):
    with rasterio.open(filepath) as src:
        return np.array(src.read()), src.meta

def save_prediction_tile(prediction, metadata, output_dir, filename):
    # Create a new 100x100 tile filled with the prediction value
    prediction_tile = np.full((100, 100), prediction, dtype=np.float32)
    
    # Update metadata for the tile
    new_meta = metadata.copy()
    new_meta.update({
        'dtype': 'float32',
        'count': 1,
        'driver': 'GTiff'
    })
    
    # Save the tile to a TIFF file
    with rasterio.open(os.path.join(output_dir, filename), 'w', **new_meta) as dest:
        dest.write(prediction_tile, 1)

def save_metadata(metadata_list, save_path):
    # Modify metadata to serialize
    for meta in metadata_list:
        if 'sentinel_meta' in meta:
            if 'crs' in meta['sentinel_meta']:
                # Convert CRS object to string representation
                meta['sentinel_meta']['crs'] = str(meta['sentinel_meta']['crs'])
    with open(save_path, 'w') as f:
        json.dump(metadata_list, f, indent=4)

def create_file_list(directory, output_file):
    with open(output_file, 'w') as file_list:
        for filename in os.listdir(directory):
            if filename.endswith('.tif'):
                file_path = os.path.join(directory, filename)
                file_list.write(file_path + '\n')

In [5]:
# Directories
sentinel_dir = 'data/sentinel2rgbmedian2020.py'  # Your Sentinel data directory
base_name = os.path.basename(sentinel_dir).replace('.py', '')
prediction_tiles_dir = f'prediction_tiles/prediction_{base_name}'


os.makedirs(prediction_tiles_dir, exist_ok=True)  # Ensure the directory exists

metadata_list = []
file_list_path = os.path.join(prediction_tiles_dir, 'file_list.txt')
filenames = os.listdir(sentinel_dir)

filenames = os.listdir(sentinel_dir)
for filename in tqdm(filenames):
    sentinel_path = os.path.join(sentinel_dir, filename)
    sentinel_img, sentinel_meta = read_tif_file(sentinel_path)
    image_tensor = torch.tensor(sentinel_img).to(dtype=torch.float32)
    prediction = vgg_model.predict(image_tensor.unsqueeze(0))
    save_prediction_tile(prediction, sentinel_meta, prediction_tiles_dir, f'pred_{filename}')
    metadata_list.append({'filename': f'pred_{filename}', 'sentinel_meta': sentinel_meta})

save_metadata(metadata_list, os.path.join(prediction_tiles_dir, 'metadata_test.json'))
create_file_list(prediction_tiles_dir, file_list_path)
print("Data preparation, prediction, and serialization complete.")
vrt_command = f"gdalbuildvrt -input_file_list {file_list_path} {os.path.join(prediction_tiles_dir, 'predictions.vrt')}"
os.system(vrt_command)

100%|██████████| 64106/64106 [09:16<00:00, 115.24it/s]


Data preparation, prediction, and serialization complete.
0...10...20...30...40...50...60...70...80...90...100 - done.


0

In [30]:
os.system(vrt_command)

0...10...20...30...40...50...60...70...80...90...100 - done.


0

In [9]:
from osgeo import gdal

def convert_vrt_to_png(vrt_path, png_path, output_width, output_height):
    # Open the VRT file
    dataset = gdal.Open(vrt_path, gdal.GA_ReadOnly)
    
    # Get the maximum and minimum values (assuming one band)
    band = dataset.GetRasterBand(1)
    min, max = band.ComputeRasterMinMax()

    # Configure options for rendering
    # Scale the data to 0-255 range and convert to Byte
    scale = 255 / (max - min)
    options = gdal.TranslateOptions(format='PNG', outputType=gdal.GDT_Byte,
                                    width=output_width, height=output_height,
                                    scaleParams=[[min, max, 0, 255]],
                                    callback=gdal.TermProgress_nocb)
    
    # Perform the conversion
    gdal.Translate(png_path, dataset, options=options)
    
    # Close the dataset
    dataset = None
    print(f"Image saved as {png_path}")

# Specify the path to your VRT file and the output PNG file
vrt_file_path = 'prediction_tiles/prediction_sentinel2rgbmedian2020/predictions.vrt'
output_png_path = 'prediction_tiles/prediction_sentinel2rgbmedian2020/output.png'

# Define desired output dimensions
desired_width = 2560  # or any other size
desired_height = 1440  # or any other size

# Convert VRT to PNG
convert_vrt_to_png(vrt_file_path, output_png_path, desired_width, desired_height)


0...10...20...30...40...50...60...70...80...90...100 - done.
Image saved as prediction_tiles/prediction_sentinel2rgbmedian2020/output.png


# VRT for the O.G dataset



In [6]:
import os
from osgeo import gdal
from tqdm import tqdm

# Directory for Sentinel data
sentinel_dir = 'data/sentinel2rgbmedian2020.py'

# Create a directory to store the VRT file
os.makedirs(sentinel_dir, exist_ok=True)

# Path to save the list of tile file paths
file_list_path = os.path.join(sentinel_dir, 'file_list.txt')

# Write paths to all the tiles in the sentinel directory to the file list
with open(file_list_path, 'w') as file_list:
    for filename in tqdm(os.listdir(sentinel_dir)):
        if filename.endswith('.tif'):  # Ensure only TIFF files are included
            file_path = os.path.join(sentinel_dir, filename)
            file_list.write(file_path + '\n')

# Path where the VRT will be saved
vrt_path = os.path.join(sentinel_dir, f'{os.path.basename(sentinel_dir)}.vrt')

# Command to build the VRT using the list of TIFF files
vrt_command = f"gdalbuildvrt -input_file_list {file_list_path} {vrt_path}"
os.system(vrt_command)

print(f"VRT created at {vrt_path}")


100%|██████████| 64107/64107 [00:00<00:00, 1230625.31it/s]


0...10...20...30...40...50...60...70...80...90...100 - done.
VRT created at data/sentinel2rgbmedian2020.py/sentinel2rgbmedian2020.py.vrt


In [7]:
from osgeo import gdal

def convert_vrt_to_rgb_png(vrt_path, png_path, output_width, output_height):
    # Open the VRT file
    dataset = gdal.Open(vrt_path, gdal.GA_ReadOnly)
    
    # Initialize the scale parameters
    scale_params = []
    
    # Loop through each band (assuming RGB)
    for band_num in range(1, 4):  # Bands 1 to 3 for RGB
        band = dataset.GetRasterBand(band_num)
        min_val, max_val = band.ComputeRasterMinMax()
        
        # Scale the data to 0-255 range and convert to Byte
        # Append each band's scale parameters to the list
        if max_val != min_val:  # Avoid division by zero
            scale_params.append([min_val, max_val, 0, 255])
        else:
            scale_params.append([])  # No scaling needed if no variation
    
    # Configure options for rendering
    options = gdal.TranslateOptions(format='PNG', outputType=gdal.GDT_Byte,
                                    width=output_width, height=output_height,
                                    scaleParams=scale_params,
                                    callback=gdal.TermProgress_nocb)
    
    # Perform the conversion
    gdal.Translate(png_path, dataset, options=options)
    
    # Close the dataset
    dataset = None
    print(f"Image saved as {png_path}")


    

In [8]:
# Specify the path to your VRT file and the output PNG file
vrt_file_path = 'data/sentinel2rgbmedian2020.py/sentinel2rgbmedian2020.py.vrt'
output_png_path = 'data/sentinel2rgbmedian2020.py/sentinel2rgbmedian2020.py.vrt/output.png'

# Define desired output dimensions
desired_width = 2560  # or any other size
desired_height = 1440  # or any other size

# Convert VRT to PNG
convert_vrt_to_rgb_png(vrt_file_path, output_png_path, desired_width, desired_height)

ERROR 1: data/sentinel2rgbmedian2020.py/36933a1f5a53b.tif, band 1: Failed to compute min/max, no valid pixels found in sampling.
ERROR 1: data/sentinel2rgbmedian2020.py/36933a1f5a53b.tif, band 2: Failed to compute min/max, no valid pixels found in sampling.
ERROR 1: data/sentinel2rgbmedian2020.py/36933a1f5a53b.tif, band 3: Failed to compute min/max, no valid pixels found in sampling.
ERROR 4: Unable to create png file data/sentinel2rgbmedian2020.py/sentinel2rgbmedian2020.py.vrt/output.png.



Image saved as data/sentinel2rgbmedian2020.py/sentinel2rgbmedian2020.py.vrt/output.png
