In [1]:
from osgeo import osr
import pandas as pd
import utils

In [2]:
import rasterio
import h5py
import os
import numpy as np
from rasterio.enums import Resampling

### Detach bands in Sentinel-1 images (VH)

In [None]:
import os
import glob

# Paths to input and output files
input_dir = 'dataset/sentinel-1'
output_dir = 'dataset/VH'
dic = {'VV': 1, 'VH': 2}

for site in os.listdir(input_dir):
    print(site)
    os.makedirs(os.path.join(output_dir, site), exist_ok=True)
    site_path = os.path.join(input_dir, site)
    files = glob.glob(site_path + '/*.tif')
    for file in files:
        print(file)
        output_tif = os.path.join(output_dir, site ,os.path.basename(file).split('.tif')[0] + '_VH.tif')
        # Choose which band you want to extract (1-based index)
        band_to_extract = dic['VH']

        # Open the source TIFF
        with rasterio.open(file) as src:
            # Read the chosen band
            band_data = src.read(band_to_extract)

            # Copy the original profile (metadata), then update it for a single band
            profile = src.profile
            profile.update(count=1)  # We'll write only one band to the new file

            # Create and write to the new TIFF file
            with rasterio.open(output_tif, 'w', **profile) as dst:
                dst.write(band_data, 1)  # Write band_data to band index 1 in output


### Applying Average Filter for Sentinel-1 band

In [40]:
import rasterio
import numpy as np
import cv2

def crop_and_resize(input_path, output_path):
    with rasterio.open(input_path) as src:
        data = src.read(1)

        # Define target size
        new_height, new_width = 9, 9
        block_size = 100  # 900 / 9 = 100

        # Compute exact block averages
        downsampled_data = np.zeros((new_height, new_width), dtype=np.float32)
        for i in range(new_height):
            for j in range(new_width):
                block = data[i * block_size+1:(i + 1) * block_size+1, j * block_size:(j + 1) * block_size]
                downsampled_data[i, j] = np.mean(block)

        # Compute new transform based on cropped bounds
        x_min, y_max = src.bounds.left, src.bounds.top
        pixel_width = (src.transform.a * 900) / 9
        pixel_height = (src.transform.e * 900) / 9


        new_transform = rasterio.Affine(
            pixel_width, 0, x_min,
            0, pixel_height, y_max
        )

        # Debugging
        # print(f"Computed Pixel Size: {pixel_width}, {pixel_height}")
        # print(f"New Transform:\n{new_transform}")

        # Save output
        with rasterio.open(
            output_path, 'w',
            driver='GTiff',
            height=new_height,
            width=new_width,
            count=1,
            dtype=np.float32,
            crs=src.crs,
            transform=new_transform
        ) as dst:
            dst.write(downsampled_data, 1)
            print(f"Saved {output_path}")

In [43]:
import os
import glob
input_dir = 'dataset/VH'
output_dir = 'dataset/VH1km'

for site in os.listdir(input_dir)[1:]:
    os.makedirs(os.path.join(output_dir, site), exist_ok=True)
    site_path = os.path.join(input_dir, site)
    files = glob.glob(site_path + '/*.tif')
    for file in files:
        output_tif = os.path.join(output_dir, site, os.path.basename(file).split('.tif')[0] + '1km.tif')
        crop_and_resize(file, output_tif)

Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-01-27_DESCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-04-09_DESCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-08-15_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-03-24_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-09-29_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2020-12-31_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-10-07_DESCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-06-04_DESCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-04-06_DESCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-03-21_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_House_S1_2021-05-08_ASCENDING_VH1km.tif
Saved dataset/VH1km/SCAN_Mana_House/SCAN_Mana_Hou

### Applying Average Filter for NDVI 10 m

In [3]:
import rasterio
import numpy as np

def resample_ndvi_10m(input_path, output_path):
    with rasterio.open(input_path) as src:
        print(f"Nodata value: {src.nodata}")  # Check the stored nodata value
        data = src.read(1)

        # Define target size
        new_height, new_width = 9, 9
        block_size = 100  # 900 / 9 = 100

        # Initialize output array
        downsampled_data = np.zeros((new_height, new_width), dtype=np.float32)

        for i in range(new_height):
            for j in range(new_width):
                # Extract block
                block = data[i * block_size + 1:(i + 1) * block_size + 1, j * block_size:(j + 1) * block_size]

                # Count NaN and valid values
                nan_count = np.isnan(block).sum()
                # print(nan_count)
                # print(block.size)
                valid_count = block.size - nan_count  # Total - NaN count

                # If less than 50% are NaN, use np.nanmean
                if nan_count <= 0.5 * block.size:
                    downsampled_data[i, j] = np.nanmean(block)
                else:
                    downsampled_data[i, j] = np.mean(block)  
                    

        # Compute new transform based on cropped bounds
        x_min, y_max = src.bounds.left, src.bounds.top
        pixel_width = (src.transform.a * 900) / 9
        pixel_height = (src.transform.e * 900) / 9

        new_transform = rasterio.Affine(
            pixel_width, 0, x_min,
            0, pixel_height, y_max
        )

        # Save output
        with rasterio.open(
            output_path, 'w',
            driver='GTiff',
            height=new_height,
            width=new_width,
            count=1,
            dtype=np.float32,
            crs=src.crs,
            transform=new_transform,
            nodata=np.nan  # Preserve NaN values in output
        ) as dst:
            dst.write(downsampled_data, 1)
            print(f"Saved {output_path}")


In [4]:
import os
import glob
input_dir = 'downloads1'
output_dir = 'dataset/NDVI_1km'

for site in os.listdir(input_dir):
    os.makedirs(os.path.join(output_dir, site), exist_ok=True)
    site_path = os.path.join(input_dir, site)
    print(site_path)
    files = glob.glob(site_path + '/*.tif')
    for file in files:
        info = os.path.basename(file).split('_')
        new_name = 'ndvi_'+ '_'.join(info[2:])
        output_tif = os.path.join(output_dir, site, new_name.split('.tif')[0] + '_1km.tif')
        resample_ndvi_10m(file, output_tif)


downloads1/SCAN_Kukuihaele
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2020-09-10_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2020-07-03_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2022-02-04_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2020-09-10_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2022-05-03_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2021-02-17_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2020-04-30_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2020-08-17_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2022-05-14_1km.tif
Nodata value: nan
Saved dataset/NDVI_1km/SCAN_Kukuihaele/ndvi_SCAN_Kukuihaele_2021-04-17_1km.tif
Nod

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense

# 1. Load data
data = pd.read_csv('results.csv')
valid_data = data.dropna()
columns_to_scale = ['Sentinel-1 VH', 'LST']
scaler = StandardScaler()

# Normalize backscatter and temperature by Z-score
valid_data[columns_to_scale] = scaler.fit_transform(valid_data[columns_to_scale])

valid_data.to_csv('normalized.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_data[columns_to_scale] = scaler.fit_transform(valid_data[columns_to_scale])


In [19]:
# Chuẩn hóa dữ liệu
data = pd.read_csv('normalized.csv')
numeric_cols = ["Sentinel-1 VH", "SMAP", "NDVI", "LST", "sm"]
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors = 'coerce')
print(data)
data = data.dropna()
# print(data.describe())
# print(data.isna().sum())  # Kiểm tra lại nếu còn NaN

# X = data.iloc[:, 4:8].values
# y = data.iloc[:,8].values.reshape(-1, 1)
# print(np.any(np.isinf(X)))  # Kiểm tra số vô cực
# print(np.any(np.isinf(y)))

               Station        Date     Lat      Lon  Sentinel-1 VH      SMAP  \
0    SCAN_Kemole_Gulch  2020-01-06  19.917 -155.583       1.000165  0.192975   
1    SCAN_Kemole_Gulch  2020-01-09  19.917 -155.583       0.898528  0.222180   
2    SCAN_Kemole_Gulch  2020-01-22  19.917 -155.583       0.648773  0.212895   
3    SCAN_Kemole_Gulch  2020-01-25  19.917 -155.583       0.648773  0.215142   
4    SCAN_Kemole_Gulch  2020-01-30  19.917 -155.583       1.005885  0.201809   
..                 ...         ...     ...      ...            ...       ...   
269  SCAN_Waimea_Plain  2022-02-12  20.017 -155.600       0.502522  0.190570   
270  SCAN_Waimea_Plain  2022-07-06  20.017 -155.600       0.564026  0.185421   
271  SCAN_Waimea_Plain  2022-07-14  20.017 -155.600       0.538016  0.208565   
272  SCAN_Waimea_Plain  2022-11-14  20.017 -155.600       0.198517  0.234694   
273  SCAN_Waimea_Plain  2022-11-25  20.017 -155.600       0.517406  0.311289   

         NDVI       LST        sm  
0  