In [1]:
import pandas as pd 
import rasterio 
import numpy as np
from rasterio.transform import rowcol
from datetime import datetime
import os

In [23]:
def get_pixel_value(raster_path, lon, lat):
    with rasterio.open(raster_path) as src:
        row, col = rowcol(src.transform, lon, lat)
        try: 
            # Get the pixel value
            value = src.read(1)[row, col]
            if value == np.nan:
                return None 
            return value 
        except IndexError:
            return None 

In [10]:
sites = ['SCAN_Kukuihaele', 'SCAN_Mana_House', 'SCAN_Silver_Sword', 'SCAN_Kemole_Gulch', 'TAHMO_CRIG_(Soil_Moisture_Station_1)', 'SCAN_Waimea_Plain', 'TAHMO_CRIG_(Soil_Moisture_Station_2)']
data_sources = ['lst', 'ndvi_1km', 'VH1km', 'smap1km_pm_ASC']
data_paths = [f'dataset/{d}' for d in data_sources]
lst_dir = 'dataset/lst'
ndvi_dir = 'dataset/NDVI_1km'
vh_dir = 'dataset/VH1km'
smap_pm_asc_dir = 'dataset/smap1km_pm_ASC'

site_info = pd.read_csv('site_info.csv')
site_loc = {}
for index, row in site_info.iterrows():
    net = row['network']
    station = row['station']
    lat = row['lat']
    lon = row['lon']
    site_loc[net + '_' + station] = {'lat': lat, 'lon' : lon}

site_loc

{'SCAN_Bosque_Seco': {'lat': 17.967, 'lon': -66.867},
 'SCAN_Combate': {'lat': 17.983, 'lon': -67.167},
 'SCAN_Isabela': {'lat': 18.467, 'lon': -67.05},
 'SCAN_Kainaliu': {'lat': 19.533, 'lon': -155.933},
 'SCAN_Kemole_Gulch': {'lat': 19.917, 'lon': -155.583},
 'SCAN_Kukuihaele': {'lat': 20.1, 'lon': -155.517},
 'SCAN_Mana_House': {'lat': 19.95, 'lon': -155.533},
 'SCAN_Maricao_Forest': {'lat': 18.15, 'lon': -67.0},
 'SCAN_Silver_Sword': {'lat': 19.767, 'lon': -155.417},
 'SCAN_Waimea_Plain': {'lat': 20.017, 'lon': -155.6},
 'TAHMO_CRIG_(Soil_Moisture_Station_1)': {'lat': 6.23025, 'lon': -0.34655},
 'TAHMO_CRIG_(Soil_Moisture_Station_2)': {'lat': 6.22984, 'lon': -0.34698},
 'TAHMO_CSIR-SARI,_Nyankpala_-_Tamale': {'lat': 9.40083, 'lon': -1.00191},
 'TAHMO_KNUST_Farm,_Kumasi': {'lat': 6.69099, 'lon': -1.51909}}

In [29]:
results = []
sites = sorted(sites)
for site in sites[:-2]:
    csv_path = f"daily_smap/{site}_smap.csv"
    df = pd.read_csv(csv_path)
    valid_dates = df.dropna(subset='smap_lon')
    dates = valid_dates['time'].tolist()

    

    lat = site_loc[site]['lat']
    lon = site_loc[site]['lon']

    for date in dates:
        sm = valid_dates.loc[valid_dates['time'] == date, 'sm'].values[0]

        lst_path = f'{lst_dir}/{site}_smap/{date}_VNP21A1N.tif'
        ndvi_path = f'{ndvi_dir}/{site}/ndvi_{site}_{date}_1km.tif'
        vh_path = f'{vh_dir}/{site}/{site}_S1_{date}_ASCENDING_VH1km.tif'
        smap_path = f'{smap_pm_asc_dir}/{site}/smap_pm_1km_{site}_{date}.tif'

        if not os.path.exists(lst_path):
            print("Missing lst files")
            continue

        if not os.path.exists(ndvi_path):
            print("Missing ndvi files")
            continue

        if not os.path.exists(vh_path):
            print("Missing s1 files")
            print(vh_path)
            continue

        if not os.path.exists(smap_path):
            print("Missing smap files")
            continue

        # if not all([os.path.exists(lst_path), os.path.exists(smap_path), 
        #             os.path.exists(ndvi_path), os.path.exists(vh_path)]):
        #     print("Missing files")
        #     continue

        lst_value = get_pixel_value(lst_path, lon, lat)
        ndvi_value = get_pixel_value(ndvi_path, lon, lat)
        vh_value = get_pixel_value(vh_path, lon, lat)
        smap_value = get_pixel_value(smap_path, lon, lat)

        print(date, lst_value, ndvi_value, vh_value, smap_value)

        if None in (lst_value, ndvi_value, vh_value, smap_value):
            # print("Missing values")
            continue 

        results.append({
            "Station": site,
            "Date": date,
            "Lat": lat,
            "Lon": lon,
            "Sentinel-1 VH": vh_value,
            "SMAP": smap_value,
            "NDVI": ndvi_value,
            "LST": lst_value,
            "sm" : sm
        })

# Lưu kết quả vào file CSV
df = pd.DataFrame(results)
df.to_csv('results.csv', index=False)


2020-01-06 277.48 0.3852083 -15.786287 0.19297484
2020-01-09 278.34000000000003 0.3852083 -16.008629 0.22218007
Missing ndvi files
2020-01-17 nan 0.4079067 -15.986378 0.2523789
2020-01-22 272.18 0.41486064 -16.554995 0.21289466
2020-01-25 276.78000000000003 0.41818383 -16.554995 0.2151418
2020-01-30 276.06 0.45613837 -15.773773 0.2018085
2020-02-02 279.38 0.49615645 -16.096622 0.22056562
2020-02-07 nan 0.49615645 -16.41425 0.23776464
2020-02-10 276.3 nan -16.41425 0.22427684
2020-02-15 282.02 0.5371609 -16.207115 0.23993117
2020-02-18 277.38 0.50336194 -16.207115 0.26872298
2020-02-23 nan 0.53551346 -16.049059 0.21155742
2020-02-26 281.64 0.5179635 -16.196346 0.25339076
2020-03-02 nan 0.53276646 -15.761706 0.20314682
2020-03-05 nan 0.5432646 -15.761706 0.2121507
2020-03-10 276.2 0.540837 -16.638823 0.17962228
2020-03-13 nan 0.5390015 -16.638823 0.21698502
2020-03-18 nan 0.5260522 -16.010456 0.23354793
2020-03-21 nan 0.5260522 -16.154753 0.25325605
2020-03-26 nan 0.49808484 -15.912763 0