In [1]:
import rasterio 
import numpy as np
import pandas as pd
import re
import os
import csv
import geopandas as gpd
from rasterio.transform import rowcol  
from datetime import datetime, timedelta

### Get the location of each point

In [2]:
# Name of the study region; every input/output path below is derived from it.
region = "china"

In [3]:
# Load the point layer for the selected region from its shapefile.
shapefile_path = f'{region}/{region}_points/{region}_points.shp'
gdf = gpd.read_file(shapefile_path)

# Preview the first rows to confirm the id / coordinate columns were read.
print(gdf.head())

   id         lon        lat                    geometry
0   1  109.088892  28.192974  POINT (109.08889 28.19297)
1   2  109.095495  28.180645   POINT (109.0955 28.18064)
2   3  109.094058  28.172290  POINT (109.09406 28.17229)
3   4  109.095406  28.165912  POINT (109.09541 28.16591)
4   5  109.093878  28.152527  POINT (109.09388 28.15253)


In [4]:
# Expose the point coordinates as plain numeric columns.
# NOTE(review): treats geometry.y / geometry.x as latitude / longitude, which
# assumes the layer is in a geographic CRS -- confirm.
gdf['latitude'] = gdf.geometry.y
gdf['longitude'] = gdf.geometry.x

# Destination of the flat (non-spatial) copy of the point table.
csv_file_path = f'{region}/{region}_points/{region}_points.csv'

# Drop the geometry, normalise the identifier column name and write the table.
# The rename leaves the frame untouched when there is no 'field_1' column.
df = pd.DataFrame(gdf.drop(columns='geometry')).rename(columns={'field_1': 'id'})
df.to_csv(csv_file_path, index=False)

### Get the pixel values at each point

In [5]:
# Sample every GeoTIFF in the region folder at each station location and write
# one record per (image, station) pair to `<region>_swc_values.csv`.
tiff_folder = f'{region}/{region}_tif'
csv_file = f'{region}/{region}_points/{region}_points.csv'
s1_date_csv = f'{region}/{region}_s1_metadata.csv'
station_df = pd.read_csv(csv_file)
s1_date = pd.read_csv(s1_date_csv)
# Incremented once for every sample rejected by the quality flag.
# NOTE(review): starts at 1 as in the original and is never read afterwards --
# confirm whether it should start at 0 or be reported.
count = 1
if not {'id', 'latitude', 'longitude'}.issubset(station_df.columns):
    raise ValueError("CSV file must contain 'id', 'latitude', and 'longitude' columns.")

# Build the date lookup once instead of scanning the column for every image.
s1_dates = set(s1_date['date'])

# Fixed column order so that the CSV keeps its header even with no records.
result_columns = ['id', 'date', 'latitude', 'longitude', 'pixel_value', 'sm']
results = []

# os.listdir() returns entries in arbitrary order; sort for reproducible output.
for image in sorted(os.listdir(tiff_folder)):
    if not image.endswith('.tiff'):
        continue

    # The acquisition date is taken from the first YYYY-MM-DD in the file name.
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', image)
    if date_match is None:
        continue
    date = date_match.group(1)

    date_obj = datetime.strptime(date, '%Y-%m-%d')
    previous_date = (date_obj - timedelta(days=1)).strftime('%Y-%m-%d')

    # If the S1 metadata table lists the previous day, label the samples with
    # that day instead of the date found in the file name.
    # NOTE(review): the message below is printed when the previous day is NOT
    # listed -- confirm that the wording matches the intent.
    if previous_date in s1_dates:
        date = previous_date
    else:
        print('There is data for S1 for this date:', date)

    tiff_path = os.path.join(tiff_folder, image)
    with rasterio.open(tiff_path) as src:
        transform = src.transform
        # Read each band once per image rather than once per station.
        # Band 1 is used as a quality flag (0 = accept) and band 2 as the value.
        # A file with fewer than two bands now fails here with rasterio's own
        # error instead of being silently treated as "pixel out of bounds".
        quality_band = src.read(1)
        value_band = src.read(2)
        n_rows, n_cols = quality_band.shape

        for _, row in station_df.iterrows():
            latitude = row['latitude']
            longitude = row['longitude']
            station_id = row['id']

            # Convert longitude/latitude to row and column indices.
            # NOTE(review): assumes the raster CRS uses lon/lat degrees -- confirm.
            row_index, col_index = rowcol(transform, longitude, latitude)

            # Explicit bounds check: NumPy wraps negative indices, so relying on
            # IndexError alone would sample the opposite edge of the image for
            # points lying north or west of the raster.
            inside = 0 <= row_index < n_rows and 0 <= col_index < n_cols

            if inside:
                if quality_band[row_index, col_index] != 0:
                    # Flagged pixel: no record is written for this station/date.
                    count += 1
                    continue
                pixel_value = value_band[row_index, col_index]
                sm_25 = round(pixel_value * 2, 5)
            else:
                # Station outside this image: keep the record, but with missing
                # values for BOTH fields (previously `sm` kept the value of the
                # preceding station, or raised NameError on the first one).
                pixel_value = np.nan
                sm_25 = np.nan

            results.append({
                'id': int(station_id),
                'date': date,
                'latitude': latitude,
                'longitude': longitude,
                'pixel_value': pixel_value,
                'sm': sm_25
            })

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=result_columns)
# Save results to CSV
output_csv_path = f'{region}/{region}_swc_values.csv'

results_df.to_csv(output_csv_path, index=False)

There is data for S1 for this date: 2021-02-12
There is data for S1 for this date: 2020-04-30
There is data for S1 for this date: 2022-02-12
There is data for S1 for this date: 2021-06-12
There is data for S1 for this date: 2022-03-08
There is data for S1 for this date: 2020-11-08
There is data for S1 for this date: 2021-04-25
There is data for S1 for this date: 2020-07-16
There is data for S1 for this date: 2020-12-26
There is data for S1 for this date: 2022-10-10
There is data for S1 for this date: 2020-01-01
There is data for S1 for this date: 2021-07-30
There is data for S1 for this date: 2021-04-06
There is data for S1 for this date: 2022-03-03
There is data for S1 for this date: 2021-09-21
There is data for S1 for this date: 2021-11-03
There is data for S1 for this date: 2020-03-13
There is data for S1 for this date: 2020-02-18
There is data for S1 for this date: 2020-09-26
There is data for S1 for this date: 2022-06-07
There is data for S1 for this date: 2021-12-21
There is data

### Get information for each site (value, location)

In [6]:
# Network label written into every row of the site table.
network = 'CHINA'

def create_site_info(input_csv, output_csv):
    """Write a one-row-per-site metadata table derived from the sampled values.

    Reads ``input_csv``, keeps the distinct (id, latitude, longitude)
    combinations ordered by id, and writes them to ``output_csv`` with the
    columns ``network, station, lat, lon, s_depth, e_depth``.

    Parameters
    ----------
    input_csv : str
        CSV holding at least the columns ``id``, ``latitude`` and ``longitude``.
    output_csv : str
        Destination path of the site table (written without the index).

    Notes
    -----
    ``network`` is taken from the module-level variable of the same name;
    ``s_depth`` and ``e_depth`` are the constants 0 and 5 for every site.
    """
    samples = pd.read_csv(input_csv)

    # One row per distinct station location, ordered by station id.
    locations = (
        samples.loc[:, ['id', 'latitude', 'longitude']]
        .drop_duplicates()
        .sort_values(by='id')
    )

    # Assemble the output directly in its final column order; the scalar
    # entries are broadcast over the rows of `locations`.
    site_table = pd.DataFrame({
        'network': network,
        'station': locations['id'],
        'lat': locations['latitude'],
        'lon': locations['longitude'],
        's_depth': 0,
        'e_depth': 5,
    })

    site_table.to_csv(output_csv, index=False)
    print(f'Site info save to {output_csv}')

In [7]:
# Derive the site metadata table from the sampled values written earlier.
output_csv = f'{region}/{region}_site_info.csv'
input_csv = f'{region}/{region}_swc_values.csv'
create_site_info(input_csv=input_csv, output_csv=output_csv)

Site info save to china/china_site_info.csv


In [8]:
from datetime import datetime 
import os 

def create_data_csv_files(input_csv, output_folder):
    """Split the sampled values into one CSV file per station.

    Reads ``input_csv``, adds the helper columns ``time`` (parsed ``date``),
    ``DoY`` (day of year), ``station`` (copy of ``id``) and ``sm_count``
    (constant 1), and writes every station's rows to
    ``<output_folder>/<station>.csv``.

    Parameters
    ----------
    input_csv : str
        CSV holding at least the columns ``id`` and ``date``.
    output_folder : str
        Directory receiving the per-station files; created if it is missing.
    """
    df = pd.read_csv(input_csv)

    df['time'] = pd.to_datetime(df['date'])

    # Extract Day of Year
    df['DoY'] = df['time'].dt.dayofyear

    # Create station column
    df['station'] = df['id']

    # Ensure output directory exists
    os.makedirs(output_folder, exist_ok=True)

    for station, station_df in df.groupby('station'):
        station_file = os.path.join(output_folder, f'{int(station)}.csv')

        # Every column is written. `assign` returns a new frame, so the
        # constant `sm_count` column is added without writing into the groupby
        # slice (which can trigger SettingWithCopyWarning).
        station_df.assign(sm_count=1).to_csv(station_file, index = False)

        print(f"Save: {station_file}")

In [9]:
# Write one CSV per station from the sampled values.
# NOTE(review): the folder suffix is spelled "_cvs" (not "_csv"); it is kept
# as-is because existing outputs use that name -- confirm whether it is intended.
output_folder = f'{region}/{region}_cvs'
input_csv = f'{region}/{region}_swc_values.csv'
create_data_csv_files(input_csv=input_csv, output_folder=output_folder)


Save: china/china_cvs/1.csv
Save: china/china_cvs/2.csv
Save: china/china_cvs/3.csv
Save: china/china_cvs/4.csv
Save: china/china_cvs/5.csv
Save: china/china_cvs/6.csv
Save: china/china_cvs/8.csv
Save: china/china_cvs/9.csv
Save: china/china_cvs/10.csv
Save: china/china_cvs/11.csv
Save: china/china_cvs/12.csv
Save: china/china_cvs/13.csv
Save: china/china_cvs/14.csv
Save: china/china_cvs/15.csv
Save: china/china_cvs/16.csv
Save: china/china_cvs/17.csv
Save: china/china_cvs/18.csv
Save: china/china_cvs/19.csv
Save: china/china_cvs/20.csv
Save: china/china_cvs/21.csv
Save: china/china_cvs/22.csv
Save: china/china_cvs/23.csv
Save: china/china_cvs/24.csv
Save: china/china_cvs/25.csv
Save: china/china_cvs/26.csv
Save: china/china_cvs/27.csv
Save: china/china_cvs/28.csv
Save: china/china_cvs/29.csv
Save: china/china_cvs/30.csv
Save: china/china_cvs/31.csv
Save: china/china_cvs/32.csv
Save: china/china_cvs/33.csv
Save: china/china_cvs/36.csv
Save: china/china_cvs/37.csv
Save: china/china_cvs/

In [11]:
# Split the site table into two files of (almost) equal size.
# Paths are derived from `region`, like every other path in this notebook,
# instead of hard-coding 'china'.
site_info_base = f'{region}/{region}_site_info'

china_sites = pd.read_csv(f'{site_info_base}.csv')
n = len(china_sites)

# Integer division: with an odd number of sites the second part gets the extra row.
half = n // 2

china_site_1 = china_sites.iloc[:half].reset_index(drop=True)
china_site_2 = china_sites.iloc[half:].reset_index(drop=True)

# NOTE(review): unlike the site table itself, these files are written WITH the
# index as a leading unnamed column -- confirm whether index=False is wanted.
china_site_1.to_csv(f'{site_info_base}_1.csv')
china_site_2.to_csv(f'{site_info_base}_2.csv')