In [6]:
import os
import pandas as pd
import numpy as np

def load_soil_data(soil_file):
    """Read the whitespace-delimited soil parameter table.

    Parameters
    ----------
    soil_file : str, path-like or file-like
        Headerless text file whose fields are separated by runs of
        whitespace (spaces and/or tabs).

    Returns
    -------
    pandas.DataFrame
        One row per line of the file. Because the file has no header row,
        the columns are labelled with the integers 0..N-1.
    """
    # ``delim_whitespace=True`` is deprecated since pandas 2.2; the regex
    # separator below is the documented equivalent.
    return pd.read_csv(soil_file, sep=r"\s+", header=None)

def _surface_and_rootzone(sm1, sm2, sm3, depth1, depth2, depth3):
    """Collapse three layer-moisture series into surface and root-zone series.

    The surface value integrates the layers down to a depth of 0.05 and is
    divided by 50; the root-zone value integrates down to a depth of 1 and is
    divided by 1000. A layer that is only partly inside the target depth
    contributes proportionally to the covered fraction of its thickness.

    Presumably depths are in metres and layer moisture in millimetres, which
    would make both results volumetric fractions -- TODO confirm units.

    Returns
    -------
    tuple
        ``(sm_surface, sm_rootzone)``, each with the same shape as ``sm1``.
    """
    # Cumulative depth of the bottom of each layer
    cdp = np.cumsum([depth1, depth2, depth3])

    # Top 0.05 of the profile
    if cdp[0] >= 0.05:
        sm_surface = (sm1 * (0.05 / cdp[0])) / 50
    elif cdp[1] >= 0.05:
        sm_surface = (sm1 + (sm2 * ((0.05 - cdp[0]) / depth2))) / 50
    else:
        # NOTE(review): no cap here -- if all three layers together are
        # thinner than 0.05 the third-layer fraction exceeds 1. Confirm such
        # profiles cannot occur.
        sm_surface = (sm1 + sm2 + (sm3 * ((0.05 - cdp[1]) / depth3))) / 50

    # Top 1 of the profile
    if cdp[0] >= 1:
        sm_rootzone = (sm1 * (1 / cdp[0])) / 1000
    elif cdp[1] >= 1:
        sm_rootzone = (sm1 + (sm2 * ((1 - cdp[0]) / depth2))) / 1000
    elif cdp[2] >= 1:
        sm_rootzone = (sm1 + sm2 + (sm3 * ((1 - cdp[1]) / depth3))) / 1000
    else:
        # NOTE(review): a profile shallower than 1 is still divided by 1000
        # rather than by its own total depth -- confirm this is intended.
        sm_rootzone = (sm1 + sm2 + sm3) / 1000

    return sm_surface, sm_rootzone


def process_files(data_folder, soil_file, output_folder, coord_tol=1e-6):
    """Convert every ``wflux_<lat>_<lon>`` file into a soil-moisture CSV.

    For each file in ``data_folder`` whose name starts with ``wflux_``, the
    latitude and longitude are parsed from the filename, the matching row is
    looked up in the soil table (latitude in column 2, longitude in column 3,
    layer depths in columns 22-24), and a CSV with the columns ``year``,
    ``month``, ``day``, ``sm_surface`` and ``sm_rootzone`` is written to
    ``output_folder`` as ``<original filename>.csv``.

    Parameters
    ----------
    data_folder : str
        Folder containing the whitespace-delimited, headerless wflux files.
        Columns 0-2 are read as year, month, day and columns 7-9 as the
        moisture of layers 1-3.
    soil_file : str
        Path of the soil parameter table, read with ``load_soil_data``.
    output_folder : str
        Destination folder; created if it does not exist.
    coord_tol : float, optional
        Absolute tolerance used when comparing filename coordinates with the
        soil table. Exact ``==`` on floats can miss a cell when the two
        values were parsed by different routines and differ in the last bits.

    Notes
    -----
    Files with no matching soil row are skipped without a message.
    """
    soil_df = load_soil_data(soil_file)
    soil_lat = soil_df[2].to_numpy()
    soil_lon = soil_df[3].to_numpy()

    os.makedirs(output_folder, exist_ok=True)

    for file in os.listdir(data_folder):
        if not file.startswith("wflux_"):
            continue

        # Filename layout: wflux_<lat>_<lon>
        parts = file.split("_")
        lat, lon = float(parts[1]), float(parts[2])

        # Tolerant coordinate match instead of exact float equality
        same_cell = (
            np.isclose(soil_lat, lat, rtol=0.0, atol=coord_tol)
            & np.isclose(soil_lon, lon, rtol=0.0, atol=coord_tol)
        )
        match = soil_df[same_cell]
        if match.empty:
            continue

        # Thickness of the three soil layers for this cell (first match wins)
        depth1, depth2, depth3 = match.iloc[0, [22, 23, 24]]

        file_path = os.path.join(data_folder, file)
        # ``sep=r"\s+"`` replaces the deprecated ``delim_whitespace=True``
        wflux = pd.read_csv(file_path, sep=r"\s+", header=None)

        sm_surface, sm_rootzone = _surface_and_rootzone(
            wflux.iloc[:, 7], wflux.iloc[:, 8], wflux.iloc[:, 9],
            depth1, depth2, depth3,
        )

        result_df = pd.DataFrame({
            "year": wflux[0],
            "month": wflux[1],
            "day": wflux[2],
            "sm_surface": sm_surface,
            "sm_rootzone": sm_rootzone,
        })

        output_file = os.path.join(output_folder, file + ".csv")
        result_df.to_csv(output_file, index=False)

# Locations used for the wflux conversion: raw model files, the soil
# parameter table, and the folder that receives the per-cell CSVs.
data_folder = r"model_lat_long"
soil_file = r"SOIL_autocalibrated_complete.unknown"
output_folder = r"final_results_1"

# Convert every wflux file that has a matching soil record
process_files(
    data_folder=data_folder,
    soil_file=soil_file,
    output_folder=output_folder,
)


In [1]:
import os
import pandas as pd

# Inclusive date window, encoded as YYYYMMDD integers
start_date = 20150101
end_date = 20151231

# Folder holding one CSV per model grid cell (one lat-lon per file).
# NOTE(review): the wflux conversion call earlier in this notebook writes to
# "final_results_1" -- confirm this is the folder that should be read here.
model_data_folder = "final_results"

# Every CSV in the folder is treated as one lat-lon cell
files = [f for f in os.listdir(model_data_folder) if f.endswith(".csv")]

# Collect the in-range rows of every cell, then split them by date once at
# the end instead of appending row by row.
frames = []
for file in files:
    file_path = os.path.join(model_data_folder, file)

    # Expected columns: year, month, day, sm_surface, sm_rootzone
    df = pd.read_csv(file_path)

    # Filenames are expected to look like 'wflux_<lat>_<lon>.csv'
    parts = file.replace(".csv", "").split("_")
    lat, lon = float(parts[1]), float(parts[2])

    # Single sortable date key in YYYYMMDD form
    date_key = df["year"] * 10000 + df["month"] * 100 + df["day"]
    in_range = (date_key >= start_date) & (date_key <= end_date)

    df = df.assign(date=date_key, lat=lat, lon=lon)
    df = df.loc[in_range, ["date", "lat", "lon", "sm_surface", "sm_rootzone"]]
    frames.append(df)

# Group all cells by date; sort=False keeps first-seen order of the dates
if frames:
    model_long = pd.concat(frames, ignore_index=True)
    data_by_date = {
        date: rows for date, rows in model_long.groupby("date", sort=False)
    }
else:
    # Nothing to export (pd.concat would raise on an empty list)
    data_by_date = {}

# Ensure output directory exists
output_folder = "model_datewise"
os.makedirs(output_folder, exist_ok=True)

# Save one CSV per date (like SMAP) with the columns lat, lon, sm_surface,
# sm_rootzone.
for date, rows in data_by_date.items():
    # The date is deliberately formatted as a float ("20150101.0.csv"):
    # the regridding cells of this notebook look model files up under the
    # "<date>.0.csv" name.
    output_file = os.path.join(output_folder, f"{float(date)}.csv")
    columns = ["lat", "lon", "sm_surface", "sm_rootzone"]
    rows[columns].astype(float).to_csv(output_file, index=False)

# Report the window that was actually exported
print(
    "Model data successfully converted to date-wise format for "
    f"{start_date}-{end_date}!"
)


Model data successfully converted to date-wise format for 2016-2017!


In [None]:
#CKDTREE
import os
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree

# Paths
smap_file = "20160101.csv"
# Built with os.path.join so the path also resolves on systems where "\" is
# not a path separator.
# NOTE(review): the date-wise export cell of this notebook writes to
# "model_datewise", not "model_data_datewise" -- confirm the folder name.
model_file = os.path.join("model_data_datewise", "20160101.0.csv")
output_folder = "regridded_results"

# Ensure output directory exists
os.makedirs(output_folder, exist_ok=True)

# Load SMAP data (target points) and model data (source points)
smap_df = pd.read_csv(smap_file)
model_df = pd.read_csv(model_file)

# Coordinate arrays, one (lat, lon) pair per row
model_coords = model_df[["lat", "lon"]].values
smap_coords = smap_df[["Latitude", "Longitude"]].values

# Index the model points. Distances are computed directly on the (lat, lon)
# values, i.e. in degree space, not as great-circle distances.
tree = cKDTree(model_coords)

# Position of the closest model point for every SMAP point; the distances
# themselves are not used, so no maximum search radius is applied.
_, nearest_indices = tree.query(smap_coords)

# Copy the nearest model point's soil moisture onto each SMAP row
smap_df["sm_surface_model"] = model_df["sm_surface"].to_numpy()[nearest_indices]
smap_df["sm_rootzone_model"] = model_df["sm_rootzone"].to_numpy()[nearest_indices]

# Save output
output_file = os.path.join(output_folder, "20160101_nearest.csv")
smap_df.to_csv(output_file, index=False)

print("✅ Nearest-neighbor regridding for 20160101 completed! Check:", output_file)


✅ Nearest-neighbor regridding for 20160101 completed! Check: regridded_results\20160101_nearest.csv


In [1]:
import os
import numpy as np
import pandas as pd
from scipy.interpolate import griddata

# Input folders (one CSV per date) and the folder for the regridded output
smap_folder = "smap_datewise"
model_folder = "model_datewise"
output_folder = "regridded_results"
os.makedirs(output_folder, exist_ok=True)

# The SMAP filenames drive the loop; the model file is looked up by name
date_files = sorted(os.listdir(smap_folder))

for file in date_files:
    smap_path = os.path.join(smap_folder, file)
    # Model files carry an extra ".0" before the extension
    model_path = os.path.join(model_folder, file.replace(".csv", ".0.csv"))

    if os.path.exists(model_path):
        # Target points: the SMAP rows of this date
        smap_df = pd.read_csv(smap_path)
        smap_coords = smap_df[["Latitude", "Longitude"]].values

        # Source points: the model rows of the same date
        model_df = pd.read_csv(model_path)
        model_coords = model_df[["lat", "lon"]].values

        # Interpolate both model fields onto the SMAP points (method="linear")
        for column in ("sm_surface", "sm_rootzone"):
            smap_df[column + "_model"] = griddata(
                model_coords, model_df[column], smap_coords, method="linear"
            )

        # The output keeps the SMAP filename
        output_file = os.path.join(output_folder, file)
        smap_df.to_csv(output_file, index=False)

        print(f"Processed {file}, saved to {output_file}")
    else:
        print(f"Skipping {file}: No matching model file found.")

print("Regridding completed for all files.")


Processed 20160101.csv, saved to regridded_results\20160101.csv
Processed 20160102.csv, saved to regridded_results\20160102.csv
Processed 20160103.csv, saved to regridded_results\20160103.csv
Processed 20160104.csv, saved to regridded_results\20160104.csv
Regridding completed for all files.


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.interpolate import griddata

# Input folders (one CSV per date) and the folder for the regridded output
smap_folder = "smap_datewise"
model_folder = "model_datewise"
output_folder = "regridded_results"

os.makedirs(output_folder, exist_ok=True)

# The SMAP filenames drive the loop; the model file is looked up by name
date_files = sorted(os.listdir(smap_folder))

print("Processing files in order:", date_files)  # Debugging step

for file in date_files:
    smap_path = os.path.join(smap_folder, file)
    # Model files carry an extra ".0" before the extension
    model_path = os.path.join(model_folder, file.replace(".csv", ".0.csv"))

    # Pre-flight checks: both files must exist and be non-empty on disk
    if not os.path.exists(smap_path):
        print(f"SMAP file missing: {file}")
    elif not os.path.exists(model_path):
        print(f"Model file missing: {file.replace('.csv', '.0.csv')}")
    elif os.stat(smap_path).st_size == 0:
        print(f"Skipping {file} (SMAP file is empty)")
    elif os.stat(model_path).st_size == 0:
        print(f"Skipping {file} (Model file is empty)")
    else:
        try:
            smap_df = pd.read_csv(smap_path)
            model_df = pd.read_csv(model_path)

            # A file can be non-empty on disk yet contain no data rows
            if smap_df.empty or model_df.empty:
                print(f"Skipping {file} (One of the files is empty)")
                continue

            smap_coords = smap_df[["Latitude", "Longitude"]].values
            model_coords = model_df[["lat", "lon"]].values

            # Interpolate both model fields onto the SMAP points
            for column in ("sm_surface", "sm_rootzone"):
                smap_df[column + "_model"] = griddata(
                    model_coords, model_df[column], smap_coords, method="linear"
                )

            # The output keeps the SMAP filename
            output_file = os.path.join(output_folder, file)
            smap_df.to_csv(output_file, index=False)
            print(f"Regridded data saved for {file}")

        except Exception as e:
            # Report the failure and move on to the next date
            print(f"Error processing {file}: {e}")


Processing files in order: ['20160101.csv', '20160102.csv', '20160103.csv', '20160104.csv']
✅ Regridded data saved for 20160101.csv
✅ Regridded data saved for 20160102.csv
✅ Regridded data saved for 20160103.csv
✅ Regridded data saved for 20160104.csv


In [20]:
# Reload one regridded day and total the interpolated model surface moisture;
# NaN entries are left out of the sum.
regridded_csv = pd.read_csv("regridded_results/20160101.csv")
regridded_csv.loc[:, "sm_surface_model"].sum(skipna=True)

291.1494311674947

In [21]:
# Total of the "sm_surface" column of the same frame, NaN entries left out
regridded_csv.loc[:, "sm_surface"].sum(skipna=True)

8402.966636678999

In [None]:
import pandas as pd

# One SMAP file serves as the template; only its coordinates are used
sample_smap_file = "20160101.csv"  # Use any valid file
smap_df = pd.read_csv(sample_smap_file)

# Keep each (Latitude, Longitude) pair once and save it as the reference grid
smap_grid = smap_df.loc[:, ["Latitude", "Longitude"]].drop_duplicates()
smap_grid.to_csv("smap_grid.csv", index=False)

print("SMAP grid extracted and saved as smap_grid.csv")


✅ SMAP grid extracted and saved as smap_grid.csv


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.interpolate import griddata

# Fixed target grid, the date-wise model folder, and the output folder
smap_path = "smap_grid.csv"
model_folder = "model_datewise"
output_folder = "regridded_model_to_smap_grid"
os.makedirs(output_folder, exist_ok=True)

# Target points: the Latitude/Longitude pairs of the fixed grid file
smap_df = pd.read_csv(smap_path)
smap_coords = smap_df[["Latitude", "Longitude"]].values

# Every entry of the model folder is processed, in sorted name order
date_files = sorted(os.listdir(model_folder))
print(" Processing model files:", date_files)

for file in date_files:
    model_path = os.path.join(model_folder, file)

    # Only files that exist and have content on disk are read
    readable = os.path.exists(model_path) and os.stat(model_path).st_size != 0
    if not readable:
        print(f" Skipping {file}: Missing or empty")
        continue

    try:
        model_df = pd.read_csv(model_path)

        if model_df.empty:
            print(f" Skipping {file}: DataFrame empty")
            continue

        model_coords = model_df[["lat", "lon"]].values
        sm_surface = model_df["sm_surface"].values
        sm_rootzone = model_df["sm_rootzone"].values

        # Output column -> model values to interpolate
        fields = {
            "sm_surface_model": sm_surface,
            "sm_rootzone_model": sm_rootzone,
        }

        # Try linear interpolation first; if it raises for either field,
        # both fields are recomputed with nearest-neighbour instead.
        try:
            for column, values in fields.items():
                smap_df[column] = griddata(model_coords, values, smap_coords, method="linear")
        except Exception as e:
            print(f" Linear interpolation failed for {file}: {e}, using nearest instead")
            for column, values in fields.items():
                smap_df[column] = griddata(model_coords, values, smap_coords, method="nearest")

        # The output keeps the model filename; smap_df is reused for every
        # date, so its model columns are overwritten on each iteration.
        output_path = os.path.join(output_folder, file)
        smap_df.to_csv(output_path, index=False)
        print(f"Saved regridded model for {file}")

    except Exception as e:
        # Report the failure and move on to the next file
        print(f" Error processing {file}: {e}")


 Processing model files: ['20150101.0.csv', '20150102.0.csv', '20150103.0.csv', '20150104.0.csv', '20150105.0.csv', '20150106.0.csv', '20150107.0.csv', '20150108.0.csv', '20150109.0.csv', '20150110.0.csv', '20150111.0.csv', '20150112.0.csv', '20150113.0.csv', '20150114.0.csv', '20150115.0.csv', '20150116.0.csv', '20150117.0.csv', '20150118.0.csv', '20150119.0.csv', '20150120.0.csv', '20150121.0.csv', '20150122.0.csv', '20150123.0.csv', '20150124.0.csv', '20150125.0.csv', '20150126.0.csv', '20150127.0.csv', '20150128.0.csv', '20150129.0.csv', '20150130.0.csv', '20150131.0.csv', '20150201.0.csv', '20150202.0.csv', '20150203.0.csv', '20150204.0.csv', '20150205.0.csv', '20150206.0.csv', '20150207.0.csv', '20150208.0.csv', '20150209.0.csv', '20150210.0.csv', '20150211.0.csv', '20150212.0.csv', '20150213.0.csv', '20150214.0.csv', '20150215.0.csv', '20150216.0.csv', '20150217.0.csv', '20150218.0.csv', '20150219.0.csv', '20150220.0.csv', '20150221.0.csv', '20150222.0.csv', '20150223.0.csv', '2

In [8]:
# Report how many distinct latitude and longitude values model_df holds
for label, column in (("Unique latitudes:", "lat"), ("Unique longitudes:", "lon")):
    print(label, np.unique(model_df[column]).size)


Unique latitudes: 1
Unique longitudes: 4
