In [21]:
import xarray as xr
import numpy as np
import pandas as pd
import os

# Folder holding the NetCDF files to scan
directory_path = r"E:\SMOS20192022\SM_2021_A"

# (latitude, longitude) pairs at which values are sampled from each file.
# NOTE(review): the last two entries repeat the first pair, so every file
# yields three identical rows for that location -- confirm this is intended.
coordinates = [
    (42.53523, -72.17393),
    (42.53515, -72.17387),
    (42.53503, -72.17385),
    (42.51169, -72.21495),
    (42.50965, -72.20412),
    (42.43022, -72.121),
    (42.559151, -71.954755),
    (42.65856, -72.1681),
    (42.64761, -72.15662),
    (42.65373, -72.12053),
    (42.68092, -72.01332),
    (42.63366, -72.16853),
    (42.42988, -71.95713),
    (42.4334, -72.15585),
    (42.42128, -72.1936),
    (42.38341, -72.07815),
    (42.5176, -72.20477),
    (42.66071, -72.14417),
    (42.62835, -72.02762),
    (42.5074, -72.10041),
    (42.47143, -72.11768),
    (42.3804, -71.9763),
    (42.38297, -72.19733),
    (42.53523, -72.17393),
    (42.53523, -72.17393),
]

# Names of the dataset variables read at every coordinate; a variable that is
# absent from a file is recorded as NaN by the extraction loop.
variables = [
    'Soil_Moisture',
    'Tb_Asl_Theta_B_H',
    'Tb_Asl_Theta_B_V',
    'Optical_Thickness_Nad',
    'Optical_Thickness_Nad2',
]

# Function to find the nearest point
def find_nearest_point(lat, lon, lat_arr, lon_arr):
    """Return the grid indices of the cell closest to ``(lat, lon)``.

    The grid is described by two independent 1-D coordinate axes, so the
    squared distance ``(lat_arr[i] - lat)**2 + (lon_arr[j] - lon)**2`` is
    minimised by minimising each term separately. This avoids allocating a
    full ``len(lat_arr) x len(lon_arr)`` distance matrix on every call.

    Distances are plain differences of the coordinate values; no
    wrap-around of longitudes is applied.

    Args:
        lat: Latitude of the query point.
        lon: Longitude of the query point.
        lat_arr: 1-D sequence/array of grid latitudes.
        lon_arr: 1-D sequence/array of grid longitudes.

    Returns:
        A ``(lat_idx, lon_idx)`` tuple of integer indices into ``lat_arr``
        and ``lon_arr``. On ties the lowest index wins.

    Raises:
        ValueError: If either axis is not one-dimensional, or is empty.
    """
    lat_axis = np.asarray(lat_arr)
    lon_axis = np.asarray(lon_arr)
    if lat_axis.ndim != 1 or lon_axis.ndim != 1:
        raise ValueError(
            "lat_arr and lon_arr must be 1-D coordinate axes, got shapes "
            f"{lat_axis.shape} and {lon_axis.shape}"
        )

    # np.argmin returns the first minimum, matching the row-major
    # tie-breaking of an argmin over the full 2-D distance matrix.
    lat_idx = np.argmin(np.abs(lat_axis - lat))
    lon_idx = np.argmin(np.abs(lon_axis - lon))
    return lat_idx, lon_idx

# Initialize an empty list to store the results (one dict per file/coordinate)
all_data = []

# Loop through each NetCDF file in the directory. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would make the
# row order of the output differ between runs.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith(".nc"):
        continue

    file_path = os.path.join(directory_path, filename)

    # The date is the first 8 characters (YYYYMMDD) of the fifth
    # underscore-separated field of the filename,
    # e.g. "20190501T000000" -> "20190501".
    name_parts = filename.split('_')
    if len(name_parts) < 5:
        # A stray .nc file with a different naming scheme should not abort
        # the whole run; report it and move on.
        print(f"Skipping '{filename}': cannot extract a date from the filename")
        continue
    date = name_parts[4][:8]

    # Open the NetCDF file; the context manager closes it even if reading
    # the coordinate axes below raises.
    with xr.open_dataset(file_path) as ds:
        # Extract latitude and longitude arrays
        lat_arr = ds['lat'].values
        lon_arr = ds['lon'].values

        # Extracting data for each coordinate
        for lat, lon in coordinates:
            try:
                # Find the nearest point indices for lat and lon
                lat_idx, lon_idx = find_nearest_point(lat, lon, lat_arr, lon_arr)
                data = {'File': filename, 'Date': date, 'Latitude': lat, 'Longitude': lon}

                # Extracting data for each variable
                for var in variables:
                    # Only 2-D variables are sampled; missing variables or
                    # variables of another rank are recorded as NaN.
                    if var in ds.variables and ds[var].ndim == 2:
                        cell = ds[var].isel(lat=lat_idx, lon=lon_idx)
                        # .item() stores a plain Python scalar rather than a
                        # 0-d ndarray, so pandas can infer a numeric column.
                        data[var] = cell.values.item()
                    else:
                        data[var] = np.nan

                all_data.append(data)
            except Exception as e:
                # Best effort: one failing coordinate must not stop the
                # extraction of the remaining coordinates and files.
                print(f"Error accessing data for coordinates ({lat}, {lon}) in file '{filename}': {e}")

# Convert the list of dictionaries to a pandas DataFrame
df = pd.DataFrame(all_data)

# Write the DataFrame to an Excel file
output_file = r"E:\SMOS20192022\Results\M2\SM2021A.xlsx"

# Make sure the destination folder exists so the export does not fail after
# all files have already been processed.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

df.to_excel(output_file, index=False)

print(f"Data has been successfully written to {output_file}")

Data has been successfully written to E:\SMOS20192022\Results\Millbrook\SM2022D.xlsx
