In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd

In [2]:
# Folder containing the NetCDF files to process.
# Set the SMOS_DATA_DIR environment variable to point the notebook at another
# location without editing this cell; the original local path stays the default.
folder_path = os.environ.get('SMOS_DATA_DIR', 'E:/SMOS20192022/SM2019A')

In [3]:
# Target points as (latitude, longitude) tuples; the nearest grid cell to each
# one is looked up in every file.
coordinates = [(53.95, -104.37), (42.56, -73), (41.80, -73.75)]

In [4]:
# Initialize the list that collects the results: one dict (row) is appended
# per (file, coordinate) pair by the extraction loop.
results = []


In [5]:
# Function to find the nearest index for given coordinate in 1D arrays
def find_nearest_index(lat_array, lon_array, lat_point, lon_point):
    """Return the indices of the grid values closest to a target point.

    Latitude and longitude are searched independently, so the arrays are
    expected to be 1-D coordinate axes.

    Parameters
    ----------
    lat_array, lon_array : array-like
        Latitude / longitude values of the grid (NumPy arrays, lists, ...).
    lat_point, lon_point : float
        Target latitude / longitude.

    Returns
    -------
    tuple of int
        ``(lat_idx, lon_idx)``: positions of the nearest values. On ties the
        first occurrence wins.

    Raises
    ------
    ValueError
        If an array is empty or contains only NaN values.
    """
    # asarray lets plain Python sequences be used as well as NumPy arrays.
    lat_values = np.asarray(lat_array)
    lon_values = np.asarray(lon_array)

    # nanargmin ignores NaN entries; plain argmin would return the position of
    # the first NaN instead of the nearest valid value.
    # NOTE(review): assumes lon_array uses the same longitude convention as
    # lon_point (e.g. both -180..180) -- no wrap-around handling is done here.
    lat_idx = int(np.nanargmin(np.abs(lat_values - lat_point)))
    lon_idx = int(np.nanargmin(np.abs(lon_values - lon_point)))
    return lat_idx, lon_idx

In [6]:
# Iterate over all NetCDF files in the folder and build one row per
# (file, coordinate) pair with the BT_H / BT_V values at the nearest grid cell.
lat_candidates = ['latitude', 'lat']
lon_candidates = ['longitude', 'lon']
bt_variables = ['BT_H', 'BT_V']
n_incidence = 14  # positions 0..13 along 'inc' become columns *_inc1 .. *_inc14

# Start from an empty list so that re-running this cell does not append
# duplicate rows to results left over from a previous run.
results = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# reproducible between runs and machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.nc'):
        continue
    file_path = os.path.join(folder_path, filename)

    # The context manager closes the file handle once the file is done,
    # including when the file is skipped or an unexpected error escapes.
    with xr.open_dataset(file_path) as ds:
        # Retrieve the latitude and longitude variables (first matching name wins)
        lat_name = next((name for name in lat_candidates if name in ds.variables), None)
        lon_name = next((name for name in lon_candidates if name in ds.variables), None)
        if lat_name is None or lon_name is None:
            print(f"Skipping file {filename} due to missing latitude/longitude variables.")
            continue

        lat_var = ds[lat_name].values
        lon_var = ds[lon_name].values

        # Print lat/lon variables shape for debugging
        print(f"File: {filename}")
        print(f"  Lat shape: {lat_var.shape}, Lon shape: {lon_var.shape}")

        # The nearest-index lookup treats each axis independently, which only
        # makes sense for 1-D coordinate arrays.
        if lat_var.ndim != 1 or lon_var.ndim != 1:
            print(f"Skipping file {filename}: latitude/longitude are not 1-D arrays.")
            continue

        # Index along the dimensions the coordinate arrays actually live on
        # instead of assuming they are called 'lat' and 'lon'.
        lat_dim = ds[lat_name].dims[0]
        lon_dim = ds[lon_name].dims[0]

        # Presence of the data variables does not change per coordinate, so
        # check once per file and make a complete miss visible.
        present = [name for name in bt_variables if name in ds.variables]
        if not present:
            print(f"  Warning: none of {bt_variables} found in {filename}; "
                  f"rows will only contain File and Coordinates.")

        # Extract data for the coordinates
        for lat_point, lon_point in coordinates:
            try:
                lat_idx, lon_idx = find_nearest_index(lat_var, lon_var, lat_point, lon_point)
                print(f"  Closest index for latitude {lat_point}: {lat_idx}, longitude {lon_point}: {lon_idx}")
                print(f"  Closest lat value: {lat_var[lat_idx]}, lon value: {lon_var[lon_idx]}")

                row = {'File': filename, 'Coordinates': (lat_point, lon_point)}

                # Incidence index outermost keeps the column order
                # BT_H_inc1, BT_V_inc1, BT_H_inc2, ...
                for i in range(1, n_incidence + 1):
                    for var_name in present:
                        column = f'{var_name}_inc{i}'
                        value = ds[var_name].isel(
                            {lat_dim: lat_idx, lon_dim: lon_idx, 'inc': i - 1}
                        ).values
                        if np.isnan(value):
                            print(f"    Extracted {column}: NaN encountered, setting to None")
                            row[column] = None
                        else:
                            print(f"    Extracted {column}: {value}")
                            # Store a plain Python scalar rather than a 0-d
                            # NumPy array so the column holds real numbers.
                            row[column] = value.item()

                # The row is only kept when every value was read successfully.
                results.append(row)

            except KeyError as e:
                print(f"Skipping some variables in file {filename} due to missing key: {e}")
            except IndexError as e:
                print(f"Skipping file {filename} due to index error: {e}")
            except ValueError as e:
                # xarray raises ValueError when a requested dimension (e.g.
                # 'inc') does not exist on the variable; an all-NaN coordinate
                # axis or a non-scalar selection also ends up here.
                print(f"Skipping coordinate ({lat_point}, {lon_point}) in file {filename} "
                      f"due to invalid selection: {e}")

File: SM_RE07_MIR_CLF31A_20190501T000000_20190501T235959_330_001_7.DBL.nc
  Lat shape: (584,), Lon shape: (1388,)
  Closest index for latitude 53.95: 528, longitude -104.37: 291
  Closest lat value: 53.837646484375, lon value: -104.39481353759766
  Closest index for latitude 42.56: 489, longitude -73: 412
  Closest lat value: 42.439117431640625, lon value: -73.01152801513672
  Closest index for latitude 41.8: 487, longitude -73.75: 409
  Closest lat value: 41.91291046142578, lon value: -73.78962707519531
File: SM_RE07_MIR_CLF31A_20190502T000000_20190502T235959_330_001_7.DBL.nc
  Lat shape: (584,), Lon shape: (1388,)
  Closest index for latitude 53.95: 528, longitude -104.37: 291
  Closest lat value: 53.837646484375, lon value: -104.39481353759766
  Closest index for latitude 42.56: 489, longitude -73: 412
  Closest lat value: 42.439117431640625, lon value: -73.01152801513672
  Closest index for latitude 41.8: 487, longitude -73.75: 409
  Closest lat value: 41.91291046142578, lon value:

In [7]:
# Build a table from the collected rows: each dict in `results` becomes one
# row and its keys become the columns.
df = pd.DataFrame(data=results)

In [12]:
# Display the extracted table (long frames are shown truncated).
# NOTE(review): the rendered output below lists only the File and Coordinates
# columns and the loop output contains no "Extracted ..." lines, so BT_H / BT_V
# were apparently not found in the files -- confirm the variable names.
df

Unnamed: 0,File,Coordinates
0,SM_RE07_MIR_CLF31A_20190501T000000_20190501T23...,"(53.95, -104.37)"
1,SM_RE07_MIR_CLF31A_20190501T000000_20190501T23...,"(42.56, -73)"
2,SM_RE07_MIR_CLF31A_20190501T000000_20190501T23...,"(41.8, -73.75)"
3,SM_RE07_MIR_CLF31A_20190502T000000_20190502T23...,"(53.95, -104.37)"
4,SM_RE07_MIR_CLF31A_20190502T000000_20190502T23...,"(42.56, -73)"
...,...,...
730,SM_RE07_MIR_CLF31A_20191230T000000_20191230T23...,"(42.56, -73)"
731,SM_RE07_MIR_CLF31A_20191230T000000_20191230T23...,"(41.8, -73.75)"
732,SM_RE07_MIR_CLF31A_20191231T000000_20191231T23...,"(53.95, -104.37)"
733,SM_RE07_MIR_CLF31A_20191231T000000_20191231T23...,"(42.56, -73)"


In [11]:
# Export the table to an Excel workbook, leaving out the DataFrame index
output_file = 'extracted_data_optimized.xlsx'
df.to_excel(excel_writer=output_file, index=False)

print(f"Data successfully saved to {output_file}")

Data successfully saved to extracted_data_optimized.xlsx
