In [1]:
import csv
import math

def read_sites_from_file(filename):
    """Load site records from a CSV file.

    The file must have a header row containing at least the columns
    'site_id', 'latitude' and 'longitude'. Any other columns are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, holding 'site_id' (the raw string
        from the file) and 'latitude' / 'longitude' converted to float.

    Raises:
        KeyError: If one of the three required columns is missing.
        ValueError: If a latitude or longitude cell is not a valid float.
    """
    with open(filename, 'r') as handle:
        return [
            {
                'site_id': record['site_id'],
                'latitude': float(record['latitude']),
                'longitude': float(record['longitude']),
            }
            for record in csv.DictReader(handle)
        ]

def find_closest_site_id(sites, lat, lon):
    """Return the id of the site geographically closest to a point.

    Closeness is measured by the great-circle (haversine) central angle
    rather than by a straight-line distance in degree space. A degree of
    longitude covers less ground than a degree of latitude away from the
    equator, so comparing raw degree differences can pick the wrong site;
    the haversine form also handles the +/-180 degree longitude seam.

    Args:
        sites: Iterable of dicts with 'site_id', 'latitude' and 'longitude'
            keys, coordinates in decimal degrees.
        lat: Latitude of the query point, in decimal degrees.
        lon: Longitude of the query point, in decimal degrees.

    Returns:
        The 'site_id' of the nearest site, or None when `sites` is empty.
        When several sites are equally close, the first one encountered wins.
    """
    query_lat = math.radians(lat)
    query_lon = math.radians(lon)
    cos_query_lat = math.cos(query_lat)

    min_distance = float('inf')
    closest_site_id = None

    for site in sites:
        site_lat = math.radians(site['latitude'])
        site_lon = math.radians(site['longitude'])

        # Haversine term; clamp to 1.0 so rounding error cannot push
        # sqrt/asin outside their domain for near-antipodal points.
        haversine = (
            math.sin((site_lat - query_lat) / 2) ** 2
            + cos_query_lat * math.cos(site_lat)
            * math.sin((site_lon - query_lon) / 2) ** 2
        )
        # Central angle in radians. It is proportional to surface distance,
        # so no Earth radius is needed just to rank the candidates.
        distance = 2 * math.asin(math.sqrt(min(1.0, haversine)))

        if distance < min_distance:
            min_distance = distance
            closest_site_id = site['site_id']

    return closest_site_id

def main():
    """Match each node to its nearest site and export the matching rows.

    Reads 'wtk_site_metadata.csv', finds the closest site for every entry in
    the hard-coded `nodes` table, prints each match and the full list of
    (node, site_id) pairs, then writes the metadata rows of the matched sites
    to 'wtk_site_metadata_matched.csv' in the order they appear in the input.
    """
    sites = read_sites_from_file('wtk_site_metadata.csv')

    # Node name -> (latitude, longitude).
    nodes = {
        "ATLSHR": (39.189972, -74.200000),
        "OCN1": (39.100000, -74.299972),
        "OCN2": (39.071, -74.424),
        "SKPJK1": (38.649972, -74.700000),
        "SKPJK2": (38.670000, -74.700000),
        "MRWN": (38.252, -74.778),
        "MMTM": (38.340000, -74.760000),
        "CVOW": (36.947, -75.217),
    }

    matched_sites = []
    for location, (lat, lon) in nodes.items():
        site_id = find_closest_site_id(sites, lat, lon)
        print(f"Closest site_id for {location} (latitude {lat} and longitude {lon}) is: {site_id}")
        matched_sites.append((location, site_id))

    print(matched_sites)

    # Count how many nodes matched each site, once, instead of re-parsing the
    # whole match list for every CSV row. A site matched by several nodes is
    # written once per node, as before.
    # NOTE(review): confirm the duplicated rows are wanted downstream.
    match_counts = {}
    for _, site_id in matched_sites:
        if site_id is None:
            # No site was found (empty metadata file); nothing to export.
            continue
        key = int(site_id)
        match_counts[key] = match_counts.get(key, 0) + 1

    # Writing matched sites to a new CSV file
    with open('wtk_site_metadata.csv', 'r') as input_csv, open('wtk_site_metadata_matched.csv', 'w', newline='') as output_csv:
        reader = csv.DictReader(input_csv)
        writer = csv.DictWriter(output_csv, fieldnames=reader.fieldnames)
        writer.writeheader()

        for row in reader:
            for _ in range(match_counts.get(int(row['site_id']), 0)):
                writer.writerow(row)

# Run the site-matching workflow only when this code executes as the main
# module (which includes running it as a notebook cell), not when imported.
if __name__ == '__main__':
    main()


Closest site_id for ATLSHR (latitude 39.189972 and longitude -74.2) is: 62537
Closest site_id for OCN1 (latitude 39.1 and longitude -74.299972) is: 61111
Closest site_id for OCN2 (latitude 39.071 and longitude -74.424) is: 60466
Closest site_id for SKPJK1 (latitude 38.649972 and longitude -74.7) is: 55106
Closest site_id for SKPJK2 (latitude 38.67 and longitude -74.7) is: 55106
Closest site_id for MRWN (latitude 38.252 and longitude -74.778) is: 50971
Closest site_id for MMTM (latitude 38.34 and longitude -74.76) is: 51892
Closest site_id for CVOW (latitude 36.947 and longitude -75.217) is: 43111
[('ATLSHR', '62537'), ('OCN1', '61111'), ('OCN2', '60466'), ('SKPJK1', '55106'), ('SKPJK2', '55106'), ('MRWN', '50971'), ('MMTM', '51892'), ('CVOW', '43111')]


In [2]:
conda install -c anaconda netcdf4

Retrieving notices: ...working... done
Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 23.9.0
  latest version: 23.10.0

Please update conda by running

    $ conda update -n base -c anaconda conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.10.0



## Package Plan ##

  environment location: /Users/skhanal/anaconda3

  added / updated specs:
    - netcdf4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2023.11.17         |  py311hca03da5_0         161 KB  anaconda
    openssl-3.0.12             |       h1a28f6b_0         4.5 MB  anaconda
    ------------------------------------------------------------
                                           Total:         4.7 MB

The following packages will be UPDATED:

  certifi                         2023.7.22-py311hca03da5_0 

In [1]:
import csv
import os
from netCDF4 import Dataset

def read_nc_file(file_path):
    """Load every variable of a netCDF file into plain Python lists.

    Args:
        file_path: Path of the netCDF file to open read-only.

    Returns:
        A dict mapping each variable name in the file to the result of
        calling ``.tolist()`` on that variable's full contents.
    """
    contents = {}
    with Dataset(file_path, 'r') as dataset:
        # Every variable is exported; restrict the names iterated here if
        # only a subset is needed.
        for name in dataset.variables:
            contents[name] = dataset.variables[name][:].tolist()
    return contents

def main():
    """Export the variables of several netCDF files side by side to a CSV.

    Writes 'wind_out.csv' with a header row of 'Variable' followed by one
    column per entry of `nc_files` (file name without extension). For each
    variable of the first file, one row is written per element: the variable
    name followed by that element from every file.

    The files are still assumed to share the same variable names and, per
    variable, the same length; a mismatch between files raises KeyError or
    IndexError rather than being silently ignored.
    """
    # List of your netCDF files
    nc_files = ["43111.nc", "50971.nc", "51892.nc", "55106.nc", "55106.nc", "60466.nc", "61111.nc", "62537.nc"]

    # Read each distinct file once; a name listed twice (e.g. "55106.nc")
    # reuses the already-loaded data while still getting its own column.
    data_by_file = {}
    for file in nc_files:
        if file not in data_by_file:
            data_by_file[file] = read_nc_file(file)
    data_dicts = [data_by_file[file] for file in nc_files]
    headers = [os.path.splitext(file)[0] for file in nc_files]

    # The first file defines which variables are exported.
    variable_names = list(data_dicts[0].keys())

    with open('wind_out.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        # Write headers
        writer.writerow(['Variable'] + headers)

        for var in variable_names:
            # tolist() on a 0-d array yields a bare scalar rather than a list;
            # wrap it so such a variable becomes a single row.
            columns = [
                values if isinstance(values, list) else [values]
                for values in (data_dict[var] for data_dict in data_dicts)
            ]
            # Take the row count from this variable itself, not from the first
            # variable of the file, so variables of different lengths are
            # neither truncated nor indexed out of range.
            for i in range(len(columns[0])):
                writer.writerow([var] + [column[i] for column in columns])

# Run the netCDF-to-CSV export only when this code executes as the main
# module (which includes running it as a notebook cell), not when imported.
if __name__ == '__main__':
    main()
