In [1]:

import os
import glob
import zipfile
import shutil
import pandas as pd
import numpy as np
from datetime import datetime
import xarray as xr
import logging
import re


In [47]:
import os
import glob
import shutil
import pandas as pd
import xarray as xr
import zipfile
import re
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler("process_log.log", mode='w')
    ]
)

# Metadata dictionary
var_metadata = {
    "Seconds": {"long_name": "Elapsed time in seconds", "units": "s", "description": "Time elapsed in seconds from the start of the recording"},
    "Sequence_Number": {"long_name": "Sequence number", "units": "NA", "description": "Sequence number for the data record"},
    "Diagnostic_Value": {"long_name": "Diagnostic value", "units": "NA", "description": "Primary diagnostic value"},
    "Diagnostic_Value_2": {"long_name": "Secondary diagnostic value", "units": "NA", "description": "Secondary diagnostic value"},
    "DS_Diagnostic_Value": {"long_name": "DS diagnostic value", "units": "NA", "description": "Diagnostic value for the DS channel"},
    "CO2_Absorptance": {"long_name": "CO2 absorptance", "units": "NA", "description": "Absorptance measurement for CO2"},
    "H2O_Absorptance": {"long_name": "H2O absorptance", "units": "NA", "description": "Absorptance measurement for H2O"},
    "CO2_mmol_m^3": {"long_name": "CO2 concentration", "units": "mmol m-3", "description": "Carbon dioxide concentration in mmol per cubic meter"},
    "CO2_mg_m^3": {"long_name": "CO2 mass concentration", "units": "mg m-3", "description": "Mass concentration of carbon dioxide in mg per cubic meter"},
    "H2O_mmol_m^3": {"long_name": "H2O concentration", "units": "mmol m-3", "description": "Water vapor concentration in mmol per cubic meter"},
    "H2O_g_m^3": {"long_name": "H2O mass concentration", "units": "g m-3", "description": "Mass concentration of water vapor in g per cubic meter"},
    "Temperature_C": {"long_name": "Temperature", "units": "C", "description": "Ambient temperature in degrees Celsius"},
    "Pressure_kPa": {"long_name": "Pressure", "units": "kPa", "description": "Atmospheric pressure in kilopascals"},
    "Cooler_Voltage_V": {"long_name": "Cooler voltage", "units": "V", "description": "Voltage applied to the cooler"},
    "Chopper_Cooler_Voltage_V": {"long_name": "Chopper cooler voltage", "units": "V", "description": "Voltage applied to the chopper cooler"},
    "Vin_SmartFlux_V": {"long_name": "Vin SmartFlux", "units": "V", "description": "Input voltage for SmartFlux"},
    "CO2_umol_mol": {"long_name": "CO2 mole fraction", "units": "umol mol-1", "description": "Mole fraction of CO2 in micromoles per mole"},
    "H2O_mmol_mol": {"long_name": "H2O mole fraction", "units": "mmol mol-1", "description": "Mole fraction of water vapor in mmol per mole"},
    "Dew_Point_C": {"long_name": "Dew point temperature", "units": "C", "description": "Dew point temperature in degrees Celsius"},
    "CO2_Signal_Strength": {"long_name": "CO2 signal strength", "units": "NA", "description": "Signal strength measurement for CO2"},
    "H2O_Sample": {"long_name": "H2O sample signal", "units": "NA", "description": "Sample signal for water vapor"},
    "H2O_Reference": {"long_name": "H2O reference signal", "units": "NA", "description": "Reference signal for water vapor"},
    "CO2_Sample": {"long_name": "CO2 sample signal", "units": "NA", "description": "Sample signal for carbon dioxide"},
    "CO2_Reference": {"long_name": "CO2 reference signal", "units": "NA", "description": "Reference signal for carbon dioxide"},
    "Vin_DSI_V": {"long_name": "Vin DSI", "units": "V", "description": "Input voltage for DSI"},
    "U_m_s": {"long_name": "U wind speed", "units": "m s-1", "description": "Wind speed in the U direction in meters per second"},
    "V_m_s": {"long_name": "V wind speed", "units": "m s-1", "description": "Wind speed in the V direction in meters per second"},
    "W_m_s": {"long_name": "W wind speed", "units": "m s-1", "description": "Wind speed in the W direction in meters per second"},
    "T_C": {"long_name": "Temperature", "units": "C", "description": "Temperature in degrees Celsius"},
    "Anemometer_Diagnostics": {"long_name": "Anemometer diagnostics", "units": "NA", "description": "Diagnostics for the anemometer"},
    "CHK": {"long_name": "Checksum", "units": "NA", "description": "Data integrity checksum"}
}

def extract_all_zip_files_for_month(root_dir, year_month, temp_csv_dir):
    """Extracts all .ghg files for a given month."""
    month_dir = os.path.join(root_dir, "raw", year_month)
    if os.path.isdir(month_dir):
        zip_files = glob.glob(os.path.join(month_dir, "*.ghg"))
        logging.info(f"Found {len(zip_files)} ZIP files in {month_dir}")
        for zip_file in zip_files:
            logging.info(f"Extracting {zip_file}")
            try:
                with zipfile.ZipFile(zip_file, "r") as zip_ref:
                    zip_ref.extractall(temp_csv_dir)
            except zipfile.BadZipFile as e:
                logging.error(f"Failed to extract {zip_file}: {e}")

def sanitize_column_name(name):
    """Sanitize column names."""
    return name.strip().replace(' ', '_').replace('(', '').replace(')', '').replace('/', '_')

def read_data_file(data_file_path):
    """Reads the .data file and returns a DataFrame."""
    logging.info(f"Reading data file {data_file_path}")
    try:
        with open(data_file_path, 'r') as file:
            lines = file.readlines()
    except FileNotFoundError as e:
        logging.error(f"Data file not found: {e}")
        return None, None

    # Extract metadata
    file_metadata = {}
    for line in lines[:6]:
        if ':' in line:
            key, value = line.split(':', 1)
            file_metadata[key.strip()] = value.strip()

    # Read data into DataFrame
    try:
        df = pd.read_csv(data_file_path, skiprows=7, sep='\t')
    except pd.errors.ParserError as e:
        logging.error(f"Failed to parse data file {data_file_path}: {e}")
        return None, file_metadata

    # Sanitize and rename columns
    df.columns = [sanitize_column_name(col) for col in df.columns]
    rename_dict = {original: key for key, metadata in var_metadata.items() for original in df.columns if original in metadata.get("long_name", "").replace(' ', '_')}
    df.rename(columns=rename_dict, inplace=True)

    # Combine 'Date', 'Time', and 'Nanoseconds' into 'time'
    try:
        df['time'] = (
            pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%Y-%m-%d %H:%M:%S:%f')
            - pd.Timestamp('1970-01-01')
        ).dt.total_seconds() #+ df['Nanoseconds'] / 1e9  # Add nanoseconds as fractional seconds

        df['time'] = df['time'].astype('float64')
        df.set_index('time', inplace=True)

        df.drop(columns=['Date', 'Time', 'Nanoseconds', 'DATAH'], inplace=True, errors='ignore')
    except Exception as e:
        logging.error(f"Error processing time index in {data_file_path}: {e}")
        return None, file_metadata

    logging.info(f"Read and processed {data_file_path} with {len(df)} records")
    return df, file_metadata

def read_metadata_file(metadata_file_path):
    """Reads metadata from a file."""
    logging.info(f"Reading metadata file {metadata_file_path}")
    desired_keys = {
        'site_name': r'site_name\s*=\s*(.+)',
        'altitude': r'altitude\s*=\s*(.+)',
        'latitude': r'latitude\s*=\s*(.+)',
        'longitude': r'longitude\s*=\s*(.+)',
        'station_name': r'station_name\s*=\s*(.+)',
        'logger_id': r'logger_id\s*=\s*(.+)',
        'acquisition_frequency': r'acquisition_frequency\s*=\s*(.+)',
        'file_duration': r'file_duration\s*=\s*(.+)',
        'instr_1_manufacturer': r'instr_1_manufacturer\s*=\s*(.+)',
        'instr_1_model': r'instr_1_model\s*=\s*(.+)',
        'instr_1_sn': r'instr_1_sn\s*=\s*(.+)',
        'instr_2_manufacturer': r'instr_2_manufacturer\s*=\s*(.+)',
        'instr_2_model': r'instr_2_model\s*=\s*(.+)',
        'instr_2_sn': r'instr_2_sn\s*=\s*(.+)',
    }

    metadata = {}

    try:
        with open(metadata_file_path, 'r') as file:
            for line in file:
                for key, pattern in desired_keys.items():
                    match = re.search(pattern, line)
                    if match:
                        metadata[key] = match.group(1).strip()
    except FileNotFoundError as e:
        logging.error(f"Metadata file not found: {e}")
        return None

    logging.info(f"Extracted metadata from {metadata_file_path}")
    return metadata

def combine_data_files(data_file_paths, metadata_file_paths):
    """Combines all .data files into a single DataFrame."""
    dataframes = []
    combined_metadata = {}

    for data_file_path, metadata_file_path in zip(data_file_paths, metadata_file_paths):
        df, file_metadata = read_data_file(data_file_path)
        if df is not None:
            dataframes.append(df)
        else:
            logging.warning(f"Skipping file {data_file_path} due to read errors")

        metadata = read_metadata_file(metadata_file_path)
        if metadata:
            combined_metadata.update(file_metadata)
            combined_metadata.update(metadata)
        else:
            logging.warning(f"Metadata extraction failed for {metadata_file_path}")

    if not dataframes:
        logging.error("No dataframes were created, cannot continue with empty data")
        return None, None

    combined_df = pd.concat(dataframes, ignore_index=False)
    combined_df.sort_index(inplace=True)

    logging.info(f"Combined DataFrame time range: {combined_df.index.min()} to {combined_df.index.max()}")
    return combined_df, combined_metadata

def df_to_xarray(df, metadata):
    """Converts the DataFrame to an xarray Dataset."""
    if df is None or df.empty:
        logging.error("Empty DataFrame cannot be converted to xarray Dataset")
        return None

    ds = xr.Dataset.from_dataframe(df)

    ds = ds.assign_coords(time=("time", df.index))

    for key, value in metadata.items():
        ds.attrs[key] = value

    for var_name in df.columns:
        if var_name in var_metadata:
            ds[var_name].attrs.update(var_metadata[var_name])
        else:
            logging.warning(f"Variable {var_name} not found in var_metadata")

    ds.time.attrs.update({
        "long_name": "time",
        "units": "seconds since 1970-01-01 00:00:00",
        "calendar": "standard"
    })

    logging.info(f"Converted to xarray Dataset with time range: {ds['time'].min().values} to {ds['time'].max().values}")
    return ds

def process_files_for_month(root_dir, year_month):
    """Processes files and creates a NetCDF file."""
    temp_data_dir = os.path.join(root_dir, "temp", "data")
    os.makedirs(temp_data_dir, exist_ok=True)

    extract_all_zip_files_for_month(root_dir, year_month, temp_data_dir)

    data_files = glob.glob(os.path.join(temp_data_dir, "*.data"))
    metadata_files = [f.replace('.data', '.metadata') for f in data_files]

    if not data_files:
        logging.error("No data files found for the given month.")
        return

    combined_df, metadata = combine_data_files(data_files, metadata_files)
    if combined_df is None:
        logging.error("No data to process into NetCDF")
        return

    combined_ds = df_to_xarray(combined_df, metadata)
    if combined_ds is None:
        logging.error("Failed to convert data to xarray Dataset")
        return

    nc_dir = os.path.join(root_dir, "netcdf")
    os.makedirs(nc_dir, exist_ok=True)
    netcdf_filename = f"smartflux_rawdata_{year_month.replace('/', '_')}.nc"
    netcdf_filepath = os.path.join(nc_dir, netcdf_filename)

    try:
        # Save with compression
        encoding = {var: {"zlib": True, "complevel": 5} for var in combined_ds.data_vars}
        combined_ds.to_netcdf(netcdf_filepath, unlimited_dims=["time"], encoding=encoding)
        logging.info(f"Written combined dataset to {netcdf_filepath}")
    except Exception as e:
        logging.error(f"Failed to write NetCDF file {netcdf_filepath}: {e}")

    shutil.rmtree(temp_data_dir)

# Main execution
root_dir = "/Users/bhupendra/projects/crocus/data/flux_data/data"
year_month = "2024/07"

process_files_for_month(root_dir, year_month)


# This code is testing raw files

In [30]:
import os
import glob
import shutil
import pandas as pd
import xarray as xr
import zipfile
import re
import logging

# Configure logging to output to both the console and a file
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler("process_log.log", mode='w')
    ]
)

def extract_all_zip_files_for_month(root_dir, year_month, temp_csv_dir):
    """Extracts all .ghg files for a given month into a temporary directory."""
    month_dir = os.path.join(root_dir, "raw", year_month)
    if os.path.isdir(month_dir):
        zip_files = glob.glob(os.path.join(month_dir, "*.ghg"))
        logging.info(f"Found {len(zip_files)} ZIP files in {month_dir}")
        for zip_file in zip_files:
            logging.info(f"Extracting {zip_file}")
            try:
                with zipfile.ZipFile(zip_file, "r") as zip_ref:
                    zip_ref.extractall(temp_csv_dir)
            except zipfile.BadZipFile as e:
                logging.error(f"Failed to extract {zip_file}: {e}")

def read_data_file(data_file_path):
    """Reads the .data file, extracts metadata, and returns a DataFrame with a datetime index."""
    logging.info(f"Reading data file {data_file_path}")
    try:
        with open(data_file_path, 'r') as file:
            lines = file.readlines()
    except FileNotFoundError as e:
        logging.error(f"Data file not found: {e}")
        return None, None

    # Extract metadata from the first few lines
    file_metadata = {}
    for line in lines[:6]:  # First 6 lines contain metadata
        if ':' in line:
            key, value = line.split(':', 1)
            file_metadata[key.strip()] = value.strip()

    # Read data into a DataFrame, skipping metadata lines and using the correct header line
    try:
        df = pd.read_csv(data_file_path, skiprows=7, sep='\t')
    except pd.errors.ParserError as e:
        logging.error(f"Failed to parse data file {data_file_path}: {e}")
        return None, file_metadata

    # Combine 'Date', 'Time', and 'Nanoseconds' into a single datetime column
    try:
        df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%Y-%m-%d %H:%M:%S:%f') + pd.to_timedelta(df['Nanoseconds'].astype(int), unit='ns')
        df.set_index('datetime', inplace=True)
        df.drop(columns=['Date', 'Time', 'Nanoseconds', 'DATAH'], inplace=True, errors='ignore')
    except Exception as e:
        logging.error(f"Error processing datetime index in {data_file_path}: {e}")
        return None, file_metadata

    logging.info(f"Read and processed {data_file_path} with {len(df)} records")
    return df, file_metadata

def read_metadata_file(metadata_file_path):
    """Reads metadata from specific sections of a file and returns a dictionary."""
    logging.info(f"Reading metadata file {metadata_file_path}")
    desired_keys = {
        'site_name': r'site_name\s*=\s*(.+)',
        'altitude': r'altitude\s*=\s*(.+)',
        'latitude': r'latitude\s*=\s*(.+)',
        'longitude': r'longitude\s*=\s*(.+)',
        'station_name': r'station_name\s*=\s*(.+)',
        'logger_id': r'logger_id\s*=\s*(.+)',
        'acquisition_frequency': r'acquisition_frequency\s*=\s*(.+)',
        'file_duration': r'file_duration\s*=\s*(.+)',
        'instr_1_manufacturer': r'instr_1_manufacturer\s*=\s*(.+)',
        'instr_1_model': r'instr_1_model\s*=\s*(.+)',
        'instr_1_sn': r'instr_1_sn\s*=\s*(.+)',
        'instr_2_manufacturer': r'instr_2_manufacturer\s*=\s*(.+)',
        'instr_2_model': r'instr_2_model\s*=\s*(.+)',
        'instr_2_sn': r'instr_2_sn\s*=\s*(.+)',
    }

    metadata = {}

    try:
        with open(metadata_file_path, 'r') as file:
            for line in file:
                for key, pattern in desired_keys.items():
                    match = re.search(pattern, line)
                    if match:
                        metadata[key] = match.group(1).strip()
    except FileNotFoundError as e:
        logging.error(f"Metadata file not found: {e}")
        return None

    logging.info(f"Extracted metadata from {metadata_file_path}")
    return metadata

def combine_data_files(data_file_paths, metadata_file_paths):
    """Combines all .data files into a single DataFrame and gathers metadata."""
    dataframes = []
    combined_metadata = {}

    for data_file_path, metadata_file_path in zip(data_file_paths, metadata_file_paths):
        df, file_metadata = read_data_file(data_file_path)
        if df is not None:
            dataframes.append(df)
        else:
            logging.warning(f"Skipping file {data_file_path} due to read errors")

        metadata = read_metadata_file(metadata_file_path)
        if metadata:
            combined_metadata.update(file_metadata)
            combined_metadata.update(metadata)
        else:
            logging.warning(f"Metadata extraction failed for {metadata_file_path}")

    if not dataframes:
        logging.error("No dataframes were created, cannot continue with empty data")
        return None, None

    combined_df = pd.concat(dataframes, ignore_index=True)
    combined_df = combined_df.sort_index()

    logging.info(f"Combined DataFrame time range: {combined_df.index.min()} to {combined_df.index.max()}")
    return combined_df, combined_metadata

def df_to_xarray(df, metadata):
    """Converts the DataFrame to an xarray Dataset and attaches metadata."""
    if df is None or df.empty:
        logging.error("Empty DataFrame cannot be converted to xarray Dataset")
        return None

    ds = xr.Dataset.from_dataframe(df)

    # Attach metadata as global attributes
    for key, value in metadata.items():
        ds.attrs[key] = value

    # Attach units and descriptions as variable attributes based on column names
    for column in df.columns:
        if '(' in column and ')' in column:
            name, unit = column.rsplit('(', 1)
            var_name = name.strip()
            if var_name in ds.variables:
                ds[var_name].attrs["units"] = unit.strip(')')
                ds[var_name].attrs["long_name"] = var_name
            else:
                logging.warning(f"Variable {var_name} not found in dataset")

    logging.info(f"Converted to xarray Dataset with dimensions: {ds.dims}")
    return ds

def process_files_for_month(root_dir, year_month):
    """Main function to process files and create a NetCDF file."""
    temp_data_dir = os.path.join(root_dir, "temp", "data")
    os.makedirs(temp_data_dir, exist_ok=True)

    extract_all_zip_files_for_month(root_dir, year_month, temp_data_dir)

    data_files = glob.glob(os.path.join(temp_data_dir, "*.data"))
    metadata_files = [f.replace('.data', '.metadata') for f in data_files]

    if not data_files:
        logging.error("No data files found for the given month.")
        return

    combined_df, metadata = combine_data_files(data_files, metadata_files)
    if combined_df is None:
        logging.error("No data to process into NetCDF")
        return

    combined_ds = df_to_xarray(combined_df, metadata)
    if combined_ds is None:
        logging.error("Failed to convert data to xarray Dataset")
        return

    nc_dir = os.path.join(root_dir, "netcdf")
    os.makedirs(nc_dir, exist_ok=True)
    netcdf_filename = f"smartflux_data_{year_month.replace('/', '_')}.nc"
    netcdf_filepath = os.path.join(nc_dir, netcdf_filename)

    try:
        combined_ds.to_netcdf(netcdf_filepath, unlimited_dims=["time"])
        logging.info(f"Written combined dataset to {netcdf_filepath}")
    except Exception as e:
        logging.error(f"Failed to write NetCDF file {netcdf_filepath}: {e}")

    shutil.rmtree(temp_data_dir)

# Main script execution
root_dir = "/Users/bhupendra/projects/crocus/data/flux_data/data"
year_month = "2024/07"

process_files_for_month(root_dir, year_month)


ERROR:root:Failed to write NetCDF file /Users/bhupendra/projects/crocus/data/flux_data/data/netcdf/smartflux_data_2024_07.nc: Forward slashes '/' are not allowed in variable and dimension names (got 'CO2 (mmol/m^3)'). Forward slashes are used as hierarchy-separators for HDF5-based files ('netcdf4'/'h5netcdf').


# below code was testing results files

In [42]:
import re

def generate_smartflux_metadata():
    metadata = {
        'filename': {'long_name': 'file name', 'units': 'NA', 'description': 'Name of the file'},
        'date': {'long_name': 'date', 'units': 'NA', 'description': 'Date of the measurement'},
        'time': {'long_name': 'time', 'units': 'NA', 'description': 'Time of the measurement'},
        'DOY': {'long_name': 'day of year', 'units': 'ddd.ddd', 'description': 'Day of the year'},
        'daytime': {'long_name': 'daytime flag', 'units': '1=daytime', 'description': 'Daytime flag (1=daytime)'},
        'file_records': {'long_name': 'file records', 'units': '#', 'description': 'Number of records in the file'},
        'used_records': {'long_name': 'used records', 'units': '#', 'description': 'Number of records used'},
        'Tau': {'long_name': 'momentum flux', 'units': 'kg m-1s-2', 'description': 'Momentum flux'},
        'qc_Tau': {'long_name': 'quality control momentum flux', 'units': '#', 'description': 'Quality control for momentum flux'},
        'rand_err_Tau': {'long_name': 'random error momentum flux', 'units': 'kg m-1s-2', 'description': 'Random error in momentum flux'},
        'H': {'long_name': 'sensible heat flux', 'units': 'W m-2', 'description': 'Sensible heat flux'},
        'qc_H': {'long_name': 'quality control sensible heat flux', 'units': '#', 'description': 'Quality control for sensible heat flux'},
        'rand_err_H': {'long_name': 'random error sensible heat flux', 'units': 'W m-2', 'description': 'Random error in sensible heat flux'},
        'LE': {'long_name': 'latent heat flux', 'units': 'W m-2', 'description': 'Latent heat flux'},
        'qc_LE': {'long_name': 'quality control latent heat flux', 'units': '#', 'description': 'Quality control for latent heat flux'},
        'rand_err_LE': {'long_name': 'random error latent heat flux', 'units': 'W m-2', 'description': 'Random error in latent heat flux'},
        'co2_flux': {'long_name': 'CO2 flux', 'units': 'µmol s-1m-2', 'description': 'CO2 flux'},
        'qc_co2_flux': {'long_name': 'quality control CO2 flux', 'units': '#', 'description': 'Quality control for CO2 flux'},
        'rand_err_co2_flux': {'long_name': 'random error CO2 flux', 'units': 'µmol s-1m-2', 'description': 'Random error in CO2 flux'},
        'h2o_flux': {'long_name': 'H2O flux', 'units': 'mmol s-1m-2', 'description': 'H2O flux'},
        'qc_h2o_flux': {'long_name': 'quality control H2O flux', 'units': '#', 'description': 'Quality control for H2O flux'},
        'rand_err_h2o_flux': {'long_name': 'random error H2O flux', 'units': 'mmol s-1m-2', 'description': 'Random error in H2O flux'},
        'ch4_flux': {'long_name': 'CH4 flux', 'units': 'µmol s-1m-2', 'description': 'CH4 flux'},
        'qc_ch4_flux': {'long_name': 'quality control CH4 flux', 'units': '#', 'description': 'Quality control for CH4 flux'},
        'rand_err_ch4_flux': {'long_name': 'random error CH4 flux', 'units': 'µmol s-1m-2', 'description': 'Random error in CH4 flux'},
        'none_flux': {'long_name': 'none flux', 'units': 'µmol s-1m-2', 'description': 'None flux'},
        'qc_none_flux': {'long_name': 'quality control none flux', 'units': '#', 'description': 'Quality control for None flux'},
        'rand_err_none_flux': {'long_name': 'random error none flux', 'units': 'µmol s-1m-2', 'description': 'Random error in None flux'},
        'H_strg': {'long_name': 'sensible heat storage', 'units': 'W m-2', 'description': 'Sensible heat storage'},
        'LE_strg': {'long_name': 'latent heat storage', 'units': 'W m-2', 'description': 'Latent heat storage'},
        'co2_strg': {'long_name': 'CO2 storage', 'units': 'µmol s-1m-2', 'description': 'CO2 storage'},
        'h2o_strg': {'long_name': 'H2O storage', 'units': 'mmol s-1m-2', 'description': 'H2O storage'},
        'ch4_strg': {'long_name': 'CH4 storage', 'units': 'µmol s-1m-2', 'description': 'CH4 storage'},
        'none_strg': {'long_name': 'none storage', 'units': 'µmol s-1m-2', 'description': 'None storage'},
        'co2_vadv': {'long_name': 'CO2 vertical advection', 'units': 'µmol s-1m-2', 'description': 'CO2 vertical advection'},
        'h2o_vadv': {'long_name': 'H2O vertical advection', 'units': 'mmol s-1m-2', 'description': 'H2O vertical advection'},
        'ch4_vadv': {'long_name': 'CH4 vertical advection', 'units': 'µmol s-1m-2', 'description': 'CH4 vertical advection'},
        'none_vadv': {'long_name': 'none vertical advection', 'units': 'µmol s-1m-2', 'description': 'None vertical advection'},
        'co2_molar_density': {'long_name': 'CO2 molar density', 'units': 'mmol m-3', 'description': 'CO2 molar density'},
        'co2_mole_fraction': {'long_name': 'CO2 mole fraction', 'units': 'µmol mol_a-1', 'description': 'CO2 mole fraction'},
        'co2_mixing_ratio': {'long_name': 'CO2 mixing ratio', 'units': 'µmol mol_d-1', 'description': 'CO2 mixing ratio'},
        'co2_time_lag': {'long_name': 'CO2 time lag', 'units': 's', 'description': 'CO2 time lag'},
        'co2_def_timelag': {'long_name': 'CO2 default time lag', 'units': '1=default', 'description': 'CO2 default time lag flag'},
        'h2o_molar_density': {'long_name': 'H2O molar density', 'units': 'mmol m-3', 'description': 'H2O molar density'},
        'h2o_mole_fraction': {'long_name': 'H2O mole fraction', 'units': 'mmol mol_a-1', 'description': 'H2O mole fraction'},
        'h2o_mixing_ratio': {'long_name': 'H2O mixing ratio', 'units': 'mmol mol_d-1', 'description': 'H2O mixing ratio'},
        'h2o_time_lag': {'long_name': 'H2O time lag', 'units': 's', 'description': 'H2O time lag'},
        'h2o_def_timelag': {'long_name': 'H2O default time lag', 'units': '1=default', 'description': 'H2O default time lag flag'},
        'ch4_molar_density': {'long_name': 'CH4 molar density', 'units': 'mmol m-3', 'description': 'CH4 molar density'},
        'ch4_mole_fraction': {'long_name': 'CH4 mole fraction', 'units': 'µmol mol_a-1', 'description': 'CH4 mole fraction'},
        'ch4_mixing_ratio': {'long_name': 'CH4 mixing ratio', 'units': 'µmol mol_d-1', 'description': 'CH4 mixing ratio'},
        'ch4_time_lag': {'long_name': 'CH4 time lag', 'units': 's', 'description': 'CH4 time lag'},
        'ch4_def_timelag': {'long_name': 'CH4 default time lag', 'units': '1=default', 'description': 'CH4 default time lag flag'},
        'none_molar_density': {'long_name': 'none molar density', 'units': 'mmol m-3', 'description': 'None molar density'},
        'none_mole_fraction': {'long_name': 'none mole fraction', 'units': 'µmol mol_a-1', 'description': 'None mole fraction'},
        'none_mixing_ratio': {'long_name': 'none mixing ratio', 'units': 'µmol mol_d-1', 'description': 'None mixing ratio'},
        'none_time_lag': {'long_name': 'none time lag', 'units': 's', 'description': 'None time lag'},
        'none_def_timelag': {'long_name': 'none default time lag', 'units': '1=default', 'description': 'None default time lag flag'},
        'sonic_temperature': {'long_name': 'sonic temperature', 'units': 'K', 'description': 'Sonic temperature'},
        'air_temperature': {'long_name': 'air temperature', 'units': 'K', 'description': 'Air temperature'},
        'air_pressure': {'long_name': 'air pressure', 'units': 'Pa', 'description': 'Air pressure'},
        'air_density': {'long_name': 'air density', 'units': 'kg m-3', 'description': 'Air density'},
        'air_heat_capacity': {'long_name': 'air heat capacity', 'units': 'J kg-1K-1', 'description': 'Air heat capacity'},
        'air_molar_volume': {'long_name': 'air molar volume', 'units': 'm+3mol-1', 'description': 'Air molar volume'},
        'ET': {'long_name': 'evapotranspiration', 'units': 'mm hour-1', 'description': 'Evapotranspiration'},
        'water_vapor_density': {'long_name': 'water vapor density', 'units': 'kg m-3', 'description': 'Water vapor density'},
        'e': {'long_name': 'vapor pressure', 'units': 'Pa', 'description': 'Vapor pressure'},
        'es': {'long_name': 'saturation vapor pressure', 'units': 'Pa', 'description': 'Saturation vapor pressure'},
        'specific_humidity': {'long_name': 'specific humidity', 'units': 'kg kg-1', 'description': 'Specific humidity'},
        'RH': {'long_name': 'relative humidity', 'units': '%', 'description': 'Relative humidity'},
        'VPD': {'long_name': 'vapor pressure deficit', 'units': 'Pa', 'description': 'Vapor pressure deficit'},
        'Tdew': {'long_name': 'dew point temperature', 'units': 'K', 'description': 'Dew point temperature'},
        'u_unrot': {'long_name': 'unrotated u wind component', 'units': 'm s-1', 'description': 'Unrotated u wind component'},
        'v_unrot': {'long_name': 'unrotated v wind component', 'units': 'm s-1', 'description': 'Unrotated v wind component'},
        'w_unrot': {'long_name': 'unrotated w wind component', 'units': 'm s-1', 'description': 'Unrotated w wind component'},
        'u_rot': {'long_name': 'rotated u wind component', 'units': 'm s-1', 'description': 'Rotated u wind component'},
        'v_rot': {'long_name': 'rotated v wind component', 'units': 'm s-1', 'description': 'Rotated v wind component'},
        'w_rot': {'long_name': 'rotated w wind component', 'units': 'm s-1', 'description': 'Rotated w wind component'},
        'wind_speed': {'long_name': 'wind speed', 'units': 'm s-1', 'description': 'Wind speed'},
        'max_wind_speed': {'long_name': 'maximum wind speed', 'units': 'm s-1', 'description': 'Maximum wind speed'},
        'wind_dir': {'long_name': 'wind direction', 'units': 'deg from north', 'description': 'Wind direction'},
        'yaw': {'long_name': 'yaw', 'units': 'deg', 'description': 'Yaw'},
        'pitch': {'long_name': 'pitch', 'units': 'deg', 'description': 'Pitch'},
        'roll': {'long_name': 'roll', 'units': 'deg', 'description': 'Roll'},
        'u*': {'long_name': 'friction velocity', 'units': 'm s-1', 'description': 'Friction velocity'},
        'TKE': {'long_name': 'turbulent kinetic energy', 'units': 'm+2s-2', 'description': 'Turbulent kinetic energy'},
        'L': {'long_name': 'Obukhov length', 'units': 'm', 'description': 'Obukhov length'},
        'z_d_per_L': {'long_name': 'stability parameter', 'units': '#', 'description': 'Stability parameter'},
        'bowen_ratio': {'long_name': 'Bowen ratio', 'units': 'K', 'description': 'Bowen ratio'},
        'T*': {'long_name': 'temperature scale', 'units': 'K', 'description': 'Temperature scale'},
        'model': {'long_name': 'model', 'units': '0=KJ/1=KM/2=HS', 'description': 'Model'},
        'x_peak': {'long_name': 'x peak', 'units': 'm', 'description': 'X peak'},
        'x_offset': {'long_name': 'x offset', 'units': 'm', 'description': 'X offset'},
        'x_10%': {'long_name': 'x 10%', 'units': 'm', 'description': 'X 10%'},
        'x_30%': {'long_name': 'x 30%', 'units': 'm', 'description': 'X 30%'},
        'x_50%': {'long_name': 'x 50%', 'units': 'm', 'description': 'X 50%'},
        'x_70%': {'long_name': 'x 70%', 'units': 'm', 'description': 'X 70%'},
        'x_90%': {'long_name': 'x 90%', 'units': 'm', 'description': 'X 90%'},
        'un_Tau': {'long_name': 'uncorrected momentum flux', 'units': 'kg m-1s-2', 'description': 'Uncorrected momentum flux'},
        'Tau_scf': {'long_name': 'momentum flux spectral correction factor', 'units': '#', 'description': 'Momentum flux spectral correction factor'},
        'un_H': {'long_name': 'uncorrected sensible heat flux', 'units': 'W m-2', 'description': 'Uncorrected sensible heat flux'},
        'H_scf': {'long_name': 'sensible heat flux spectral correction factor', 'units': '#', 'description': 'Sensible heat flux spectral correction factor'},
        'un_LE': {'long_name': 'uncorrected latent heat flux', 'units': 'W m-2', 'description': 'Uncorrected latent heat flux'},
        'LE_scf': {'long_name': 'latent heat flux spectral correction factor', 'units': '#', 'description': 'Latent heat flux spectral correction factor'},
        'un_co2_flux': {'long_name': 'uncorrected CO2 flux', 'units': 'µmol s-1m-2', 'description': 'Uncorrected CO2 flux'},
        'co2_scf': {'long_name': 'CO2 flux spectral correction factor', 'units': '#', 'description': 'CO2 flux spectral correction factor'},
        'un_h2o_flux': {'long_name': 'uncorrected H2O flux', 'units': 'mmol s-1m-2', 'description': 'Uncorrected H2O flux'},
        'h2o_scf': {'long_name': 'H2O flux spectral correction factor', 'units': '#', 'description': 'H2O flux spectral correction factor'},
        'un_ch4_flux': {'long_name': 'uncorrected CH4 flux', 'units': 'µmol s-1m-2', 'description': 'Uncorrected CH4 flux'},
        'ch4_scf': {'long_name': 'CH4 flux spectral correction factor', 'units': '#', 'description': 'CH4 flux spectral correction factor'},
        'un_none_flux': {'long_name': 'uncorrected none flux', 'units': 'µmol s-1m-2', 'description': 'Uncorrected None flux'},
        'none_scf': {'long_name': 'none flux spectral correction factor', 'units': '#', 'description': 'None flux spectral correction factor'},
        'spikes_hf': {'long_name': 'spikes high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Spikes high frequency'},
        'amplitude_resolution_hf': {'long_name': 'amplitude resolution high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Amplitude resolution high frequency'},
        'drop_out_hf': {'long_name': 'drop out high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Drop out high frequency'},
        'absolute_limits_hf': {'long_name': 'absolute limits high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Absolute limits high frequency'},
        'skewness_kurtosis_hf': {'long_name': 'skewness and kurtosis high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Skewness and kurtosis high frequency'},
        'skewness_kurtosis_sf': {'long_name': 'skewness and kurtosis single frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Skewness and kurtosis single frequency'},
        'discontinuities_hf': {'long_name': 'discontinuities high frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Discontinuities high frequency'},
        'discontinuities_sf': {'long_name': 'discontinuities single frequency', 'units': '8u/v/w/ts/co2/h2o/ch4/none', 'description': 'Discontinuities single frequency'},
        'timelag_hf': {'long_name': 'time lag high frequency', 'units': '8co2/h2o/ch4/none', 'description': 'Time lag high frequency'},
        'timelag_sf': {'long_name': 'time lag single frequency', 'units': '8co2/h2o/ch4/none', 'description': 'Time lag single frequency'},
        'attack_angle_hf': {'long_name': 'attack angle high frequency', 'units': '8aa', 'description': 'Attack angle high frequency'},
        'non_steady_wind_hf': {'long_name': 'non-steady wind high frequency', 'units': '8U', 'description': 'Non-steady wind high frequency'},
        'u_spikes': {'long_name': 'u wind component spikes', 'units': '#', 'description': 'U wind component spikes'},
        'v_spikes': {'long_name': 'v wind component spikes', 'units': '#', 'description': 'V wind component spikes'},
        'w_spikes': {'long_name': 'w wind component spikes', 'units': '#', 'description': 'W wind component spikes'},
        'ts_spikes': {'long_name': 'sonic temperature spikes', 'units': '#', 'description': 'Sonic temperature spikes'},
        'co2_spikes': {'long_name': 'CO2 spikes', 'units': '#', 'description': 'CO2 spikes'},
        'h2o_spikes': {'long_name': 'H2O spikes', 'units': '#', 'description': 'H2O spikes'},
        'ch4_spikes': {'long_name': 'CH4 spikes', 'units': '#', 'description': 'CH4 spikes'},
        'none_spikes': {'long_name': 'none spikes', 'units': '#', 'description': 'None spikes'},
        'head_detect_LI7200': {'long_name': 'head detect LI7200', 'units': '#', 'description': 'Head detect LI7200'},
        't_out_LI7200': {'long_name': 'temperature out LI7200', 'units': '#', 'description': 'Temperature out LI7200'},
        't_in_LI7200': {'long_name': 'temperature in LI7200', 'units': '#', 'description': 'Temperature in LI7200'},
        'aux_in_LI7200': {'long_name': 'auxiliary in LI7200', 'units': '#', 'description': 'Auxiliary in LI7200'},
        'delta_p_LI7200': {'long_name': 'delta pressure LI7200', 'units': '#', 'description': 'Delta pressure LI7200'},
        'chopper_LI7200': {'long_name': 'chopper LI7200', 'units': '#', 'description': 'Chopper LI7200'},
        'detector_LI7200': {'long_name': 'detector LI7200', 'units': '#', 'description': 'Detector LI7200'},
        'pll_LI7200': {'long_name': 'phase-locked loop LI7200', 'units': '#', 'description': 'Phase-locked loop LI7200'},
        'sync_LI7200': {'long_name': 'synchronization LI7200', 'units': '#', 'description': 'Synchronization LI7200'},
        'chopper_LI7500': {'long_name': 'chopper LI7500', 'units': '#', 'description': 'Chopper LI7500'},
        'detector_LI7500': {'long_name': 'detector LI7500', 'units': '#', 'description': 'Detector LI7500'},
        'pll_LI7500': {'long_name': 'phase-locked loop LI7500', 'units': '#', 'description': 'Phase-locked loop LI7500'},
        'sync_LI7500': {'long_name': 'synchronization LI7500', 'units': '#', 'description': 'Synchronization LI7500'},
        'not_ready_LI7700': {'long_name': 'not ready LI7700', 'units': '#', 'description': 'Not ready LI7700'},
        'no_signal_LI7700': {'long_name': 'no signal LI7700', 'units': '#', 'description': 'No signal LI7700'},
        're_unlocked_LI7700': {'long_name': 're-unlocked LI7700', 'units': '#', 'description': 'Re-unlocked LI7700'},
        'bad_temp_LI7700': {'long_name': 'bad temperature LI7700', 'units': '#', 'description': 'Bad temperature LI7700'},
        'laser_temp_unregulated_LI7700': {'long_name': 'laser temperature unregulated LI7700', 'units': '#', 'description': 'Laser temperature unregulated LI7700'},
        'block_temp_unregulated_LI7700': {'long_name': 'block temperature unregulated LI7700', 'units': '#', 'description': 'Block temperature unregulated LI7700'},
        'motor_spinning_LI7700': {'long_name': 'motor spinning LI7700', 'units': '#', 'description': 'Motor spinning LI7700'},
        'pump_on_LI7700': {'long_name': 'pump on LI7700', 'units': '#', 'description': 'Pump on LI7700'},
        'top_heater_on_LI7700': {'long_name': 'top heater on LI7700', 'units': '#', 'description': 'Top heater on LI7700'},
        'bottom_heater_on_LI7700': {'long_name': 'bottom heater on LI7700', 'units': '#', 'description': 'Bottom heater on LI7700'},
        'calibrating_LI7700': {'long_name': 'calibrating LI7700', 'units': '#', 'description': 'Calibrating LI7700'},
        'motor_failure_LI7700': {'long_name': 'motor failure LI7700', 'units': '#', 'description': 'Motor failure LI7700'},
        'bad_aux_tc1_LI7700': {'long_name': 'bad auxiliary temperature channel 1 LI7700', 'units': '#', 'description': 'Bad auxiliary temperature channel 1 LI7700'},
        'bad_aux_tc2_LI7700': {'long_name': 'bad auxiliary temperature channel 2 LI7700', 'units': '#', 'description': 'Bad auxiliary temperature channel 2 LI7700'},
        'bad_aux_tc3_LI7700': {'long_name': 'bad auxiliary temperature channel 3 LI7700', 'units': '#', 'description': 'Bad auxiliary temperature channel 3 LI7700'},
        'box_connected_LI7700': {'long_name': 'box connected LI7700', 'units': '#', 'description': 'Box connected LI7700'},
        'mean_value_RSSI_LI7200': {'long_name': 'mean value RSSI LI7200', 'units': '#', 'description': 'Mean value RSSI LI7200'},
        'mean_value_LI7500': {'long_name': 'mean value LI7500', 'units': '#', 'description': 'Mean value LI7500'},
        'u_var': {'long_name': 'u component variance', 'units': 'm+2s-2', 'description': 'U component variance'},
        'v_var': {'long_name': 'v component variance', 'units': 'm+2s-2', 'description': 'V component variance'},
        'w_var': {'long_name': 'w component variance', 'units': 'm+2s-2', 'description': 'W component variance'},
        'ts_var': {'long_name': 'sonic temperature variance', 'units': 'K+2', 'description': 'Sonic temperature variance'},
        'co2_var': {'long_name': 'CO2 variance', 'units': '--', 'description': 'CO2 variance'},
        'h2o_var': {'long_name': 'H2O variance', 'units': '--', 'description': 'H2O variance'},
        'ch4_var': {'long_name': 'CH4 variance', 'units': '--', 'description': 'CH4 variance'},
        'none_var': {'long_name': 'none variance', 'units': '--', 'description': 'None variance'},
        'w_per_ts_cov': {'long_name': 'w and ts covariance', 'units': 'm s-1K ', 'description': 'W and ts covariance'},
        'w_per_co2_cov': {'long_name': 'w and CO2 covariance', 'units': '--', 'description': 'W and CO2 covariance'},
        'w_per_h2o_cov': {'long_name': 'w and H2O covariance', 'units': '--', 'description': 'W and H2O covariance'},
        'w_per_ch4_cov': {'long_name': 'w and CH4 covariance', 'units': '--', 'description': 'W and CH4 covariance'},
        'w_per_none_cov': {'long_name': 'w and none covariance', 'units': '--', 'description': 'W and none covariance'},
        'vin_sf_mean': {'long_name': 'vin_sf mean', 'units': '--', 'description': 'Vin_sf mean'},
        'co2_mean': {'long_name': 'CO2 mean', 'units': '--', 'description': 'CO2 mean'},
        'h2o_mean': {'long_name': 'H2O mean', 'units': '--', 'description': 'H2O mean'},
        'dew_point_mean': {'long_name': 'dew point mean', 'units': '--', 'description': 'Dew point mean'},
        'co2_signal_strength_7500_mean': {'long_name': 'CO2 signal strength 7500 mean', 'units': '--', 'description': 'CO2 signal strength 7500 mean'}
    }
    return metadata

# Example usage
metadata = generate_smartflux_metadata()




# Try other approach to get the one file per month

In [43]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 25 14:58:26 2024

@author: bhupendra
"""

import os
import glob
import zipfile
import shutil
import pandas as pd
import numpy as np
from datetime import datetime
import xarray as xr

def extract_all_zip_files_for_month(root_dir, year_month, temp_csv_dir):
    """
    Extracts all ZIP files for the specified month into a temporary directory.
    
    Args:
        root_dir (str): The root directory containing ZIP files.
        year_month (str): The year and month in 'YYYY/MM' format.
        temp_csv_dir (str): The directory to extract the ZIP files into.
    """
    month_dir = os.path.join(root_dir, 'results', year_month)
    if os.path.isdir(month_dir):
        zip_files = glob.glob(os.path.join(month_dir, '*.zip'))
        for zip_file in zip_files:
            with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                zip_ref.extractall(temp_csv_dir)

def read_and_attach_headers(file_path, metadata):
    """
    Reads a CSV file, attaches headers from metadata, and converts datetime to seconds since 1970-01-01.
    
    Args:
        file_path (str): The path to the CSV file.
        metadata (dict): The metadata dictionary with variable names and units.
    
    Returns:
        pd.DataFrame: DataFrame with attached headers and converted time.
    """
    df = pd.read_csv(file_path, skiprows=[0, 1, 2], header=None)
    headers = list(metadata.keys())
    df.columns = headers
    df['time'] = pd.to_datetime(df['date'] + ' ' + df['time'])
    df['time'] = (df['time'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
    df = df.drop(columns=['date'])
    print(f"Read and processed {file_path}")
    print(f"Data time range: {df['time'].min()} to {df['time'].max()}")
    return df

def drop_missing_columns(ds):
    """
    Drops columns from the xarray Dataset that are entirely NaN.
    
    Args:
        ds (xr.Dataset): The xarray Dataset.
    
    Returns:
        xr.Dataset: Dataset with NaN columns dropped.
    """
    nan_vars = [var for var in ds.data_vars if ds[var].isnull().all()]
    ds = ds.drop_vars(nan_vars)
    return ds

def combine_csv_files(file_paths, metadata):
    """
    Combines multiple CSV files into a single DataFrame, sorted by time.
    
    Args:
        file_paths (list): List of CSV file paths.
        metadata (dict): The metadata dictionary with variable names and units.
    
    Returns:
        pd.DataFrame: Combined and sorted DataFrame.
    """
    dataframes = []
    for file_path in file_paths:
        df = read_and_attach_headers(file_path, metadata)
        dataframes.append(df)
    combined_df = pd.concat(dataframes, ignore_index=True)
    combined_df = combined_df.sort_values(by='time').reset_index(drop=True)
    print(f"Combined DataFrame time range: {combined_df['time'].min()} to {combined_df['time'].max()}")
    return combined_df

def df_to_xarray(df, metadata):
    """
    Converts a DataFrame to an xarray Dataset and attaches metadata.
    
    Args:
        df (pd.DataFrame): The input DataFrame.
        metadata (dict): The metadata dictionary with variable names and units.
    
    Returns:
        xr.Dataset: The resulting xarray Dataset with metadata.
    """
    ds = xr.Dataset.from_dataframe(df.set_index('time'))
    for var_name in df.columns:
        if var_name in metadata:
            ds[var_name].attrs['long_name'] = metadata[var_name]['long_name']
            ds[var_name].attrs['units'] = metadata[var_name]['units']
            ds[var_name].attrs['description'] = metadata[var_name]['description']
    ds.time.attrs['long_name'] = 'time'
    ds.time.attrs['units'] = 'seconds since 1970-01-01 00:00:00'
    ds.time.attrs['calendar'] = 'standard'
    print(f"Converted to xarray Dataset with time range: {ds['time'].min().values} to {ds['time'].max().values}")
    ds = drop_missing_columns(ds)
    return ds

def process_files_for_month(root_dir, year_month, metadata):
    """
    Processes files for a specific month: extracts ZIPs, combines CSVs, converts to xarray, and writes NetCDF.
    
    Args:
        root_dir (str): The root directory containing the ZIP files.
        year_month (str): The year and month in 'YYYY/MM' format.
        metadata (dict): The metadata dictionary with variable names and units.
    """
    temp_csv_dir = os.path.join(root_dir, 'temp', 'csv')
    os.makedirs(temp_csv_dir, exist_ok=True)
    extract_all_zip_files_for_month(root_dir, year_month, temp_csv_dir)
    csv_files = glob.glob(os.path.join(temp_csv_dir, 'output', 'eddypro_exp_full_output*_exp.csv'))
    if not csv_files:
        print("No CSV files found for the given month.")
        return
    combined_df = combine_csv_files(csv_files, metadata)
    combined_ds = df_to_xarray(combined_df, metadata)
    nc_dir = os.path.join(root_dir, 'netcdf')
    os.makedirs(nc_dir, exist_ok=True)
    netcdf_filename = f"smartflux_data_{year_month.replace('/', '_')}.nc"
    netcdf_filepath = os.path.join(nc_dir, netcdf_filename)
    if os.path.exists(netcdf_filepath):
        os.remove(netcdf_filepath)
    combined_ds.to_netcdf(netcdf_filepath, unlimited_dims=['time'])
    print(f"Written combined dataset to {netcdf_filepath}")
    shutil.rmtree(temp_csv_dir)

# Main
root_dir = '/Users/bhupendra/data/delete1/'
year_month = '2024/08'
process_files_for_month(root_dir, year_month, metadata)


Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-12T060043_exp.csv
Data time range: 1720764000 to 1720764000
Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-04T173042_exp.csv
Data time range: 1720114200 to 1720114200
Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-10T110045_exp.csv
Data time range: 1720609200 to 1720609200
Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-01T160043_exp.csv
Data time range: 1719849600 to 1719849600
Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-03T050039_exp.csv
Data time range: 1719982800 to 1719982800
Read and processed /Users/bhupendra/data/delete1/temp/csv/output/eddypro_exp_full_output_2024-07-12T213046_exp.csv
Data time range: 1720819800 to 1720819800
Read and processed /Users/bhupendra/data/delete1/temp/csv/

# For AmriFlux

In [None]:
    combined_df["PA"] = full_df_filtered["PA"]
    combined_df["TA"] = full_df_filtered["TA"]
    combined_df["RH"] = full_df_filtered["RH"]