In [1]:
import pandas as pd
import icartt
import os
import warnings
import re
from datetime import datetime
import csv
from datetime import datetime, timedelta
from netCDF4 import Dataset
import numpy as np
from scipy import stats
import glob

In [5]:
# Every dataset directory used by this notebook sits under one common root.
_DATASETS_DIR = r'C:\Users\haika\Desktop\May_Research\may_datasets'

# Input directory globbed for per-campaign data files.
RAW_PATH = rf'{_DATASETS_DIR}\time_averaged_data'
# Output directory that receives the unit-corrected CSV files.
OUTPUT_PATH = rf'{_DATASETS_DIR}\unit_corrected_data'

# Directory holding one unit-info CSV per campaign.
UNIT_INFO_PATH = rf"{_DATASETS_DIR}\unit_info_list"

In [6]:
def read_unit_info_files():
    """
    Load every ``*.csv`` file found directly under ``UNIT_INFO_PATH``.

    Returns:
        dict: maps each file's base name (without the ``.csv`` extension,
        used as the campaign name) to the DataFrame parsed from that file.
        A file that fails to load is reported on stdout and left out.
    """
    tables_by_campaign = {}

    csv_pattern = os.path.join(UNIT_INFO_PATH, "*.csv")

    for csv_path in glob.glob(csv_pattern):
        # The file stem doubles as the campaign name.
        campaign, _extension = os.path.splitext(os.path.basename(csv_path))

        try:
            table = pd.read_csv(csv_path)
            tables_by_campaign[campaign] = table

            print(f"Loaded unit info for campaign: {campaign}")
            print(f"  Columns to convert: {len(table)} entries")

        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"Error reading {csv_path}: {e}")

    return tables_by_campaign

# Multiplicative factors keyed by "<old_unit>_to_<new_unit>". Built once at
# definition time rather than on every per-cell call.
_UNIT_FACTORS = {
    'mb_to_hPa': 1.0,  # mb and hPa are the same
    'hPa_to_mb': 1.0,
    'Pa_to_mb': 0.01,  # Pa to millibar (1 Pa = 0.01 mb)
    'mb_to_Pa': 100.0,  # millibar to Pa (1 mb = 100 Pa)
    'ppbv_to_pptv': 1000.0,  # ppbv to pptv
    'pptv_to_ppbv': 0.001,   # pptv to ppbv
    'g/kg_to_ng/kg': 1e9,    # grams to nanograms per kg (1 g = 1e9 ng)
    'ng/kg_to_g/kg': 1e-9,   # nanograms to grams per kg
    'ft_to_m': 0.3048,  # feet to meters (1 ft = 0.3048 m exactly)
    'm_to_ft': 1.0 / 0.3048,  # exact reciprocal so ft -> m -> ft round-trips
    # Add more conversions as needed based on your data
}

# Air density (kg/m^3) used whenever a per-row density cannot be computed.
_STANDARD_AIR_DENSITY = 1.225

# Gas constant scaled so that rho = P / (R * T) yields kg/m^3 with P in mbar
# and T in Kelvin (287.05 J/(kg*K) divided by 100 Pa per mbar).
_GAS_CONSTANT_MBAR = 2.8705


def _air_density_for_row(df, row_index):
    """Return air density (kg/m^3) for one row of ``df``, or the standard fallback."""
    if 'Pressure' not in df.columns or 'Temperature' not in df.columns:
        print("Warning: Pressure and/or Temperature columns not found, using standard air density")
        return _STANDARD_AIR_DENSITY

    try:
        pressure = df.loc[row_index, 'Pressure']  # Pressure in mbar
        temperature_celsius = df.loc[row_index, 'Temperature']  # Temperature in Celsius

        # Missing inputs: fall back silently, as the original code did.
        if pd.isna(pressure) or pd.isna(temperature_celsius):
            return _STANDARD_AIR_DENSITY

        temperature_k = temperature_celsius + 273.15

        # A non-positive absolute temperature is unphysical. With numpy floats
        # the division would quietly produce inf (or a negative density)
        # instead of raising, so it is rejected explicitly.
        if temperature_k <= 0:
            print("Warning: Non-physical temperature for air density, using standard value")
            return _STANDARD_AIR_DENSITY

        return pressure / (_GAS_CONSTANT_MBAR * temperature_k)

    except Exception as e:
        print(f"Error calculating air density: {e}, using standard value")
        return _STANDARD_AIR_DENSITY


def convert_units(df, value, old_unit, new_unit, row_index=None):
    """
    Convert a single scalar ``value`` from ``old_unit`` to ``new_unit``.

    Args:
        df: DataFrame the value came from. It is only read for the
            "ng/kg" -> "ng/m3" conversion, which looks up the 'Pressure'
            (treated as mbar) and 'Temperature' (treated as Celsius) columns
            at ``row_index``. It may be None for every other conversion.
        value: scalar to convert. NaN/None values are returned unchanged.
        old_unit: unit label of ``value``.
        new_unit: target unit label.
        row_index: index label of the row ``value`` belongs to. Required only
            for the "ng/kg" -> "ng/m3" conversion.

    Returns:
        The converted value. The input is returned unchanged when it is
        missing, when both units are identical, when a required ``row_index``
        is absent, or when no conversion is defined for the unit pair (the
        last two cases also print a warning).
    """
    # Handle NaN values
    if pd.isna(value):
        return value

    # Identical units need no conversion and should not trigger the
    # "no conversion factor" warning below.
    if old_unit == new_unit:
        return value

    factor = _UNIT_FACTORS.get(f"{old_unit}_to_{new_unit}")
    if factor is not None:
        return value * factor

    if old_unit == "ng/kg" and new_unit == "ng/m3":
        # Mass mixing ratio -> mass concentration: multiply by air density,
        # rho = P / (R * T).
        if row_index is None:
            print("Warning: Row index needed for ng/kg to ng/m3 conversion")
            return value
        return value * _air_density_for_row(df, row_index)

    print(f"Warning: No conversion factor defined for {old_unit} to {new_unit}")
    return value

def process_campaign_data(unit_conversions):
    """
    Convert units in each campaign's data files and write the results as CSV.

    For every campaign in ``unit_conversions``, files under ``RAW_PATH`` whose
    name contains the campaign name are read as CSV, the listed column
    conversions are applied through ``convert_units``, and the result is
    written to ``OUTPUT_PATH``.

    Args:
        unit_conversions: dict mapping campaign name to a DataFrame with the
            columns 'column', 'old_unit' and 'new_unit' (one row per column
            to convert).

    Notes:
        * Conversions that only scale by a fixed factor are applied before
          the "ng/kg" -> "ng/m3" conversion. That conversion reads the
          'Pressure' column as mbar, so any Pressure conversion listed for
          the same campaign has to run first regardless of row order in the
          unit-info table.
        * When exactly one file matches a campaign, the output is named
          ``<campaign>.csv``. When several files match, each output is named
          ``<campaign>_<input stem>.csv`` so that later files no longer
          overwrite earlier ones.
        * Errors while processing a file are reported on stdout and the
          remaining files are still processed.
    """
    print("\n" + "="*50)
    print("PROCESSING CAMPAIGN DATA:")
    print("="*50)

    # Create output directory if it doesn't exist
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    for campaign_name, unit_info_df in unit_conversions.items():
        print(f"\nProcessing campaign: {campaign_name}")

        # Escape the campaign name so glob metacharacters in it are matched
        # literally; sort because glob returns files in arbitrary order.
        pattern = os.path.join(RAW_PATH, f"*{glob.escape(campaign_name)}*")
        campaign_files = sorted(glob.glob(pattern))

        if not campaign_files:
            print(f"  No data files found for campaign {campaign_name}")
            continue

        several_inputs = len(campaign_files) > 1

        for file_path in campaign_files:
            input_name = os.path.basename(file_path)
            print(f"  Processing file: {input_name}")

            try:
                # Read the data file (adjust based on your file format)
                df = pd.read_csv(file_path)

                # Collect the rules, then stable-sort so that density-based
                # conversions come last (False sorts before True) while the
                # relative order of all other rules is preserved.
                rules = [
                    (row['column'], row['old_unit'], row['new_unit'])
                    for _, row in unit_info_df.iterrows()
                ]
                rules.sort(key=lambda rule: rule[1] == "ng/kg" and rule[2] == "ng/m3")

                conversions_applied = 0
                for column_name, old_unit, new_unit in rules:
                    if column_name not in df.columns:
                        print(f"    Warning: Column '{column_name}' not found in data")
                        continue

                    print(f"    Converting {column_name}: {old_unit} → {new_unit}")

                    if old_unit == "ng/kg" and new_unit == "ng/m3":
                        # Needs the row label so convert_units can look up
                        # that row's pressure and temperature.
                        df[column_name] = [
                            convert_units(df, cell, old_unit, new_unit, row_index=idx)
                            for idx, cell in zip(df.index, df[column_name])
                        ]
                    else:
                        # Standard conversion without row index
                        df[column_name] = df[column_name].apply(
                            lambda x: convert_units(df, x, old_unit, new_unit)
                        )

                    conversions_applied += 1

                # Save the converted data; disambiguate the name when one
                # campaign has more than one input file.
                if several_inputs:
                    input_stem = os.path.splitext(input_name)[0]
                    output_filename = f"{campaign_name}_{input_stem}.csv"
                else:
                    output_filename = f"{campaign_name}.csv"
                output_path = os.path.join(OUTPUT_PATH, output_filename)
                df.to_csv(output_path, index=False)

                print(f"    Applied {conversions_applied} unit conversions")
                print(f"    Saved to: {output_filename}")

            except Exception as e:
                # Best effort: report and continue with the next file.
                print(f"    Error processing {file_path}: {e}")

In [7]:
# Load each campaign's unit-conversion table from disk
unit_conversions = read_unit_info_files()

# Echo the loaded tables so the planned conversions can be checked by eye
print("\n" + "="*50, "LOADED UNIT CONVERSION INFO:", "="*50, sep="\n")

for campaign, df in unit_conversions.items():
    print(f"\nCampaign: {campaign}")
    print(df.to_string(index=False))


# Apply the conversions and write the corrected CSV files
process_campaign_data(unit_conversions)

print("\n" + "="*50, "UNIT CONVERSION COMPLETE!", "="*50, sep="\n")


Loaded unit info for campaign: ACMEV
  Columns to convert: 2 entries
Loaded unit info for campaign: ISDAC
  Columns to convert: 1 entries
Loaded unit info for campaign: TCAP2012
  Columns to convert: 1 entries
Loaded unit info for campaign: TCAP2013
  Columns to convert: 1 entries

LOADED UNIT CONVERSION INFO:

Campaign: ACMEV
  column old_unit new_unit
Pressure       Pa       mb
 BC_Mass    ng/kg    ng/m3

Campaign: ISDAC
  column old_unit new_unit
Pressure       Pa       mb

Campaign: TCAP2012
  column old_unit new_unit
Altitude       ft        m

Campaign: TCAP2013
  column old_unit new_unit
Altitude       ft        m

PROCESSING CAMPAIGN DATA:

Processing campaign: ACMEV
  Processing file: ACMEV_avg.csv
    Converting Pressure: Pa → mb
    Converting BC_Mass: ng/kg → ng/m3
    Applied 2 unit conversions
    Saved to: ACMEV.csv

Processing campaign: ISDAC
  Processing file: ISDAC_avg.csv
    Converting Pressure: Pa → mb
    Applied 1 unit conversions
    Saved to: ISDAC.csv

Process