## Calculating Optical Depth

#### Averaging the BLANK files

In [3]:
# Reflectance
import os
import pandas as pd

# Reflectance data root; the BLANK scans sit in the "Files_without_PM25" sub-folder.
source_dir = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Reflectance"
target_dir = os.path.join(source_dir, "Files_without_PM25")

# Collect every file named BLANK*.csv in the sub-folder.
blank_files = [
    f
    for f in os.listdir(target_dir)
    if (f.startswith("BLANK") and f.endswith(".csv"))
]

# Read each BLANK scan and keep only the wavelength ('nm') and '%R' columns.
required_columns = {'nm', '%R'}
dfs = []
for file in blank_files:
    file_path = os.path.join(target_dir, file)
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip()  # headers may carry stray whitespace

    if not required_columns.issubset(df.columns):
        print(f"Skipping {file} due to missing columns")
        continue

    dfs.append(df[['nm', '%R']])

# Average %R over all BLANK scans at each wavelength and write the reference file.
if not dfs:
    print("No valid BLANK files found.")
else:
    combined_df = pd.concat(dfs)
    averaged_df = combined_df.groupby('nm', as_index=False)['%R'].mean()

    output_path = os.path.join(source_dir, "Average_BLANK_ref.csv")
    averaged_df.to_csv(output_path, index=False)
    print(f"Saved averaged data to: {output_path}")

Saved averaged data to: C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Reflectance\Average_BLANK_ref.csv


In [4]:
# Transmittance
import os
import pandas as pd

# Transmittance data root; the BLANK scans sit in the "Files_without_PM25" sub-folder.
source_dir = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Transmittance"
target_dir = os.path.join(source_dir, "Files_without_PM25")

# Collect every file named BLANK*.csv in the sub-folder.
blank_files = [
    f
    for f in os.listdir(target_dir)
    if (f.startswith("BLANK") and f.endswith(".csv"))
]

# Read each BLANK scan and keep only the wavelength ('nm') and '%T' columns.
required_columns = {'nm', '%T'}
dfs = []
for file in blank_files:
    file_path = os.path.join(target_dir, file)
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip()  # headers may carry stray whitespace

    if not required_columns.issubset(df.columns):
        print(f"Skipping {file} due to missing columns")
        continue

    dfs.append(df[['nm', '%T']])

# Average %T over all BLANK scans at each wavelength and write the reference file.
if not dfs:
    print("No valid BLANK files found.")
else:
    combined_df = pd.concat(dfs)
    averaged_df = combined_df.groupby('nm', as_index=False)['%T'].mean()

    output_path = os.path.join(source_dir, "Average_BLANK_tran.csv")
    averaged_df.to_csv(output_path, index=False)
    print(f"Saved averaged data to: {output_path}")

Saved averaged data to: C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Transmittance\Average_BLANK_tran.csv


#### Normalization of %R and %T by the averaged BLANK spectrum

In [6]:
folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Reflectance"
reference_name = "Average_BLANK_ref.csv"

# Averaged BLANK spectrum; '%R' is renamed so it does not clash with the sample's '%R' after merging.
ref = pd.read_csv(os.path.join(folder, reference_name))
ref.columns = ref.columns.str.strip()
ref = ref[['nm', '%R']].rename(columns={'%R': 'ref_R'})

# Normalize every CSV directly inside the folder; the reference file itself is excluded.
for file in os.listdir(folder):
    if not file.endswith(".csv") or file == reference_name:
        continue

    path = os.path.join(folder, file)
    df = pd.read_csv(path)
    df.columns = df.columns.str.strip()

    if not {'nm', '%R'}.issubset(df.columns):
        print(f"Skipped (missing columns): {file}")
        continue

    # Left merge keeps every sample row; wavelengths absent from the reference give NaN.
    df = pd.merge(df, ref, on='nm', how='left')
    df['normalized_%R'] = df['%R'].div(df['ref_R'])
    df = df.drop(columns=['ref_R'])  # helper column is not written out

    # The sample file is overwritten in place with the extra 'normalized_%R' column.
    df.to_csv(path, index=False)
    print(f"Updated: {file}")

Updated: AEAZ-0113-1.Sample.Raw.csv
Updated: AEAZ-0114-2.Sample.Raw.csv
Updated: AEAZ-0115-3.Sample.Raw.csv
Updated: AEAZ-0116-4.Sample.Raw.csv
Updated: AEAZ-0117-5.Sample.Raw.csv
Updated: AEAZ-0121-1.Sample.Raw.csv
Updated: AEAZ-0122-2.Sample.Raw.csv
Updated: AEAZ-0123-3.Sample.Raw.csv
Updated: AEAZ-0124-4.Sample.Raw.csv
Updated: AEAZ-0125-5.Sample.Raw.csv
Updated: AEAZ-0126-6.Sample.Raw.csv
Updated: AEAZ-0129-1.Sample.Raw.csv
Updated: AEAZ-0130-2.Sample.Raw.csv
Updated: AEAZ-0131-3.Sample.Raw.csv
Updated: AEAZ-0132-4.Sample.Raw.csv
Updated: AEAZ-0133-5.Sample.Raw.csv
Updated: AEAZ-0134-6.Sample.Raw.csv
Updated: AEAZ-0137-1.Sample.Raw.csv
Updated: AEAZ-0138-2.Sample.Raw.csv
Updated: AEAZ-0139-3.Sample.Raw.csv
Updated: AEAZ-0141-5.Sample.Raw.csv
Updated: AEAZ-0142-6.Sample.Raw.csv
Updated: AEAZ-0145-1.Sample.Raw.csv
Updated: AEAZ-0146-2.Sample.Raw.csv
Updated: AEAZ-0147-3.Sample.Raw.csv
Updated: AEAZ-0148-4.Sample.Raw.csv
Updated: AEAZ-0149-5.Sample.Raw.csv
Updated: AEAZ-0150-6.Sample.

In [7]:
folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Transmittance"
reference_name = "Average_BLANK_tran.csv"

# Averaged BLANK spectrum; '%T' is renamed so it does not clash with the sample's '%T' after merging.
ref = pd.read_csv(os.path.join(folder, reference_name))
ref.columns = ref.columns.str.strip()
ref = ref[['nm', '%T']].rename(columns={'%T': 'ref_T'})

# Normalize every CSV directly inside the folder; the reference file itself is excluded.
for file in os.listdir(folder):
    if not file.endswith(".csv") or file == reference_name:
        continue

    path = os.path.join(folder, file)
    df = pd.read_csv(path)
    df.columns = df.columns.str.strip()

    if not {'nm', '%T'}.issubset(df.columns):
        print(f"Skipped (missing columns): {file}")
        continue

    # Left merge keeps every sample row; wavelengths absent from the reference give NaN.
    df = pd.merge(df, ref, on='nm', how='left')
    df['normalized_%T'] = df['%T'].div(df['ref_T'])
    df = df.drop(columns=['ref_T'])  # helper column is not written out

    # The sample file is overwritten in place with the extra 'normalized_%T' column.
    df.to_csv(path, index=False)
    print(f"Updated: {file}")

Updated: AEAZ-0113-1.Sample.Raw.csv
Updated: AEAZ-0114-2.Sample.Raw.csv
Updated: AEAZ-0115-3.Sample.Raw.csv
Updated: AEAZ-0116-4.Sample.Raw.csv
Updated: AEAZ-0117-5.Sample.Raw.csv
Updated: AEAZ-0121-1.Sample.Raw.csv
Updated: AEAZ-0122-2.Sample.Raw.csv
Updated: AEAZ-0123-3.Sample.Raw.csv
Updated: AEAZ-0124-4.Sample.Raw.csv
Updated: AEAZ-0125-5.Sample.Raw.csv
Updated: AEAZ-0126-6.Sample.Raw.csv
Updated: AEAZ-0129-1.Sample.Raw.csv
Updated: AEAZ-0130-2.Sample.Raw.csv
Updated: AEAZ-0131-3.Sample.Raw.csv
Updated: AEAZ-0132-4.Sample.Raw.csv
Updated: AEAZ-0133-5.Sample.Raw.csv
Updated: AEAZ-0134-6.Sample.Raw.csv
Updated: AEAZ-0137-1.Sample.Raw.csv
Updated: AEAZ-0138-2.Sample.Raw.csv
Updated: AEAZ-0139-3.Sample.Raw.csv
Updated: AEAZ-0141-5.Sample.Raw.csv
Updated: AEAZ-0142-6.Sample.Raw.csv
Updated: AEAZ-0145-1.Sample.Raw.csv
Updated: AEAZ-0146-2.Sample.Raw.csv
Updated: AEAZ-0147-3.Sample.Raw.csv
Updated: AEAZ-0148-4.Sample.Raw.csv
Updated: AEAZ-0149-5.Sample.Raw.csv
Updated: AEAZ-0150-6.Sample.

## Calculating Optical Depth

In [9]:
import numpy as np
import pandas as pd
import os

reflectance_folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Reflectance"
transmittance_folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Transmittance"

# Main PM2.5 table; one row per filter, keyed by "Filter ID".
pm25_df = pd.read_csv("pm25_df.csv")

# One optical-depth value (or NaN) is collected per filter, in table order.
optical_depths = []

for filter_id in pm25_df["Filter ID"]:
    avg_od = np.nan  # stays NaN unless the 550 nm value is successfully computed
    try:
        filename = f"{filter_id}.Sample.Raw.csv"
        reflectance_path = os.path.join(reflectance_folder, filename)
        transmittance_path = os.path.join(transmittance_folder, filename)

        if not (os.path.exists(reflectance_path) and os.path.exists(transmittance_path)):
            print(f"{filter_id}: File missing")
        else:
            refl = pd.read_csv(reflectance_path)
            trans = pd.read_csv(transmittance_path)

            refl.columns = refl.columns.str.strip()
            trans.columns = trans.columns.str.strip()

            columns_ok = (
                {'nm', 'normalized_%R'}.issubset(refl.columns)
                and {'nm', 'normalized_%T'}.issubset(trans.columns)
            )
            if not columns_ok:
                print(f"{filter_id}: Required columns missing")
            else:
                refl = refl.rename(columns={'normalized_%R': 'R_s'})
                trans = trans.rename(columns={'normalized_%T': 'T_s'})

                # Same dtype on both sides so the wavelength join matches.
                refl['nm'] = refl['nm'].astype(float)
                trans['nm'] = trans['nm'].astype(float)

                merged = refl[['nm', 'R_s']].merge(
                    trans[['nm', 'T_s']], on='nm', how='inner'
                )

                # Keep the log argument finite and positive.
                merged['R_s'] = merged['R_s'].clip(lower=0, upper=0.99)
                merged['T_s'] = merged['T_s'].clip(lower=1e-6)

                # Optical depth per wavelength: ln((1 - R_s) / T_s)
                merged['optical_depth'] = np.log((1 - merged['R_s']) / merged['T_s'])

                # Despite the name, this is the single value at 550 nm, not an average.
                # A missing 550 nm row raises IndexError, which is reported below.
                at_550 = merged['nm'] == 550
                avg_od = merged.loc[at_550, 'optical_depth'].values[0]
                print(f"{filter_id}: OD at 550 nm = {avg_od}")

    except Exception as e:
        print(f"{filter_id}: Error - {e}")
        avg_od = np.nan

    optical_depths.append(avg_od)

# Attach the results as a new column and overwrite the table on disk.
pm25_df['optical_depth'] = optical_depths
pm25_df.to_csv("pm25_df.csv", index=False)

USPA-0161-1: OD at 550 nm = 0.4612155702848321
USPA-0162-2: OD at 550 nm = 0.5469082376640597
USPA-0163-3: OD at 550 nm = 0.49097257727163585
USPA-0164-4: OD at 550 nm = 0.5182851139642628
USPA-0165-5: OD at 550 nm = 0.5857609225610295
USPA-0166-6: OD at 550 nm = 0.5233733047940508
USPA-0168-8: OD at 550 nm = 0.4035376759156729
USPA-0169-1: OD at 550 nm = 0.5554544306676482
USPA-0170-2: OD at 550 nm = 0.4642113505938401
USPA-0171-3: OD at 550 nm = 0.4440006796285237
USPA-0172-4: OD at 550 nm = 0.4009920365847173
USPA-0173-5: OD at 550 nm = 0.4334627496635786
USPA-0174-6: OD at 550 nm = 0.43199146955086914
USPA-0176-8: OD at 550 nm = 0.43229144878379383
USPA-0177-1: OD at 550 nm = 0.5540632607221655
USPA-0178-2: OD at 550 nm = 0.4372103287261381
USPA-0179-3: OD at 550 nm = 0.4483271677924057
USPA-0180-4: OD at 550 nm = 0.4371374430391245
USPA-0181-5: OD at 550 nm = 0.590125608746116
USPA-0182-6: OD at 550 nm = 0.4898931818670695
USPA-0184-8: OD at 550 nm = 0.5186381454669751
USPA-0185-1

In [10]:
# Discard filters for which no optical depth was obtained, then persist the trimmed table.
pm25_df = pm25_df.dropna(axis=0, how='any', subset=['optical_depth'])
pm25_df.to_csv("pm25_df.csv", index=False)

## Calculating Aerosol Absorption Coefficient

In [12]:
# Diameter and circular area used to scale the optical depth.
# NOTE(review): presumably the 25 mm filter deposit diameter in metres - confirm.
d = 25e-3
A_s = np.pi * d ** 2 / 4

# Absorption coefficient: 0.48 * OD^1.32 * A_s / V, scaled by 1e6.
# NOTE(review): 0.48 and 1.32 look like empirical calibration constants - cite their source.
od_550 = pm25_df['optical_depth']
sampled_volume = pm25_df['Sampled volume (m3)']
pm25_df['b_abs_lambda'] = 0.48 * od_550 ** 1.32 * A_s / sampled_volume * 1e6

# Persist the updated table.
pm25_df.to_csv("pm25_df.csv", index=False)


## Calculating MAC

In [14]:
# Mass absorption cross-section: absorption coefficient divided by the PM2.5 concentration.
pm25_df['MAC_lambda'] = pm25_df['b_abs_lambda'].div(pm25_df['PM2.5(ug/m3)'])
pm25_df.to_csv("pm25_df.csv", index=False)

## Mass fraction of EC ($f_{EC}$)

In [16]:
import numpy as np
import pandas as pd
import os

reflectance_folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Reflectance"
transmittance_folder = r"C:\Users\Akanksha\OneDrive\Desktop\data\ALL_Transmittance"

# Main PM2.5 table; one row per filter, keyed by "Filter ID".
pm25_df = pd.read_csv("pm25_df.csv")

# One OD@900nm value (or NaN) is collected per filter, in table order.
ODs_900 = []

for filter_id in pm25_df["Filter ID"]:
    od_900 = np.nan  # default for every early exit below

    filename = f"{filter_id}.Sample.Raw.csv"
    reflectance_path = os.path.join(reflectance_folder, filename)
    transmittance_path = os.path.join(transmittance_folder, filename)

    if not (os.path.exists(reflectance_path) and os.path.exists(transmittance_path)):
        print(f"{filter_id}: Missing reflectance/transmittance files")
        ODs_900.append(od_900)
        continue

    refl = pd.read_csv(reflectance_path)
    trans = pd.read_csv(transmittance_path)

    refl.columns = refl.columns.str.strip()
    trans.columns = trans.columns.str.strip()

    columns_ok = (
        {'nm', 'normalized_%R'}.issubset(refl.columns)
        and {'nm', 'normalized_%T'}.issubset(trans.columns)
    )
    if not columns_ok:
        print(f"{filter_id}: Required columns missing")
        ODs_900.append(od_900)
        continue

    refl = refl.rename(columns={'normalized_%R': 'R_s'})
    trans = trans.rename(columns={'normalized_%T': 'T_s'})

    refl['nm'] = refl['nm'].astype(float)
    trans['nm'] = trans['nm'].astype(float)

    # Values recorded at exactly 900 nm (empty arrays when that wavelength is absent).
    R_900 = refl.loc[refl['nm'] == 900, 'R_s'].values
    T_900 = trans.loc[trans['nm'] == 900, 'T_s'].values

    if len(R_900) == 0 or len(T_900) == 0:
        print(f"{filter_id}: 900 nm value missing")
        ODs_900.append(od_900)
        continue

    R_s = np.clip(R_900[0], 0, 0.99)
    T_s = max(T_900[0], 1e-6)  # avoid log(0)
    od_900 = np.log((1 - R_s) / T_s)
    print(f"{filter_id}: OD@900nm = {od_900}")
    ODs_900.append(od_900)

# Attach the results as a new column and overwrite the table on disk.
pm25_df['OD@900'] = ODs_900
pm25_df.to_csv("pm25_df.csv", index=False)

USPA-0161-1: OD@900nm = 0.2964289407525746
USPA-0162-2: OD@900nm = 0.33610106195374867
USPA-0163-3: OD@900nm = 0.28718023166811424
USPA-0164-4: OD@900nm = 0.3103508000663377
USPA-0165-5: OD@900nm = 0.3657928618529743
USPA-0166-6: OD@900nm = 0.3060423222059064
USPA-0168-8: OD@900nm = 0.2384213527960153
USPA-0169-1: OD@900nm = 0.3408895761298722
USPA-0170-2: OD@900nm = 0.30237275475727715
USPA-0171-3: OD@900nm = 0.2627039524618313
USPA-0172-4: OD@900nm = 0.2257913259153744
USPA-0173-5: OD@900nm = 0.2481799326010952
USPA-0174-6: OD@900nm = 0.24242145293141745
USPA-0176-8: OD@900nm = 0.23959453044149243
USPA-0177-1: OD@900nm = 0.3556816585459034
USPA-0178-2: OD@900nm = 0.2726756419227005
USPA-0179-3: OD@900nm = 0.2718227947515427
USPA-0180-4: OD@900nm = 0.254017851754597
USPA-0181-5: OD@900nm = 0.36623949020075114
USPA-0182-6: OD@900nm = 0.30985677893224556
USPA-0184-8: OD@900nm = 0.30620360731719437
USPA-0185-1: OD@900nm = 0.42845629753434605
USPA-0186-2: OD@900nm = 0.28850402939247416
US

In [17]:
# Diameter and circular area used to scale the optical depth.
# NOTE(review): presumably the 25 mm filter deposit diameter in metres - confirm.
d = 25e-3
A_s = np.pi * d ** 2 / 4

# Same formula as for 'b_abs_lambda', applied to the 900 nm optical depth.
od_900_series = pm25_df['OD@900']
sampled_volume = pm25_df['Sampled volume (m3)']
pm25_df['b_abs_lambda@900'] = 0.48 * od_900_series ** 1.32 * A_s / sampled_volume * 1e6

# Persist the updated table.
pm25_df.to_csv("pm25_df.csv", index=False)

In [18]:
# Mass absorption cross-section at 900 nm: absorption coefficient over PM2.5 concentration.
pm25_df['MAC_lambda@900'] = pm25_df['b_abs_lambda@900'].div(pm25_df['PM2.5(ug/m3)'])
pm25_df.to_csv("pm25_df.csv", index=False)

In [19]:
# EC mass fraction: the 900 nm MAC divided by a fixed reference value.
# NOTE(review): 4.58 is presumably the reference MAC of pure EC at 900 nm - confirm and cite.
pm25_df['f_mass_EC'] = pm25_df['MAC_lambda@900'].div(4.58)
pm25_df.to_csv("pm25_df.csv", index=False)

## Black Carbon Mass

In [21]:
# Black-carbon mass concentration = EC mass fraction x PM2.5 mass concentration.
# 'f_mass_EC' is dimensionless (MAC_lambda@900 / 4.58), so it must be MULTIPLIED
# by the PM2.5 concentration; the previous division yielded a quantity in
# 1/(ug/m3), which is not a mass.
# NOTE(review): the later cell that computes 'Non_Black_carbon_mass' as
# 1 - Black_carbon_mass treats this column as a fraction and should be
# revisited (e.g. PM2.5 - Black_carbon_mass).
pm25_df['Black_carbon_mass'] = pm25_df['f_mass_EC'] * pm25_df['PM2.5(ug/m3)']
pm25_df.to_csv("pm25_df.csv", index=False)

In [6]:
import pandas as pd
# Reload the PM2.5 table from disk; this replaces any in-memory 'pm25_df'
# with whatever was last written to pm25_df.csv.
pm25_df = pd.read_csv('pm25_df.csv')

In [8]:
# Display the column names of the reloaded table.
# NOTE(review): the saved output already lists columns that are only created by
# later cells (e.g. 'Non_Black_carbon_mass', 'b_abs_BC@550'), so this cell was
# apparently run against a CSV from a previous pass; re-check after
# Restart Kernel -> Run All.
pm25_df.columns

Index(['Filter ID', 'Analysis ID', 'Filter Type', 'Sampling Start Date',
       'Sampling End Date', 'Mass collected on filter (ug)',
       'Sampled volume (m3)', 'PM2.5(ug/m3)', 'optical_depth', 'b_abs_lambda',
       'MAC_lambda', 'OD@900', 'b_abs_lambda@900', 'MAC_lambda@900',
       'f_mass_EC', 'Black_carbon_mass', 'Non_Black_carbon_mass',
       'b_abs_BC@400', 'b_abs_BrC@400', 'Site', 'Country', 'Hemisphere',
       'Sampling Mid Date', 'Month', 'Season', 'b_abs_BC@550', 'b_abs_BrC@550',
       'Latitude', 'Longitude'],
      dtype='object')

In [23]:
# Keep only the rows where every derived optical quantity is present, then persist.
derived_columns = [
    'b_abs_lambda',
    'MAC_lambda',
    'OD@900',
    'b_abs_lambda@900',
    'MAC_lambda@900',
    'f_mass_EC',
    'Black_carbon_mass',
]
pm25_df = pm25_df.dropna(axis=0, how='any', subset=derived_columns)
pm25_df.to_csv("pm25_df.csv", index=False)

In [24]:
# Non-black-carbon mass concentration = PM2.5 x (1 - EC mass fraction).
# The previous "1 - Black_carbon_mass" subtracted a mass-like column from 1,
# which is only meaningful for a dimensionless fraction. Computing from
# 'f_mass_EC' and the PM2.5 concentration keeps the result in ug/m3.
# NOTE(review): if a non-BC *fraction* was intended instead, use
# 1 - pm25_df['f_mass_EC'] and rename the column accordingly.
pm25_df['Non_Black_carbon_mass'] = pm25_df['PM2.5(ug/m3)'] * (1 - pm25_df['f_mass_EC'])
pm25_df.to_csv("pm25_df.csv", index=False)

In [25]:
# Display the current column names of pm25_df.
pm25_df.columns

Index(['Filter ID', 'Analysis ID', 'Filter Type', 'Sampling Start Date',
       'Sampling End Date', 'Mass collected on filter (ug)',
       'Sampled volume (m3)', 'PM2.5(ug/m3)', 'optical_depth', 'b_abs_lambda',
       'MAC_lambda', 'OD@900', 'b_abs_lambda@900', 'MAC_lambda@900',
       'f_mass_EC', 'Black_carbon_mass', 'Non_Black_carbon_mass',
       'b_abs_BC@400', 'b_abs_BrC@400', 'Site', 'Country', 'Hemisphere',
       'Sampling Mid Date', 'Month', 'Season'],
      dtype='object')

In [26]:
# BC absorption at 400 nm, obtained by scaling the 900 nm absorption coefficient.
# NOTE(review): the factor is exp(-400/900); a power-law wavelength dependence
# would instead be (400/900)**(-AAE) - confirm which form is intended.
bc_scale_400 = np.exp(-4 / 9)
pm25_df['b_abs_BC@400'] = pm25_df['b_abs_lambda@900'].mul(bc_scale_400)
pm25_df.to_csv("pm25_df.csv", index=False)

In [27]:
# Brown-carbon absorption: total absorption minus the BC contribution at 400 nm.
# NOTE(review): 'b_abs_lambda' was derived from the optical depth at 550 nm, so
# this subtraction mixes a 550 nm total with a 400 nm BC term - confirm.
pm25_df['b_abs_BrC@400'] = pm25_df['b_abs_lambda'].sub(pm25_df['b_abs_BC@400'])
pm25_df.to_csv("pm25_df.csv", index=False)

In [32]:
# BC absorption at 550 nm, obtained by scaling the 900 nm absorption coefficient.
# NOTE(review): the factor is exp(-550/900); a power-law wavelength dependence
# would instead be (550/900)**(-AAE) - confirm which form is intended.
bc_scale_550 = np.exp(-55/90)
pm25_df['b_abs_BC@550'] = pm25_df['b_abs_lambda@900'].mul(bc_scale_550)
pm25_df.to_csv("pm25_df.csv", index=False)

In [35]:
# Brown-carbon absorption at 550 nm: total absorption minus the BC contribution.
pm25_df['b_abs_BrC@550'] = pm25_df['b_abs_lambda'].sub(pm25_df['b_abs_BC@550'])
pm25_df.to_csv("pm25_df.csv", index=False)