In [1]:
import adios2
print(adios2.__version__)
import pandas as pd
import os
import time
import adios2
from adios2 import FileReader
import adios2.bindings as adios2
import json
import numpy as np

2.10.1


In [33]:

# Function to read a CSV file using POSIX
def read_csv_posix(file_path, **read_csv_kwargs):
    """Load a CSV file with pandas and print how long the read took.

    Args:
        file_path: Path of the CSV file to read.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv`` (for example ``dtype=...`` or
            ``low_memory=False``). With none given the call is exactly
            ``pd.read_csv(file_path)``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() is wall-clock time and can be coarse or jump.
    start_time = time.perf_counter()
    df = pd.read_csv(file_path, **read_csv_kwargs)
    end_time = time.perf_counter()
    print(f"Time taken to read CSV (POSIX): {end_time - start_time:.6f} seconds")
    return df

# Function to write a CSV file using POSIX
def write_csv_posix(df, output_path):
    """Write ``df`` to a CSV file (without the index) and print the elapsed time.

    Args:
        df: pandas DataFrame to serialise.
        output_path: Destination path of the CSV file.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() is wall-clock time and can be coarse or jump.
    start_time = time.perf_counter()
    df.to_csv(output_path, index=False)
    end_time = time.perf_counter()
    print(f"Time taken to write CSV (POSIX): {end_time - start_time:.6f} seconds")

def write_adios2(df, bp_file):
    """Write every float64 column of ``df`` to an ADIOS2 BP output as one step.

    Each float64 column becomes a 1-D variable named after the column with
    shape ``[len(df)]``, start ``[0]`` and count ``[len(df)]``. Columns of any
    other dtype are skipped. The elapsed time is printed.

    Args:
        df: pandas DataFrame to export.
        bp_file: Path of the BP output to create.
    """
    start_time = time.perf_counter()

    adios = adios2.ADIOS()  # Initialize ADIOS2
    io = adios.DeclareIO("WriteIO")

    n_rows = len(df)

    # Build each column's array exactly once and keep it referenced for the
    # whole write. Previously the array was rebuilt inside the Put loop and the
    # name rebound on every iteration, so earlier buffers could be freed while
    # a deferred Put still pointed at them.
    # NOTE(review): this is the likely cause of the errno 22 write failure
    # recorded for the 1,090,151-row frame - confirm by re-running that cell.
    columns = {
        col: np.array(df[col].values)
        for col in df.columns
        if df[col].dtypes == np.float64
    }
    adios_vars = {
        col: io.DefineVariable(col, values, [n_rows], [0], [n_rows])
        for col, values in columns.items()
    }

    stream = io.Open(bp_file, adios2.Mode.Write)
    try:
        stream.BeginStep()
        for col, values in columns.items():
            # Sync mode: per the ADIOS2 documentation the buffer is consumed
            # before Put returns, so correctness does not depend on the array
            # outliving the call.
            stream.Put(adios_vars[col], values, adios2.Mode.Sync)
        stream.EndStep()
    finally:
        # Always release the engine, even when a Put/EndStep fails.
        stream.Close()

    end_time = time.perf_counter()
    print(f"Time taken to write using ADIOS2: {end_time - start_time:.6f} seconds")


def read_adios2(bp_file):
    """Read all variables of an ADIOS2 BP file into a pandas DataFrame.

    Every variable listed by the IO object is read into a freshly allocated
    float64 buffer shaped like the variable and stored under the variable's
    name. The elapsed time is printed.

    Notes:
        * Buffers are always allocated as ``np.float64``; variables of other
          types are not handled specially.
        * If the file holds several steps, a later step overwrites the data
          stored for the same variable name by an earlier step.

    Args:
        bp_file: Path of the BP file to read.

    Returns:
        A DataFrame with one column per variable.
    """
    start_time = time.perf_counter()

    adios = adios2.ADIOS()
    io = adios.DeclareIO("ReadIO")

    # Open engine
    rstream = io.Open(bp_file, adios2.Mode.Read)

    data = {}
    try:
        while rstream.BeginStep() == adios2.StepStatus.OK:
            for name in io.AvailableVariables():
                var = io.InquireVariable(name)
                buffer = np.empty(var.Shape(), dtype=np.float64)
                # Sync mode so the buffer is filled before it is stored.
                rstream.Get(var, buffer, adios2.Mode.Sync)
                data[name] = buffer
            rstream.EndStep()
    finally:
        # Always release the engine, even when a Get/EndStep fails.
        rstream.Close()

    df = pd.DataFrame(data)
    end_time = time.perf_counter()
    print(f"Time taken to read using ADIOS2: {end_time - start_time:.6f} seconds")
    return df
    

    
def compare_adios2_profile(bp_file):
    """Print the write time recorded in a BP directory's ``profiling.json``.

    The file ``<bp_file>/profiling.json`` is loaded; when its top level is a
    list, the first entry is used. The ``mus`` values found under
    ``transport_0.write`` and ``transport_1.write`` (missing entries count as
    0) are summed, converted from microseconds to seconds and printed.

    Nothing is compared against other measurements here; the function only
    reports the recorded value. It prints a message and returns early when the
    profiling file is missing or does not contain a profile dictionary.

    Args:
        bp_file: Path of the BP output directory.
    """
    profile_path = os.path.join(bp_file, "profiling.json")

    if not os.path.exists(profile_path):
        print("Profiling JSON file not found in BP directory.")
        return

    with open(profile_path, "r", encoding="utf-8") as f:
        profile_data = json.load(f)

    if isinstance(profile_data, list):
        # Assume the first entry contains the relevant information.
        profile_data = profile_data[0] if profile_data else None

    # An empty list or a non-dict entry used to crash on `.get` below.
    if not isinstance(profile_data, dict):
        print("Profiling JSON does not contain a profile dictionary.")
        return

    # Extract write time from transport layers
    transport_0_write_time = profile_data.get("transport_0", {}).get("write", {}).get("mus", 0)
    transport_1_write_time = profile_data.get("transport_1", {}).get("write", {}).get("mus", 0)

    # Convert microseconds (mus) to seconds
    adios_recorded_time = (transport_0_write_time + transport_1_write_time) / 1_000_000

    print(f"ADIOS2 Profiling JSON recorded write time: {adios_recorded_time:.6f} seconds")

In [34]:
if __name__ == "__main__":
    # Directory holding the input CSV and receiving the BP output. It can be
    # overridden through the WEATHER_DATA_DIR environment variable; the default
    # reproduces the paths this notebook was originally run with.
    data_dir = os.environ.get("WEATHER_DATA_DIR", "C:/Users/rlw/codes")
    csv_file = f"{data_dir}/weatherHistory.csv"
    bp_file = f"{data_dir}/weatherHistory.bp"

    # Read dataset using POSIX (CSV)
    df = read_csv_posix(csv_file)

    # Write dataset using POSIX (CSV); the output goes to the working directory
    write_csv_posix(df, "weatherHistory_output.csv")

    # Write dataset using ADIOS2
    write_adios2(df, bp_file)

    # Read dataset using ADIOS2
    df_adios = read_adios2(bp_file)

    # Report the write time recorded in the BP directory's profiling.json
    compare_adios2_profile(bp_file)

Time taken to read CSV (POSIX): 0.294078 seconds
Time taken to write CSV (POSIX): 0.832468 seconds
Time taken to write using ADIOS2: 0.015759 seconds
Time taken to read using ADIOS2: 0.004760 seconds
ADIOS2 Profiling JSON recorded write time: 0.001886 seconds


In [5]:
# Inspect the high-level package under its own alias. A plain `import adios2`
# here would rebind the notebook-wide `adios2` name, which the first cell
# aliases to `adios2.bindings`; the helper functions rely on that alias.
import adios2 as adios2_pkg
print(dir(adios2_pkg))  # List all available functions
print(adios2_pkg.__version__)  # Check the installed ADIOS2 version

['Adios', 'Attribute', 'Engine', 'FileReader', 'IO', 'LocalValueDim', 'Operator', 'Stream', 'Variable', '__builtins__', '__cached__', '__doc__', '__file__', '__license__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', 'adios', 'adios2', 'attribute', 'bindings', 'engine', 'file_reader', 'io', 'is_built_with_mpi', 'maxsize', 'np', 'operator', 'singledispatchmethod', 'stream', 'string_to_mode', 'type_adios_to_numpy', 'variable']
2.10.1


In [44]:
# Load the weather-history CSV from the working directory into `df`
# and show its row count (the last expression is the cell output).
df = read_csv_posix("weatherHistory.csv")
len(df)

Time taken to read CSV (POSIX): 0.226214 seconds


96453

In [45]:
# Load the second CSV into `df2` and show its row count; the recorded
# output below reports 1090151 rows for this file.
df2 = read_csv_posix("water quality data.csv")
len(df2)

  df = pd.read_csv(file_path)


Time taken to read CSV (POSIX): 4.072822 seconds


1090151

In [75]:
# Export the float64 columns of `df2` to a BP file in the working directory.
# The recorded output below shows this call raising RuntimeError
# (errno 22 while writing test2.bp\data.0).
write_adios2(df2, "test2.bp")

RuntimeError: [1;36m[Thu Mar 06 13:54:31 2025][1;34m [ADIOS2 EXCEPTION][0m <Toolkit> <transport::file::FilePOSIX> <Write> : couldn't write to file test2.bp\data.0 : errno = 22: Invalid argument[0m
: iostream stream error

In [67]:
# List every column with the frame's row count and the column dtype, and flag
# the float64 columns (the ones write_adios2 exports).
n_rows = len(df)
for col, dtype in df.dtypes.items():
    print(col, n_rows, dtype)
    if dtype == np.float64:
        print("FLOAT")

Formatted Date 96453 object
Summary 96453 object
Precip Type 96453 object
Temperature (C) 96453 float64
FLOAT
Apparent Temperature (C) 96453 float64
FLOAT
Humidity 96453 float64
FLOAT
Wind Speed (km/h) 96453 float64
FLOAT
Wind Bearing (degrees) 96453 float64
FLOAT
Visibility (km) 96453 float64
FLOAT
Loud Cover 96453 float64
FLOAT
Pressure (millibars) 96453 float64
FLOAT
Daily Summary 96453 object


In [33]:
# Row count of the frame currently bound to `df`.
len(df)

96453

In [57]:
# Check the dtype of a text column; the recorded output shows object ('O'),
# which is why write_adios2 skips it.
df["Formatted Date"].dtypes

dtype('O')