Extracting DU data from a raw data file and computing the daily average O3 (From Dobson Spectrophotometer)

In [2]:
import re

# Load the data from a file or string
with open('JAN 2024\D0022024.018', 'r') as file:
    data = file.read()

# Extract ozone values from XAD blocks
# Pattern explanation:
# - XAD followed by a timestamp
# - Next three lines of float numbers (we want the third one)
ozone_values = re.findall(r'XAD\s+\d{2}:\d{2}:\d{2}\s+[\d.]+\s+[\d.]+\s+([\d.]+)', data)

# Convert to float
ozone_values = [float(val) for val in ozone_values]

# Calculate average
daily_avg_ozone = sum(ozone_values) / len(ozone_values) if ozone_values else None

print(f"Daily Average Ozone: {daily_avg_ozone:.2f} DU" if daily_avg_ozone else "No ozone values found.")


Daily Average Ozone: 237.63 DU


  with open('JAN 2024\D0022024.018', 'r') as file:


In [30]:
import re

# Initialize containers
zc_ozone = []
ds_ozone = []

# Read the file
with open(r'JAN 2024\D0082024.018', 'r') as file:
    lines = file.readlines()

current_mode = None  # Tracks whether we're in ZC or DS section

for line in lines:
    line = line.strip()

    # Update the current mode
    if line.startswith("ZC"):
        current_mode = "ZC"
    elif line.startswith("DS"):
        current_mode = "DS"

    # Extract ozone value from XAD lines
    if line.startswith("XAD") and current_mode:
        match = re.search(r'XAD\s+\d{2}:\d{2}:\d{2}\s+[\d.]+\s+[\d.]+\s+([\d.]+)', line)
        if match:
            ozone = float(match.group(1))
            if current_mode == "ZC":
                zc_ozone.append(ozone)
            elif current_mode == "DS":
                ds_ozone.append(ozone)

# Calculate and print averages
if zc_ozone:
    zc_avg = sum(zc_ozone) / len(zc_ozone)
    print(f"Average Ozone (ZC): {zc_avg:.2f} DU")
else:
    print("No ZC ozone data found.")

if ds_ozone:
    ds_avg = sum(ds_ozone) / len(ds_ozone)
    print(f"Average Ozone (DS): {ds_avg:.2f} DU")
else:
    print("No DS ozone data found.")


No ZC ozone data found.
No DS ozone data found.


In [39]:
import re
from datetime import datetime
from typing import List, Dict


def parse_dobson_018(content: str) -> List[Dict[str, str]]:
    ds_blocks = re.split(r'\n(?=DS\s+000)', content.strip())
    ozone_data = []

    for block in ds_blocks:
        if 'XAD' not in block:
            continue  # Skip if no ozone result
        xad_matches = re.findall(r'XAD\s+(\d{2}:\d{2}:\d{2})\s+([\d.]+)', block)
        for match in xad_matches:
            time_str, ozone_value = match
            ozone_data.append({
                "time": time_str,
                "ozone": float(ozone_value)
            })
    return ozone_data


# --- Usage Example ---

# Read the file contents (replace this with actual file read in practice)
with open("JAN 2024\D0302024.018", "r") as f:
    file_content = f.read()

ozone_results = parse_dobson_018(file_content)

# Display extracted results
for result in ozone_results:
    print(f"Time: {result['time']}, Ozone: {result['ozone']:.3f}")


Time: 06:12:00, Ozone: 1.731
Time: 06:14:00, Ozone: 1.712
Time: 06:17:00, Ozone: 1.684
Time: 06:24:00, Ozone: 1.623
Time: 08:04:00, Ozone: 1.153
Time: 08:05:00, Ozone: 1.151
Time: 08:11:00, Ozone: 1.137
Time: 08:07:00, Ozone: 1.146
Time: 11:13:00, Ozone: 1.121
Time: 11:14:00, Ozone: 1.123
Time: 11:15:00, Ozone: 1.125
Time: 11:16:00, Ozone: 1.127
Time: 12:07:00, Ozone: 1.272
Time: 12:09:00, Ozone: 1.279
Time: 12:11:00, Ozone: 1.287
Time: 12:16:00, Ozone: 1.308


  with open("JAN 2024\D0302024.018", "r") as f:
