NAIROBI OZONESONDE DATA AVAILABILITY

In [9]:
#!/usr/bin/env python3
"""
Count ozonesonde launches per year from files named like:
  shadoz_nairobi_1998_V06, shadoz_nairobi_1999_V06.txt, shadoz_nairobi_2023_V06.gz, ...
Place files inside a folder named OZONELAYER_KENYA (script can be run from its parent).
"""
import os
import re
import gzip
import numpy as np
import matplotlib.pyplot as plt

# --- Configuration ---
# Inclusive range of years to tally; files for years outside it are skipped.
start_year, end_year = 1998, 2024

# Folder holding the data files (relative to the working directory, or an
# absolute path). Stray punctuation that may have been pasted along with the
# name (spaces, colons, parentheses, quotes) is stripped from both ends.
search_dir = "OZONELAYER_KENYA"
search_dir = search_dir.strip(" :()\"'")

# Match "shadoz_nairobi_<YYYY>_V06" with either no extension or one of
# txt, dat, csv, gz, asc (case-insensitive). Group 1 captures the year.
# ".zip" is intentionally not accepted: the reader below only understands
# plain text and gzip, so a ZIP archive would be decoded as noise and
# produce a meaningless launch count.
file_pattern = re.compile(
    r"^shadoz_nairobi_(\d{4})_V06(?:\.(?:txt|dat|csv|gz|asc))?$",
    flags=re.IGNORECASE,
)

# Match lines that start with an ISO-style date (YYYY-MM-DD or YYYY/MM/DD)
# or a day-first date (DD/MM/YYYY or DD-MM-YYYY), after optional whitespace.
date_line_re = re.compile(r"^\s*(?:[12]\d{3}[-/]\d{1,2}[-/]\d{1,2}|\d{1,2}[-/]\d{1,2}[-/][12]\d{3})")

# Per-year tallies start at zero so years without data still appear.
years = list(range(start_year, end_year + 1))
counts = {y: 0 for y in years}

# (year, path) pairs for every data file found; filled by the directory walk.
matched_files = []

# Walk directory (includes subfolders) and collect every data file whose
# name matches file_pattern and whose year lies in the configured range.
if not os.path.isdir(search_dir):
    raise SystemExit(f"Directory not found: {search_dir!r}. Run from parent directory or give absolute path.")

for root, _, files in os.walk(search_dir):
    for fname in files:
        m = file_pattern.match(fname)
        if m is None:
            continue
        yr = int(m.group(1))
        if start_year <= yr <= end_year:
            matched_files.append((yr, os.path.join(root, fname)))

# os.walk yields entries in arbitrary filesystem order; sort by (year, path)
# so the listing below and the later processing order are reproducible.
matched_files.sort()

# Diagnostics: stop early when nothing matched, otherwise list what was found.
if not matched_files:
    raise SystemExit(f"No files matched in '{search_dir}'. Check folder and filenames.")
print(f"Found {len(matched_files)} matching files (showing up to 20):")
for i, (_, p) in enumerate(matched_files[:20], 1):
    print(f"  {i}. {p}")

# Counting heuristic
# For every matched file, add an estimated number of launches to counts[yr]:
#   1) the number of lines that begin with a date, if there are any;
#   2) otherwise the number of runs of non-blank lines separated by blank lines;
#   3) otherwise (nothing but blank lines, or an empty file) exactly one.
# A file that cannot be read at all is also counted as one launch.
for yr, path in matched_files:
    # gzip.open and the builtin open accept the same text-mode arguments,
    # so pick the callable up front instead of wrapping each in a lambda.
    open_text = gzip.open if path.lower().endswith(".gz") else open
    try:
        with open_text(path, "rt", errors="ignore") as handle:
            content = handle.readlines()
    except Exception as err:
        print(f"Could not read {path}: {err}; counting as 1 launch")
        counts[yr] += 1
        continue

    label = os.path.basename(path)

    # 1) Lines that begin with a date
    n_dated = len([row for row in content if date_line_re.match(row)])
    if n_dated:
        counts[yr] += n_dated
        print(f"{label} -> {n_dated} date lines")
    else:
        # 2) Fallback: a block starts wherever a non-blank line follows a
        #    blank line (or the start of the file).
        has_text = [bool(row.strip()) for row in content]
        n_blocks = sum(
            1
            for before, now in zip([False] + has_text, has_text)
            if now and not before
        )
        if n_blocks > 0:
            counts[yr] += n_blocks
            print(f"{label} -> {n_blocks} blocks (fallback)")
        else:
            # 3) Final fallback: treat the file as a single launch
            counts[yr] += 1
            print(f"{label} -> no matches, counted as 1")

# Prepare arrays for plotting: one bar per configured year, in order.
years_arr = np.array(years)
# Look counts up with the plain ints in `years` (the dict's own keys) rather
# than with NumPy scalars taken from years_arr.
launch_counts = np.array([counts[y] for y in years], dtype=int)
print(f"Total launches summed: {launch_counts.sum()}")

# Plot
# NOTE: the "seaborn-v0_8" style name exists in Matplotlib 3.6 and later.
plt.style.use("seaborn-v0_8")
fig, ax = plt.subplots(figsize=(14, 6))
ax.bar(years_arr, launch_counts, width=0.9, color="tab:blue", edgecolor="k")
# Build the title from the configured range so it cannot go stale when
# start_year / end_year are changed.
ax.set_title(f"Ozonesonde Launches in Nairobi ({start_year}–{end_year})", fontsize=16)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Number of Launches", fontsize=12)
# Label every second year to keep the axis readable.
ax.set_xticks(years_arr[::2])
ax.set_xticklabels(years_arr[::2], rotation=45, fontsize=10)
ax.grid(axis="y", linestyle="--", alpha=0.6)

# Annotate non-zero bars with their count, just above the bar top.
for x, h in zip(years_arr, launch_counts):
    if h > 0:
        ax.text(x, h + 0.5, str(int(h)), ha="center", va="bottom", fontsize=9)

plt.tight_layout()
plt.show()


SystemExit: Directory not found: 'OZONELAYER_KENYA'. Run from parent directory or give absolute path.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
