In [1]:
# January 2023_Long TAF - Improved with Error Checking
import os
import re
import pandas as pd
from datetime import datetime, timedelta

# --------------------------
# Helpers for datetimes
# --------------------------
def make_datetime(year, month, day, hour):
    """Create datetime object with error handling for invalid dates."""
    try:
        return datetime(year, month, day, hour)
    except ValueError:
        # Adjust for days that exceed month limits
        d = datetime(year, month, 1) + timedelta(days=day-1, hours=hour)
        return d

def adjust_end_if_earlier(start_dt, end_dt):
    """Ensure end datetime is after start datetime."""
    if pd.isna(start_dt) or pd.isna(end_dt):
        return end_dt
    while end_dt <= start_dt:
        end_dt += timedelta(days=1)
    return end_dt

# --------------------------
# Clean raw TAF file
# --------------------------
def clean_taf_ogimet_file(file_path):
    """Extract individual TAF reports from raw file with validation."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file not found: {file_path}")
    
    if not os.path.isfile(file_path):
        raise ValueError(f"Path is not a file: {file_path}")
    
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    except UnicodeDecodeError:
        print(f"[WARNING] UTF-8 decode failed, trying with 'latin-1' encoding")
        with open(file_path, 'r', encoding='latin-1') as f:
            text = f.read()
    
    if not text.strip():
        raise ValueError(f"Input file is empty: {file_path}")
    
    # Extract TAF reports
    taf_list = re.findall(r'\d{12} TAF .+?=', text, re.DOTALL)
    taf_list = [t.strip("= \n") for t in taf_list]
    
    if not taf_list:
        raise ValueError(f"No valid TAF reports found in file: {file_path}")
    
    print(f"[INFO] Found {len(taf_list)} TAF report(s) in file")
    return taf_list

# --------------------------
# Parse TAF header
# --------------------------
def parse_taf_header(taf_text):
    """Extract header information from TAF report with validation."""
    if not taf_text or not isinstance(taf_text, str):
        raise ValueError("Invalid TAF text: empty or not a string")
    
    m = re.match(r'(?P<Cdate>\d{12}) TAF (?P<station>\w+)', taf_text)
    if not m:
        raise ValueError(f"Invalid TAF header format. Expected 'YYYYMMDDHHMM TAF STATION'. Got: {taf_text[:50]}...")
    
    d = m.group('Cdate')
    year = int(d[:4])
    month = int(d[4:6])
    
    # Validate year and month
    if not (1900 <= year <= 2100):
        raise ValueError(f"Invalid year in TAF header: {year}")
    if not (1 <= month <= 12):
        raise ValueError(f"Invalid month in TAF header: {month}")
    
    return {
        'Cdate': d,
        'station': m.group('station'),
        'year': year,
        'month': month
    }

# --------------------------
# Extract change groups
# --------------------------
def extract_groups(taf_text):
    """Extract change groups (FM, BECMG, TEMPO, PROB) from TAF."""
    pattern = r'(FM\d{6}|BECMG \d{4}/\d{4}|TEMPO \d{4}/\d{4}|PROB\d+ TEMPO \d{4}/\d{4})'
    matches = list(re.finditer(pattern, taf_text))
    
    groups = []
    last_pos = 0
    
    for m in matches:
        start = m.start()
        # Append remaining text to previous group
        if last_pos < start and groups:
            groups[-1]['text'] += " " + taf_text[last_pos:start].strip()
        
        g_text = m.group()
        groups.append({'group': g_text, 'text': g_text})
        last_pos = m.end()
    
    # Append any remaining text to last group
    if last_pos < len(taf_text) and groups:
        groups[-1]['text'] += " " + taf_text[last_pos:].strip()
    
    return groups

# --------------------------
# Parse start/end day/hour
# --------------------------
def parse_group_times(group):
    """Extract start and end times from group header."""
    text = group['group']
    
    # FM format: FMDDHHMM
    m = re.match(r'FM(\d{2})(\d{2})(\d{2})', text)
    if m:
        day = int(m.group(1))
        hour = int(m.group(2))
        if not (1 <= day <= 31):
            print(f"[WARNING] Invalid day in FM group: {day}")
        if not (0 <= hour <= 23):
            print(f"[WARNING] Invalid hour in FM group: {hour}")
        return day, hour, None, None
    
    # BECMG/TEMPO format: DDHH/DDHH
    m = re.search(r'(\d{2})(\d{2})/(\d{2})(\d{2})', text)
    if m:
        sday, shour = int(m.group(1)), int(m.group(2))
        eday, ehour = int(m.group(3)), int(m.group(4))
        
        if not (1 <= sday <= 31):
            print(f"[WARNING] Invalid start day: {sday}")
        if not (0 <= shour <= 23):
            print(f"[WARNING] Invalid start hour: {shour}")
        if not (1 <= eday <= 31):
            print(f"[WARNING] Invalid end day: {eday}")
        if not (0 <= ehour <= 23):
            print(f"[WARNING] Invalid end hour: {ehour}")
        
        return sday, shour, eday, ehour
    
    return None, None, None, None

# --------------------------
# Extractors
# --------------------------
def extract_wind(text):
    """Extract wind direction, speed, and gust from TAF text."""
    wind_pat = r'\b(VRB|\d{3})(\d{2,3})(G(\d{2,3}))?KT\b'
    m = re.search(wind_pat, text)
    if not m:
        return None, None, None
    
    wd = None if m.group(1) == 'VRB' else int(m.group(1))
    sp = int(m.group(2))
    gust = int(m.group(4)) if m.group(4) else None
    
    # Validate wind values
    if wd is not None and not (0 <= wd <= 360):
        print(f"[WARNING] Invalid wind direction: {wd}")
    if sp > 200:
        print(f"[WARNING] Unusually high wind speed: {sp}")
    if gust is not None and gust > 250:
        print(f"[WARNING] Unusually high gust: {gust}")
    
    return wd, sp, gust

def extract_visibility(text):
    """Extract visibility in meters, rounded to nearest 25m."""
    vis_m = re.findall(r' (\d{4}) ', ' ' + text + ' ')
    if vis_m:
        vis = int(vis_m[0])
        if vis > 9999:
            print(f"[WARNING] Unusually high visibility: {vis}")
        vis = (vis // 25) * 25
        return vis
    return None

def extract_clouds(text):
    """Extract cloud layer information."""
    cloud_m = re.findall(r'\b(FEW|SCT|BKN|OVC)(\d{3})(CB)?\b', text)
    clouds = []
    for c in cloud_m:
        base_height = int(c[1])
        if base_height > 500:
            print(f"[WARNING] Unusually high cloud base: {base_height}00 feet")
        
        base = c[0] + c[1]
        if c[2]:
            base += "CB"
        clouds.append(base)
    return ', '.join(clouds) if clouds else None

def extract_weather(text):
    """Extract weather phenomena from TAF text."""
    wxlist = [
        "TS", "RA", "DZ", "SN", "SG", "PL", "GR", "GS", "IC",
        "FG", "BR", "HZ", "FU", "DU", "SA", "VA", "SQ", "FC",
        "SS", "DS", "SH", "TSRA", "VCTS", "VCSH"
    ]
    
    # Remove known patterns that aren't weather
    text2 = re.sub(r'\b(VRB|\d{3})(\d{2,3})(G\d{2,3})?KT\b', ' ', text)
    text2 = re.sub(r'\b\d{4}\b', ' ', text2)
    text2 = re.sub(r'\b(FEW|SCT|BKN|OVC)\d{3}CB?\b', ' ', text2)
    text2 = re.sub(r'\b(BECMG|TEMPO|FM|PROB\d+|PROB)\b', ' ', text2)
    
    toks = [t for t in re.split(r'\s+', text2) if t.strip()]
    out = []
    
    for tok in toks:
        if re.match(r'^(VC|\+|\-)?[A-Z]{2,}$', tok):
            if any(wx in tok for wx in wxlist):
                out.append(tok)
    
    return ', '.join(out) if out else None

# --------------------------
# Parse group data
# --------------------------
def parse_group_data(text):
    """Extract all meteorological data from group text."""
    wd, sp, gst = extract_wind(text)
    vis = extract_visibility(text)
    wx = extract_weather(text)
    cld = extract_clouds(text)
    
    return {
        'wind_dir': wd,
        'wind_spd': sp,
        'wind_gust': gst,
        'vis': vis,
        'weather': wx,
        'clouds': cld
    }

# --------------------------
# INITIAL group
# --------------------------
def add_initial_group(taf_text, groups):
    """Add INITIAL group representing the base forecast period."""
    m = re.search(r'\d{4}/\d{4}', taf_text)
    if m and groups:
        sday = int(m.group()[:2])
        shour = int(m.group()[2:4])
        
        # Validate initial period times
        if not (1 <= sday <= 31):
            print(f"[WARNING] Invalid initial start day: {sday}")
        if not (0 <= shour <= 23):
            print(f"[WARNING] Invalid initial start hour: {shour}")
        
        next_sday, next_shour, _, _ = parse_group_times(groups[0])
        init_text = taf_text.split(groups[0]['group'])[0].strip()

        initial = {
            'group': 'INITIAL',
            'text': init_text,
            'sday': sday,
            'shour': shour,
            'eday': next_sday,
            'ehour': next_shour
        }
        groups.insert(0, initial)
    elif not groups:
        print("[WARNING] No change groups found in TAF")
    
    return groups

# --------------------------
# Main TAF → dataframe
# --------------------------
def process_taf_to_df(taf_text):
    """Process single TAF report into structured dataframe."""
    if not taf_text or not taf_text.strip():
        raise ValueError("Empty TAF text provided")
    
    # Parse header
    try:
        header = parse_taf_header(taf_text)
    except ValueError as e:
        raise ValueError(f"Header parsing failed: {e}")
    
    year, month = header['year'], header['month']
    station = header['station']

    # Extract groups
    groups = extract_groups(taf_text)
    groups = add_initial_group(taf_text, groups)
    
    if not groups:
        raise ValueError(f"No groups found in TAF for station {station}")

    rows = []
    for i, g in enumerate(groups):
        sday, shour, eday, ehour = parse_group_times(g)

        # Set times if parsed successfully
        if sday is not None:
            g.setdefault('sday', sday)
            g.setdefault('shour', shour)
        if eday is not None:
            g.setdefault('eday', eday)
            g.setdefault('ehour', ehour)

        # For FM groups, end time is start of next group
        if g['group'].startswith('FM') and i+1 < len(groups):
            ns, nh, _, _ = parse_group_times(groups[i+1])
            if ns:
                g['eday'], g['ehour'] = ns, nh

        # Extract meteorological data
        g.update(parse_group_data(g['text']))
        rows.append(g)

    # Create dataframe
    df = pd.DataFrame(rows)
    
    # Check for required fields
    if 'sday' not in df.columns or 'shour' not in df.columns:
        raise ValueError("Missing start time information in parsed groups")

    # Create datetime objects
    df['st_dt'] = [
        make_datetime(year, month, r['sday'], r['shour']) 
        if pd.notna(r['sday']) else pd.NaT 
        for _, r in df.iterrows()
    ]
    
    df['end_dt'] = [
        adjust_end_if_earlier(s, make_datetime(year, month, r['eday'], r['ehour'])) 
        if pd.notna(r['eday']) else pd.NaT
        for s, (_, r) in zip(df['st_dt'], df.iterrows())
    ]

    # Sort by start time
    df = df.sort_values('st_dt').reset_index(drop=True)

    # Forward fill missing meteorological values
    df[['wind_dir', 'wind_spd', 'wind_gust', 'vis', 'weather', 'clouds']] = \
        df[['wind_dir', 'wind_spd', 'wind_gust', 'vis', 'weather', 'clouds']].ffill()

    # Add station and formatted dates
    df['Station'] = station
    df['st_date'] = df['st_dt'].dt.strftime('%d-%m-%Y %H:%M')
    df['end_date'] = df['end_dt'].dt.strftime('%d-%m-%Y %H:%M')

    return df[['Station', 'group', 'st_date', 'end_date',
               'wind_dir', 'wind_spd', 'wind_gust', 'vis', 'weather', 'clouds']]

# --------------------------
# RUN MAIN PIPELINE
# --------------------------

if __name__ == "__main__":
    # Configuration
    input_file = r"J:\Anand's Research\TAF Veri\PATNA\LONG TAF\2023\1.txt"
    output_folder = r"J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1"

    print("="*70)
    print("TAF PARSER - Enhanced with Error Checking")
    print("="*70)
    
    # Validate input file
    try:
        taf_list = clean_taf_ogimet_file(input_file)
    except Exception as e:
        print(f"\n[FATAL ERROR] Failed to read input file: {e}")
        exit(1)

    # Create output directory
    try:
        os.makedirs(output_folder, exist_ok=True)
        print(f"[INFO] Output folder ready: {output_folder}")
    except Exception as e:
        print(f"\n[FATAL ERROR] Cannot create output folder: {e}")
        exit(1)

    # Process each TAF
    print(f"\n{'='*70}")
    print(f"Processing {len(taf_list)} TAF report(s)...")
    print(f"{'='*70}\n")
    
    success_count = 0
    error_count = 0
    
    for i, taf in enumerate(taf_list, start=1):
        try:
            print(f"Processing TAF #{i}...")
            decoded = process_taf_to_df(taf)
            
            # Save to CSV
            out_file = os.path.join(output_folder, f"TAF_{i}_decoded.csv")
            decoded.to_csv(out_file, index=False, encoding='utf-8')
            
            print(f"  ✓ SUCCESS: {len(decoded)} records saved → {out_file}")
            success_count += 1
            
        except Exception as e:
            print(f"  ✗ ERROR in TAF #{i}: {e}")
            print(f"    TAF preview: {taf[:100]}...")
            error_count += 1
        
        print()

    # Summary
    print(f"{'='*70}")
    print(f"PROCESSING COMPLETE")
    print(f"{'='*70}")
    print(f"Total TAFs processed: {len(taf_list)}")
    print(f"  ✓ Successful: {success_count}")
    print(f"  ✗ Failed: {error_count}")
    print(f"{'='*70}")

TAF PARSER - Enhanced with Error Checking
[INFO] Found 116 TAF report(s) in file
[INFO] Output folder ready: J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1

Processing 116 TAF report(s)...

Processing TAF #1...
  ✓ SUCCESS: 6 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_1_decoded.csv

Processing TAF #2...
  ✓ SUCCESS: 3 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_2_decoded.csv

Processing TAF #3...
  ✓ SUCCESS: 5 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_3_decoded.csv

Processing TAF #4...
  ✓ SUCCESS: 6 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_4_decoded.csv

Processing TAF #5...
  ✓ SUCCESS: 5 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_5_decoded.csv

Processing TAF #6...
  ✓ SUCCESS: 7 records saved → J:\Anand's Research\TAFVeri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_6_decoded.cs

In [1]:
# =====================================================================
#   METAR DECODER - Enhanced with Error Checking
# =====================================================================
import os
import re
import pandas as pd
from datetime import datetime

# =====================================================================
#   CONFIG
# =====================================================================
metar_file = r"J:\Anand's Research\TAF Veri\PATNA\METAR\2023\1.txt"
output_csv = r"J:\Anand's Research\TAF Veri\PATNA\METAR\2023\1.csv"

# =====================================================================
#   READ METAR FILE
# =====================================================================
def read_metar_file(filepath):
    """Read METAR file with validation and error handling."""
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"METAR file not found: {filepath}")
    
    if not os.path.isfile(filepath):
        raise ValueError(f"Path is not a file: {filepath}")
    
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]
    except UnicodeDecodeError:
        print("[WARNING] UTF-8 decode failed, trying with 'latin-1' encoding")
        with open(filepath, "r", encoding="latin-1") as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]
    
    if not lines:
        raise ValueError(f"METAR file is empty or contains no valid lines: {filepath}")
    
    print(f"[INFO] Successfully read {len(lines)} METAR line(s) from file")
    return lines

# =====================================================================
#   METAR DECODER FUNCTIONS
# =====================================================================

def decode_station(text):
    """Extract station identifier (ICAO code) from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    m = re.search(r"^(METAR|SPECI)?\s*([A-Z]{4})", text)
    if m:
        station = m.group(2)
        if len(station) != 4:
            print(f"[WARNING] Invalid station code length: {station}")
        return station
    
    print(f"[WARNING] No station code found in METAR: {text[:50]}...")
    return None


def decode_datetime(text):
    """Extract observation datetime from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    m = re.search(r"\b(\d{2})(\d{2})(\d{2})Z\b", text)
    if not m:
        print(f"[WARNING] No valid datetime found in METAR: {text[:50]}...")
        return None

    day = int(m.group(1))
    hour = int(m.group(2))
    minute = int(m.group(3))
    
    # Validate day, hour, minute
    if not (1 <= day <= 31):
        print(f"[WARNING] Invalid day in METAR datetime: {day}")
        return None
    if not (0 <= hour <= 23):
        print(f"[WARNING] Invalid hour in METAR datetime: {hour}")
        return None
    if not (0 <= minute <= 59):
        print(f"[WARNING] Invalid minute in METAR datetime: {minute}")
        return None
    
    try:
        # Month and year fixed for January 2023
        return datetime(2023, 1, day, hour, minute)
    except ValueError as e:
        print(f"[WARNING] Failed to create datetime: {e}")
        return None


def decode_wind(text):
    """Extract wind direction, speed, and gust from METAR."""
    if not text or not isinstance(text, str):
        return None, None, None
    
    m = re.search(r"\b((\d{3})|VRB)(\d{2,3})(G(\d{2,3}))?KT\b", text)
    if not m:
        return None, None, None

    direction = m.group(1)
    speed = m.group(3)
    gust = m.group(5) if m.group(5) else None
    
    # Validate wind direction (if not VRB)
    if direction != "VRB":
        wind_dir_int = int(direction)
        if not (0 <= wind_dir_int <= 360):
            print(f"[WARNING] Invalid wind direction: {wind_dir_int}°")
    
    # Validate wind speed
    speed_int = int(speed)
    if speed_int > 150:
        print(f"[WARNING] Unusually high wind speed: {speed_int} KT")
    
    # Validate gust
    if gust:
        gust_int = int(gust)
        if gust_int > 200:
            print(f"[WARNING] Unusually high wind gust: {gust_int} KT")
        if speed_int > 0 and gust_int <= speed_int:
            print(f"[WARNING] Gust speed ({gust_int}) not greater than wind speed ({speed_int})")

    return direction, speed, gust


def decode_visibility(text):
    """Extract visibility in meters from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    # Look for 4-digit visibility
    m = re.search(r"\b(\d{4})\b", text)
    if m:
        vis = int(m.group(1))
        if vis > 9999:
            print(f"[WARNING] Visibility exceeds maximum reporting value: {vis} m")
        return f"{m.group(1)} m"
    
    return None


def decode_weather_groups(text):
    """Extract weather phenomena (intensity, descriptor, phenomenon)."""
    if not text or not isinstance(text, str):
        return {}
    
    # Pattern for weather groups: [intensity][descriptor][phenomenon]
    pattern = r"(\+|\-|\bVC)?(TS|SH|FZ|BL|DR|MI|PR|BC)?(DZ|RA|SN|SG|IC|PL|GR|GS|UP|BR|FG|FU|VA|DU|SA|HZ|PY|PO|SQ|FC|SS|DS)"
    matches = re.findall(pattern, text)

    if not matches:
        return {}
    
    wx = {}
    for i, (inten, desc, phen) in enumerate(matches):
        idx = i + 1
        wx[f"WX{idx}_INT"] = inten if inten else ""
        wx[f"WX{idx}_DESC"] = desc if desc else ""
        wx[f"WX{idx}_PHEN"] = phen
        
        # Validate combinations
        if not phen:
            print(f"[WARNING] Weather group {idx} missing phenomenon")

    return wx


def decode_clouds(text):
    """Extract cloud layer information from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    clouds = re.findall(r"\b(FEW|SCT|BKN|OVC)(\d{3})(CB|TCU)?\b", text)
    
    if clouds:
        cloud_list = []
        for amount, height, type_code in clouds:
            cloud_str = amount + height
            if type_code:
                cloud_str += type_code
            cloud_list.append(cloud_str)
            
            # Validate cloud height
            height_int = int(height)
            if height_int > 500:
                print(f"[WARNING] Unusually high cloud base: {height_int}00 feet")
        
        return ", ".join(cloud_list)
    
    return None


def decode_temp_dew(text):
    """Extract temperature and dewpoint from METAR."""
    if not text or not isinstance(text, str):
        return None, None
    
    m = re.search(r"\b(M?\d{2})/(M?\d{2})\b", text)
    if not m:
        return None, None

    def convert(val):
        """Convert temperature string to integer (M prefix = negative)."""
        if not val:
            return None
        return -int(val[1:]) if val.startswith('M') else int(val)

    temp = convert(m.group(1))
    dewpoint = convert(m.group(2))
    
    # Validate temperature range
    if temp is not None:
        if temp < -80 or temp > 60:
            print(f"[WARNING] Temperature outside normal range: {temp}°C")
    
    if dewpoint is not None:
        if dewpoint < -80 or dewpoint > 50:
            print(f"[WARNING] Dewpoint outside normal range: {dewpoint}°C")
    
    # Validate dewpoint <= temperature
    if temp is not None and dewpoint is not None:
        if dewpoint > temp:
            print(f"[WARNING] Dewpoint ({dewpoint}°C) greater than temperature ({temp}°C)")
    
    return temp, dewpoint


def decode_pressure(text):
    """Extract QNH pressure from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    m = re.search(r"\bQ(\d{4})\b", text)
    if m:
        pressure = int(m.group(1))
        
        # Validate pressure range (typical range: 950-1050 hPa)
        if pressure < 900 or pressure > 1100:
            print(f"[WARNING] Pressure outside normal range: {pressure} hPa")
        
        return pressure
    
    return None


def validate_metar_format(metar):
    """Perform basic validation on METAR format."""
    if not metar or not isinstance(metar, str):
        return False, "Empty or invalid METAR text"
    
    if len(metar) < 20:
        return False, f"METAR too short (length: {len(metar)})"
    
    # Check for METAR or SPECI identifier
    if not re.search(r"^(METAR|SPECI)\s+[A-Z]{4}", metar):
        # Some METARs may not have METAR/SPECI prefix
        if not re.search(r"^[A-Z]{4}\s+\d{6}Z", metar):
            return False, "Missing required METAR/SPECI identifier or station code"
    
    # Check for datetime group
    if not re.search(r"\d{6}Z", metar):
        return False, "Missing datetime group (DDHHMM Z)"
    
    return True, "Valid format"

# =====================================================================
#   DECODE A SINGLE METAR
# =====================================================================
def decode_metar(metar, line_number=None):
    """Decode a single METAR report with comprehensive error handling."""
    
    # Validate METAR format
    is_valid, msg = validate_metar_format(metar)
    if not is_valid:
        prefix = f"[Line {line_number}] " if line_number else ""
        print(f"[WARNING] {prefix}Invalid METAR format: {msg}")
        print(f"          METAR: {metar[:80]}...")
    
    # Extract all components
    try:
        wind_dir, wind_speed, wind_gust = decode_wind(metar)
        temp, dewpoint = decode_temp_dew(metar)
        
        decoded = {
            "RAW_METAR": metar,
            "STATION": decode_station(metar),
            "DATETIME": decode_datetime(metar),
            "WIND_DIR": wind_dir,
            "WIND_SPEED": wind_speed,
            "WIND_GUST": wind_gust,
            "VISIBILITY": decode_visibility(metar),
            "CLOUDS": decode_clouds(metar),
            "TEMPERATURE": temp,
            "DEWPOINT": dewpoint,
            "PRESSURE_QNH": decode_pressure(metar)
        }
        
        # Add weather groups
        decoded.update(decode_weather_groups(metar))
        
        return decoded
        
    except Exception as e:
        prefix = f"[Line {line_number}] " if line_number else ""
        print(f"[ERROR] {prefix}Failed to decode METAR: {e}")
        print(f"        METAR: {metar[:80]}...")
        
        # Return minimal decoded structure
        return {
            "RAW_METAR": metar,
            "STATION": None,
            "DATETIME": None,
            "WIND_DIR": None,
            "WIND_SPEED": None,
            "WIND_GUST": None,
            "VISIBILITY": None,
            "CLOUDS": None,
            "TEMPERATURE": None,
            "DEWPOINT": None,
            "PRESSURE_QNH": None
        }

# =====================================================================
#   MAIN PROCESS
# =====================================================================
if __name__ == "__main__":
    print("=" * 70)
    print("METAR DECODER - Enhanced with Error Checking")
    print("=" * 70)
    
    try:
        # Read METAR file
        print("\n[STEP 1] Reading METAR file...")
        metar_list = read_metar_file(metar_file)
        print(f"[INFO] Total METARs loaded: {len(metar_list)}")
        
        # Decode all METARs
        print(f"\n[STEP 2] Decoding {len(metar_list)} METAR report(s)...")
        print("-" * 70)
        
        decoded_rows = []
        success_count = 0
        warning_count = 0
        error_count = 0
        
        for i, metar in enumerate(metar_list, start=1):
            if i % 100 == 0:
                print(f"[PROGRESS] Processed {i}/{len(metar_list)} METARs...")
            
            try:
                decoded = decode_metar(metar, line_number=i)
                decoded_rows.append(decoded)
                
                # Check if decoding was successful
                if decoded.get('STATION') and decoded.get('DATETIME'):
                    success_count += 1
                else:
                    warning_count += 1
                    
            except Exception as e:
                print(f"[ERROR] Failed to process METAR #{i}: {e}")
                error_count += 1
                # Add empty row to maintain alignment
                decoded_rows.append({
                    "RAW_METAR": metar,
                    "STATION": None,
                    "DATETIME": None
                })
        
        # Create DataFrame
        print(f"\n[STEP 3] Creating DataFrame...")
        df = pd.DataFrame(decoded_rows)
        
        # Validate DataFrame
        if df.empty:
            raise ValueError("No data to save - all METARs failed to decode")
        
        # Check for output directory
        output_dir = os.path.dirname(output_csv)
        if output_dir and not os.path.exists(output_dir):
            print(f"[INFO] Creating output directory: {output_dir}")
            os.makedirs(output_dir, exist_ok=True)
        
        # Save to CSV
        print(f"[STEP 4] Saving decoded METARs to CSV...")
        df.to_csv(output_csv, index=False, encoding="utf-8")
        
        # Summary
        print("\n" + "=" * 70)
        print("DECODING COMPLETE")
        print("=" * 70)
        print(f"Input file:        {metar_file}")
        print(f"Output file:       {output_csv}")
        print(f"Total METARs:      {len(metar_list)}")
        print(f"  ✓ Successful:    {success_count}")
        print(f"  ⚠ Warnings:      {warning_count}")
        print(f"  ✗ Errors:        {error_count}")
        print(f"Total rows saved:  {len(df)}")
        print("=" * 70)
        
        # Additional statistics
        if not df.empty:
            print("\nDATA STATISTICS:")
            print(f"  Unique stations: {df['STATION'].nunique()}")
            print(f"  Date range:      {df['DATETIME'].min()} to {df['DATETIME'].max()}")
            
            # Count missing values
            missing = df.isnull().sum()
            if missing.sum() > 0:
                print("\n  Missing values:")
                for col, count in missing[missing > 0].items():
                    if col != 'RAW_METAR':
                        print(f"    {col}: {count} ({count/len(df)*100:.1f}%)")
        
        print("\n[SUCCESS] Process completed successfully!")
        
    except FileNotFoundError as e:
        print(f"\n[FATAL ERROR] {e}")
        print("Please check the input file path.")
        exit(1)
        
    except ValueError as e:
        print(f"\n[FATAL ERROR] {e}")
        exit(1)
        
    except Exception as e:
        print(f"\n[FATAL ERROR] Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        exit(1)

METAR DECODER - Enhanced with Error Checking

[STEP 1] Reading METAR file...
[INFO] Successfully read 1489 METAR line(s) from file
[INFO] Total METARs loaded: 1489

[STEP 2] Decoding 1489 METAR report(s)...
----------------------------------------------------------------------
          METAR: 202301010000 METAR VEPT 010000Z 27002KT 0200 R25/0500 FG VV/// 11/10 Q1018 NOSIG...
          METAR: 202301010030 METAR COR VEPT 010030Z 27002KT 0300 R25/0800 FG VV/// 11/10 Q1019 B...
          METAR: 202301010100 METAR VEPT 010100Z 27002KT 0400 R25/1000 FG VV/// 10/09 Q1019 NOSIG...
          METAR: 202301010130 METAR VEPT 010130Z 00000KT 0200 R25/0200 FG VV/// 10/09 Q1020 NOSIG...
          METAR: 202301010200 METAR VEPT 010200Z 00000KT 0100 R25/0125 FG VV/// 10/10 Q1020 NOSIG...
          METAR: 202301010230 METAR VEPT 010230Z 00000KT 0050 R25/0125 FG VV/// 11/10 Q1021 NOSIG...
          METAR: 202301010300 METAR VEPT 010300Z 00000KT 0050 R25/0125 FG VV/// 11/10 Q1021 NOSIG...
          METAR

In [3]:
# =====================================================================
#   METAR DECODER - Enhanced with Error Checking
# =====================================================================
import os
import re
import pandas as pd
from datetime import datetime

# =====================================================================
#   CONFIG
# =====================================================================
metar_file = r"J:\Anand's Research\TAF Veri\PATNA\METAR\2023\1.txt"
output_csv = r"J:\Anand's Research\TAF Veri\PATNA\METAR\2023\1.csv"

# =====================================================================
#   READ METAR FILE
# =====================================================================
def read_metar_file(filepath):
    """Read METAR file with validation and error handling."""
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"METAR file not found: {filepath}")
    
    if not os.path.isfile(filepath):
        raise ValueError(f"Path is not a file: {filepath}")
    
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]
    except UnicodeDecodeError:
        print("[WARNING] UTF-8 decode failed, trying with 'latin-1' encoding")
        with open(filepath, "r", encoding="latin-1") as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]
    
    if not lines:
        raise ValueError(f"METAR file is empty or contains no valid lines: {filepath}")
    
    print(f"[INFO] Successfully read {len(lines)} METAR line(s) from file")
    return lines

# =====================================================================
#   METAR DECODER FUNCTIONS
# =====================================================================

def decode_station(text):
    """Extract station identifier (ICAO code) from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    # First try: standard format with optional timestamp prefix
    # Format: [YYYYMMDDHHMM] [METAR|SPECI] [COR] STATION
    m = re.search(r"(?:\d{12}\s+)?(?:METAR|SPECI)?\s*(?:COR\s+)?([A-Z]{4})\s+\d{6}Z", text)
    if m:
        station = m.group(1)
        if len(station) != 4:
            print(f"[WARNING] Invalid station code length: {station}")
        return station
    
    # Second try: simpler pattern
    m = re.search(r"\b([A-Z]{4})\s+\d{6}Z", text)
    if m:
        station = m.group(1)
        return station
    
    print(f"[WARNING] No station code found in METAR: {text[:50]}...")
    return None


def decode_datetime(text):
    """Extract observation datetime from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    m = re.search(r"\b(\d{2})(\d{2})(\d{2})Z\b", text)
    if not m:
        print(f"[WARNING] No valid datetime found in METAR: {text[:50]}...")
        return None

    day = int(m.group(1))
    hour = int(m.group(2))
    minute = int(m.group(3))
    
    # Validate day, hour, minute
    if not (1 <= day <= 31):
        print(f"[WARNING] Invalid day in METAR datetime: {day}")
        return None
    if not (0 <= hour <= 23):
        print(f"[WARNING] Invalid hour in METAR datetime: {hour}")
        return None
    if not (0 <= minute <= 59):
        print(f"[WARNING] Invalid minute in METAR datetime: {minute}")
        return None
    
    try:
        # Month and year fixed for January 2023
        return datetime(2023, 1, day, hour, minute)
    except ValueError as e:
        print(f"[WARNING] Failed to create datetime: {e}")
        return None


def decode_wind(text):
    """Extract wind direction, speed, and gust from METAR."""
    if not text or not isinstance(text, str):
        return None, None, None
    
    m = re.search(r"\b((\d{3})|VRB)(\d{2,3})(G(\d{2,3}))?KT\b", text)
    if not m:
        return None, None, None

    direction = m.group(1)
    speed = m.group(3)
    gust = m.group(5) if m.group(5) else None
    
    # Validate wind direction (if not VRB)
    if direction != "VRB":
        wind_dir_int = int(direction)
        if not (0 <= wind_dir_int <= 360):
            print(f"[WARNING] Invalid wind direction: {wind_dir_int}°")
    
    # Validate wind speed
    speed_int = int(speed)
    if speed_int > 150:
        print(f"[WARNING] Unusually high wind speed: {speed_int} KT")
    
    # Validate gust
    if gust:
        gust_int = int(gust)
        if gust_int > 200:
            print(f"[WARNING] Unusually high wind gust: {gust_int} KT")
        if speed_int > 0 and gust_int <= speed_int:
            print(f"[WARNING] Gust speed ({gust_int}) not greater than wind speed ({speed_int})")

    return direction, speed, gust


def decode_visibility(text):
    """Extract visibility in meters from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    # Look for 4-digit visibility
    m = re.search(r"\b(\d{4})\b", text)
    if m:
        vis = int(m.group(1))
        if vis > 9999:
            print(f"[WARNING] Visibility exceeds maximum reporting value: {vis} m")
        return f"{m.group(1)} m"
    
    return None


def decode_weather_groups(text):
    """Extract weather phenomena (intensity, descriptor, phenomenon)."""
    if not text or not isinstance(text, str):
        return {}
    
    # Pattern for weather groups: [intensity][descriptor][phenomenon]
    pattern = r"(\+|\-|\bVC)?(TS|SH|FZ|BL|DR|MI|PR|BC)?(DZ|RA|SN|SG|IC|PL|GR|GS|UP|BR|FG|FU|VA|DU|SA|HZ|PY|PO|SQ|FC|SS|DS)"
    matches = re.findall(pattern, text)

    if not matches:
        return {}
    
    wx = {}
    for i, (inten, desc, phen) in enumerate(matches):
        idx = i + 1
        wx[f"WX{idx}_INT"] = inten if inten else ""
        wx[f"WX{idx}_DESC"] = desc if desc else ""
        wx[f"WX{idx}_PHEN"] = phen
        
        # Validate combinations
        if not phen:
            print(f"[WARNING] Weather group {idx} missing phenomenon")

    return wx


def decode_clouds(text):
    """Extract cloud layer information from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    clouds = re.findall(r"\b(FEW|SCT|BKN|OVC)(\d{3})(CB|TCU)?\b", text)
    
    if clouds:
        cloud_list = []
        for amount, height, type_code in clouds:
            cloud_str = amount + height
            if type_code:
                cloud_str += type_code
            cloud_list.append(cloud_str)
            
            # Validate cloud height
            height_int = int(height)
            if height_int > 500:
                print(f"[WARNING] Unusually high cloud base: {height_int}00 feet")
        
        return ", ".join(cloud_list)
    
    return None


def decode_temp_dew(text):
    """Extract temperature and dewpoint from METAR."""
    if not text or not isinstance(text, str):
        return None, None
    
    m = re.search(r"\b(M?\d{2})/(M?\d{2})\b", text)
    if not m:
        return None, None

    def convert(val):
        """Convert temperature string to integer (M prefix = negative)."""
        if not val:
            return None
        return -int(val[1:]) if val.startswith('M') else int(val)

    temp = convert(m.group(1))
    dewpoint = convert(m.group(2))
    
    # Validate temperature range
    if temp is not None:
        if temp < -80 or temp > 60:
            print(f"[WARNING] Temperature outside normal range: {temp}°C")
    
    if dewpoint is not None:
        if dewpoint < -80 or dewpoint > 50:
            print(f"[WARNING] Dewpoint outside normal range: {dewpoint}°C")
    
    # Validate dewpoint <= temperature
    if temp is not None and dewpoint is not None:
        if dewpoint > temp:
            print(f"[WARNING] Dewpoint ({dewpoint}°C) greater than temperature ({temp}°C)")
    
    return temp, dewpoint


def decode_pressure(text):
    """Extract QNH pressure from METAR."""
    if not text or not isinstance(text, str):
        return None
    
    m = re.search(r"\bQ(\d{4})\b", text)
    if m:
        pressure = int(m.group(1))
        
        # Validate pressure range (typical range: 950-1050 hPa)
        if pressure < 900 or pressure > 1100:
            print(f"[WARNING] Pressure outside normal range: {pressure} hPa")
        
        return pressure
    
    return None


def validate_metar_format(metar):
    """Perform basic validation on METAR format."""
    if not metar or not isinstance(metar, str):
        return False, "Empty or invalid METAR text"
    
    if len(metar) < 20:
        return False, f"METAR too short (length: {len(metar)})"
    
    # Check for datetime group (DDHHMM Z)
    if not re.search(r"\d{6}Z", metar):
        return False, "Missing datetime group (DDHHMM Z)"
    
    # Check for station code (4 letters before datetime)
    # Format: [YYYYMMDDHHMM] [METAR|SPECI] [COR] STATION DDHHMMZ
    if not re.search(r"[A-Z]{4}\s+\d{6}Z", metar):
        return False, "Missing station code or invalid format"
    
    return True, "Valid format"

# =====================================================================
#   DECODE A SINGLE METAR
# =====================================================================
def decode_metar(metar, line_number=None):
    """Decode a single METAR report with comprehensive error handling."""
    
    # Validate METAR format
    is_valid, msg = validate_metar_format(metar)
    if not is_valid:
        prefix = f"[Line {line_number}] " if line_number else ""
        print(f"[WARNING] {prefix}Invalid METAR format: {msg}")
        print(f"          METAR: {metar[:80]}...")
    
    # Extract all components
    try:
        wind_dir, wind_speed, wind_gust = decode_wind(metar)
        temp, dewpoint = decode_temp_dew(metar)
        
        decoded = {
            "RAW_METAR": metar,
            "STATION": decode_station(metar),
            "DATETIME": decode_datetime(metar),
            "WIND_DIR": wind_dir,
            "WIND_SPEED": wind_speed,
            "WIND_GUST": wind_gust,
            "VISIBILITY": decode_visibility(metar),
            "CLOUDS": decode_clouds(metar),
            "TEMPERATURE": temp,
            "DEWPOINT": dewpoint,
            "PRESSURE_QNH": decode_pressure(metar)
        }
        
        # Add weather groups
        decoded.update(decode_weather_groups(metar))
        
        return decoded
        
    except Exception as e:
        prefix = f"[Line {line_number}] " if line_number else ""
        print(f"[ERROR] {prefix}Failed to decode METAR: {e}")
        print(f"        METAR: {metar[:80]}...")
        
        # Return minimal decoded structure
        return {
            "RAW_METAR": metar,
            "STATION": None,
            "DATETIME": None,
            "WIND_DIR": None,
            "WIND_SPEED": None,
            "WIND_GUST": None,
            "VISIBILITY": None,
            "CLOUDS": None,
            "TEMPERATURE": None,
            "DEWPOINT": None,
            "PRESSURE_QNH": None
        }

# =====================================================================
#   MAIN PROCESS
# =====================================================================
if __name__ == "__main__":
    print("=" * 70)
    print("METAR DECODER - Enhanced with Error Checking")
    print("=" * 70)
    
    try:
        # Read METAR file
        print("\n[STEP 1] Reading METAR file...")
        metar_list = read_metar_file(metar_file)
        print(f"[INFO] Total METARs loaded: {len(metar_list)}")
        
        # Decode all METARs
        print(f"\n[STEP 2] Decoding {len(metar_list)} METAR report(s)...")
        print("-" * 70)
        
        decoded_rows = []
        success_count = 0
        warning_count = 0
        error_count = 0
        
        for i, metar in enumerate(metar_list, start=1):
            if i % 100 == 0:
                print(f"[PROGRESS] Processed {i}/{len(metar_list)} METARs...")
            
            try:
                decoded = decode_metar(metar, line_number=i)
                decoded_rows.append(decoded)
                
                # Check if decoding was successful
                if decoded.get('STATION') and decoded.get('DATETIME'):
                    success_count += 1
                else:
                    warning_count += 1
                    
            except Exception as e:
                print(f"[ERROR] Failed to process METAR #{i}: {e}")
                error_count += 1
                # Add empty row to maintain alignment
                decoded_rows.append({
                    "RAW_METAR": metar,
                    "STATION": None,
                    "DATETIME": None
                })
        
        # Create DataFrame
        print(f"\n[STEP 3] Creating DataFrame...")
        df = pd.DataFrame(decoded_rows)
        
        # Validate DataFrame
        if df.empty:
            raise ValueError("No data to save - all METARs failed to decode")
        
        # Check for output directory
        output_dir = os.path.dirname(output_csv)
        if output_dir and not os.path.exists(output_dir):
            print(f"[INFO] Creating output directory: {output_dir}")
            os.makedirs(output_dir, exist_ok=True)
        
        # Save to CSV
        print(f"[STEP 4] Saving decoded METARs to CSV...")
        df.to_csv(output_csv, index=False, encoding="utf-8")
        
        # Summary
        print("\n" + "=" * 70)
        print("DECODING COMPLETE")
        print("=" * 70)
        print(f"Input file:        {metar_file}")
        print(f"Output file:       {output_csv}")
        print(f"Total METARs:      {len(metar_list)}")
        print(f"  ✓ Successful:    {success_count}")
        print(f"  ⚠ Warnings:      {warning_count}")
        print(f"  ✗ Errors:        {error_count}")
        print(f"Total rows saved:  {len(df)}")
        print("=" * 70)
        
        # Additional statistics
        if not df.empty:
            print("\nDATA STATISTICS:")
            print(f"  Unique stations: {df['STATION'].nunique()}")
            print(f"  Date range:      {df['DATETIME'].min()} to {df['DATETIME'].max()}")
            
            # Count missing values
            missing = df.isnull().sum()
            if missing.sum() > 0:
                print("\n  Missing values:")
                for col, count in missing[missing > 0].items():
                    if col != 'RAW_METAR':
                        print(f"    {col}: {count} ({count/len(df)*100:.1f}%)")
        
        print("\n[SUCCESS] Process completed successfully!")
        
    except FileNotFoundError as e:
        print(f"\n[FATAL ERROR] {e}")
        print("Please check the input file path.")
        exit(1)
        
    except ValueError as e:
        print(f"\n[FATAL ERROR] {e}")
        exit(1)
        
    except Exception as e:
        print(f"\n[FATAL ERROR] Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        exit(1)

METAR DECODER - Enhanced with Error Checking

[STEP 1] Reading METAR file...
[INFO] Successfully read 1489 METAR line(s) from file
[INFO] Total METARs loaded: 1489

[STEP 2] Decoding 1489 METAR report(s)...
----------------------------------------------------------------------
[PROGRESS] Processed 100/1489 METARs...
[PROGRESS] Processed 200/1489 METARs...
[PROGRESS] Processed 300/1489 METARs...
[PROGRESS] Processed 400/1489 METARs...
[PROGRESS] Processed 500/1489 METARs...
[PROGRESS] Processed 600/1489 METARs...
[PROGRESS] Processed 700/1489 METARs...
[PROGRESS] Processed 800/1489 METARs...
[PROGRESS] Processed 900/1489 METARs...
[PROGRESS] Processed 1000/1489 METARs...
[PROGRESS] Processed 1100/1489 METARs...
[PROGRESS] Processed 1200/1489 METARs...
[PROGRESS] Processed 1300/1489 METARs...
[PROGRESS] Processed 1400/1489 METARs...

[STEP 3] Creating DataFrame...
[STEP 4] Saving decoded METARs to CSV...

DECODING COMPLETE
Input file:        J:\Anand's Research\TAF Veri\PATNA\METAR\2023\

In [2]:
# =========================================================
# TAF VERIFICATION - Following WMO/ICAO Annex 3 Criteria
# Based on Austro Control TAF Verification Methodology
# =========================================================
import os
import math
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# =========================================================
# CONFIG: file paths
# =========================================================
taf_file = r"J:\Anand's Research\TAF Veri\PATNA\LONG TAF\2023\Decoded TAF_1\TAF_1_decoded.csv"
metar_file = r"J:\Anand's Research\TAF Veri\PATNA\METAR\2023\1.csv"
output_file = r"J:\Anand's Research\TAF Veri\PATNA\TAF_Verification_Results.csv"

# =========================================================
# WMO/ICAO THRESHOLDS AND CRITERIA
# =========================================================

# Wind thresholds (knots) - ICAO Annex 3, Appendix 5
WIND_SPEED_THRESHOLDS = [7, 15, 25, 35, 45, 55]
WIND_DIR_THRESHOLD_DEGREES = 30  # Austro Control uses 30°, ICAO suggests 60°
WIND_SPEED_THRESHOLD_MIN = 7  # Minimum speed for direction verification (kt)
WIND_SPEED_TOLERANCE = 10  # ±10 kt for wind speed accuracy

# Visibility thresholds (meters) - ICAO Annex 3
VISIBILITY_THRESHOLDS = [150, 350, 600, 800, 1500, 3000, 5000, 9999]

# Cloud ceiling thresholds (feet) - ICAO Annex 3
CEILING_THRESHOLDS = [100, 200, 500, 1000, 1500]

# Cloud amount hierarchy for matching
CLOUD_AMOUNT_ORDER = {"CLR": 0, "SKC": 0, "FEW": 1, "SCT": 2, "BKN": 3, "OVC": 4, "VV": 5}

# Weather phenomena classes (based on ICAO Annex 3)
WEATHER_CLASSES = {
    'NSW': 0,      # No significant weather
    'FZFG': 1,     # Freezing fog
    'RA': 2,       # Moderate/heavy rain (including showers)
    'BLSN': 3,     # Drifting/blowing snow
    'SN': 4,       # Moderate/heavy snow
    'FZRA': 5,     # Freezing rain/drizzle
    'TS': 6        # Thunderstorm
}

# =========================================================
# HELPER FUNCTIONS
# =========================================================

def safe_float(x):
    """Safely convert to float, return NaN if not possible."""
    try:
        if pd.isna(x):
            return np.nan
        return float(x)
    except (ValueError, TypeError):
        return np.nan

def safe_int(x):
    """Safely convert to int, return None if not possible."""
    try:
        if pd.isna(x):
            return None
        return int(float(x))
    except (ValueError, TypeError):
        return None

# =========================================================
# CLOUD PARSING FUNCTIONS
# =========================================================

def parse_cloud_layer(cloud_str):
    """
    Parse a single cloud layer token like 'BKN030', 'FEW008CB', 'OVC///', 'CLR'.
    Returns: (amount, height_feet, convective)
    - amount: string like 'BKN', 'OVC', 'CLR', or None
    - height_feet: integer height in feet, or None if not available
    - convective: True if CB/TCU present, else False
    """
    if not isinstance(cloud_str, str):
        return None, None, False
    
    cloud_str = cloud_str.strip().upper()
    if not cloud_str or cloud_str in ('NIL', ''):
        return None, None, False
    
    # Special cases
    if cloud_str in ('CLR', 'SKC', 'NSC', 'NCD', 'CAVOK'):
        return 'CLR', None, False
    
    # Vertical visibility (obscured sky)
    import re
    vv_match = re.match(r'VV(\d{3})', cloud_str)
    if vv_match:
        height = int(vv_match.group(1)) * 100  # Convert to feet
        return 'VV', height, False
    
    # Standard cloud layer: AMT + height + optional CB/TCU
    # Pattern: (FEW|SCT|BKN|OVC)(\d{3})(CB|TCU)?
    cloud_match = re.match(r'(FEW|SCT|BKN|OVC|NSC|NCD)(\d{3}|\/{2,3})(CB|TCU)?', cloud_str)
    if cloud_match:
        amount = cloud_match.group(1)
        height_str = cloud_match.group(2)
        convective_type = cloud_match.group(3)
        
        # Parse height
        if '/' in height_str:
            height_feet = None  # Height not available
        else:
            height_feet = int(height_str) * 100
        
        convective = bool(convective_type)
        return amount, height_feet, convective
    
    # If no pattern matched, try to extract just amount
    amt_match = re.match(r'(FEW|SCT|BKN|OVC|VV)', cloud_str)
    if amt_match:
        return amt_match.group(1), None, False
    
    return None, None, False

def parse_cloud_field(cloud_field):
    """
    Parse TAF or METAR clouds field into list of cloud layers.
    Returns: List of tuples [(amount, height_feet, convective), ...]
    """
    if pd.isna(cloud_field):
        return []
    
    # Split by comma or space
    cloud_str = str(cloud_field).replace(',', ' ')
    tokens = cloud_str.split()
    
    layers = []
    for token in tokens:
        amount, height, conv = parse_cloud_layer(token)
        if amount:  # Only add if we got a valid amount
            layers.append((amount, height, conv))
    
    return layers

def get_ceiling(cloud_layers):
    """
    Get ceiling from cloud layers.
    Ceiling is defined as lowest layer with BKN, OVC, or VV.
    Returns: height in feet, or None if no ceiling
    """
    if not cloud_layers:
        return None
    
    for amount, height, conv in cloud_layers:
        if amount in ('BKN', 'OVC', 'VV'):
            return height
    
    return None

def classify_ceiling(height_feet):
    """
    Classify ceiling height into ICAO threshold categories.
    Returns: threshold index (0 to len(CEILING_THRESHOLDS))
    """
    if height_feet is None:
        return len(CEILING_THRESHOLDS)  # No ceiling = highest category
    
    for i, threshold in enumerate(CEILING_THRESHOLDS):
        if height_feet < threshold:
            return i
    
    return len(CEILING_THRESHOLDS)

# =========================================================
# VISIBILITY FUNCTIONS
# =========================================================

def parse_visibility(vis_field):
    """
    Parse visibility field to meters.
    Handles: '5000', '5000 m', '9999', 'CAVOK', etc.
    """
    if pd.isna(vis_field):
        return None
    
    vis_str = str(vis_field).upper().strip()
    
    # CAVOK = visibility 10km or more
    if 'CAVOK' in vis_str:
        return 10000
    
    # Extract numeric value
    import re
    num_match = re.search(r'(\d+)', vis_str)
    if num_match:
        vis_m = int(num_match.group(1))
        return vis_m
    
    return None

def classify_visibility(vis_meters):
    """
    Classify visibility into ICAO threshold categories.
    Returns: threshold index (0 to len(VISIBILITY_THRESHOLDS))
    """
    if vis_meters is None:
        return None
    
    for i, threshold in enumerate(VISIBILITY_THRESHOLDS):
        if vis_meters <= threshold:
            return i
    
    return len(VISIBILITY_THRESHOLDS)  # Above all thresholds

def visibility_accurate_icao(forecast_m, observed_m):
    """
    Check if visibility forecast is accurate per ICAO Annex 3 Amendment criteria.
    - Visibility ≤ 800m: ±200m or ±50%
    - Visibility > 800m: ±30% or ±1500m (whichever is greater)
    """
    if forecast_m is None or observed_m is None:
        return False
    
    forecast_m = float(forecast_m)
    observed_m = float(observed_m)
    
    if forecast_m <= 800:
        # Use ±200m tolerance
        tolerance = 200
    else:
        # Use ±30% tolerance
        tolerance = max(forecast_m * 0.30, 1500)
    
    return abs(forecast_m - observed_m) <= tolerance

# =========================================================
# WIND FUNCTIONS
# =========================================================

def parse_wind_direction(wd_field):
    """Parse wind direction, handling VRB."""
    if pd.isna(wd_field):
        return None
    
    wd_str = str(wd_field).upper().strip()
    if wd_str == 'VRB':
        return 'VRB'
    
    try:
        return int(float(wd_str))
    except (ValueError, TypeError):
        return None

def wind_direction_accurate(forecast_dir, observed_dir, observed_speed):
    """
    Check wind direction accuracy per ICAO criteria.
    - Only verify if observed wind speed ≥ threshold (7 kt)
    - Allow ±30° deviation (Austro Control) or ±60° (ICAO)
    - VRB forecast = always accept
    """
    if observed_speed is None or observed_speed < WIND_SPEED_THRESHOLD_MIN:
        return True  # Direction not operationally significant at low speeds
    
    if forecast_dir is None:
        return False
    
    if forecast_dir == 'VRB':
        return True  # VRB forecast is always acceptable
    
    if observed_dir is None or observed_dir == 'VRB':
        return False  # Can't verify against VRB observation
    
    try:
        forecast_dir = float(forecast_dir)
        observed_dir = float(observed_dir)
    except (ValueError, TypeError):
        return False
    
    # Calculate angular difference
    diff = abs(forecast_dir - observed_dir)
    if diff > 180:
        diff = 360 - diff
    
    return diff <= WIND_DIR_THRESHOLD_DEGREES

def classify_wind_speed(speed_kt):
    """
    Classify wind speed into ICAO threshold categories.
    Returns: threshold index (0 to len(WIND_SPEED_THRESHOLDS))
    """
    if speed_kt is None:
        return None
    
    for i, threshold in enumerate(WIND_SPEED_THRESHOLDS):
        if speed_kt < threshold:
            return i
    
    return len(WIND_SPEED_THRESHOLDS)

def wind_speed_accurate(forecast_kt, observed_kt):
    """
    Check wind speed accuracy: ±10 kt tolerance.
    """
    if forecast_kt is None or observed_kt is None:
        return False
    
    return abs(float(forecast_kt) - float(observed_kt)) <= WIND_SPEED_TOLERANCE

# =========================================================
# WEATHER PHENOMENA FUNCTIONS
# =========================================================

def parse_weather_phenomena(wx_field):
    """
    Parse weather phenomena into standardized codes.
    Returns: Set of weather codes present
    """
    if pd.isna(wx_field):
        return set()
    
    wx_str = str(wx_field).upper()
    
    # Check for no significant weather
    if wx_str in ('NSW', 'NIL', '', 'NONE'):
        return {'NSW'}
    
    wx_codes = set()
    
    # Check for significant phenomena
    if 'FZFG' in wx_str:
        wx_codes.add('FZFG')
    if 'FZRA' in wx_str or 'FZDZ' in wx_str:
        wx_codes.add('FZRA')
    if 'TS' in wx_str:
        wx_codes.add('TS')
    if 'BLSN' in wx_str or 'DRSN' in wx_str:
        wx_codes.add('BLSN')
    
    # Check for precipitation types
    if any(x in wx_str for x in ['RA', 'SHRA', 'TSRA', '+RA', '-RA']):
        wx_codes.add('RA')
    if any(x in wx_str for x in ['SN', 'SHSN', 'TSSN', '+SN', '-SN']):
        wx_codes.add('SN')
    
    if not wx_codes:
        wx_codes.add('NSW')
    
    return wx_codes

def combine_metar_weather(metar_row):
    """
    Combine all METAR weather fields into single set.
    """
    wx_set = set()
    
    # Combine all WX fields
    for prefix in ['WX1', 'WX2', 'WX3', 'WX4']:
        for suffix in ['_INT', '_DESC', '_PHEN']:
            col = prefix + suffix
            if col in metar_row.index:
                val = metar_row[col]
                if isinstance(val, str) and val.strip():
                    wx_set.add(val.strip().upper())
    
    # Parse combined string
    combined = ' '.join(wx_set)
    return parse_weather_phenomena(combined)

def weather_accurate(forecast_wx, observed_wx):
    """
    Check if weather phenomena match.
    Follows "occurrence-based" verification.
    """
    # If both forecast no significant weather
    if forecast_wx == {'NSW'} and observed_wx == {'NSW'}:
        return True
    
    # If forecast has weather and observation has matching weather
    if forecast_wx != {'NSW'} and observed_wx != {'NSW'}:
        # Check for any overlap in phenomena
        if forecast_wx & observed_wx:  # Set intersection
            return True
        # If forecast has TS and observation has any convective weather
        if 'TS' in forecast_wx and any(x in observed_wx for x in ['TS', 'TSRA', 'TSSN']):
            return True
        return False
    
    # Mismatch: one has weather, other doesn't
    return False

# =========================================================
# CLOUD VERIFICATION FUNCTIONS
# =========================================================

def clouds_accurate_ceiling(forecast_layers, observed_layers):
    """
    Verify clouds based on ceiling criteria (ICAO Annex 3).
    Uses lowest BKN/OVC/VV layer as ceiling.
    """
    forecast_ceiling = get_ceiling(forecast_layers)
    observed_ceiling = get_ceiling(observed_layers)
    
    # Classify ceilings
    forecast_class = classify_ceiling(forecast_ceiling)
    observed_class = classify_ceiling(observed_ceiling)
    
    # Allow one category difference
    return abs(forecast_class - observed_class) <= 1

def clouds_accurate_amount_and_height(forecast_layers, observed_layers):
    """
    Verify clouds based on amount and height matching.
    More detailed verification than ceiling-only.
    """
    if not forecast_layers and not observed_layers:
        return True  # Both clear
    
    if not forecast_layers:
        # Forecast clear, check if observations are insignificant
        for amt, _, _ in observed_layers:
            if amt in ('BKN', 'OVC', 'VV'):
                return False  # Observed ceiling but none forecast
        return True
    
    if not observed_layers:
        # Observed clear, check if forecast is insignificant
        for amt, _, _ in forecast_layers:
            if amt in ('BKN', 'OVC', 'VV'):
                return False  # Forecast ceiling but none observed
        return True
    
    # Match primary layer (usually lowest significant)
    # Get first BKN/OVC layer from each
    forecast_primary = None
    for amt, height, conv in forecast_layers:
        if amt in ('BKN', 'OVC', 'VV'):
            forecast_primary = (amt, height)
            break
    
    observed_primary = None
    for amt, height, conv in observed_layers:
        if amt in ('BKN', 'OVC', 'VV'):
            observed_primary = (amt, height)
            break
    
    if forecast_primary is None and observed_primary is None:
        return True  # No significant layers in either
    
    if forecast_primary is None or observed_primary is None:
        return False  # One has ceiling, other doesn't
    
    fcst_amt, fcst_height = forecast_primary
    obs_amt, obs_height = observed_primary
    
    # Amount must be same or adjacent
    if fcst_amt not in ('BKN', 'OVC', 'VV') or obs_amt not in ('BKN', 'OVC', 'VV'):
        return False
    
    # Height tolerance
    if fcst_height is None or obs_height is None:
        # Can't verify height, accept based on amount
        return True
    
    # ICAO tolerance: ±1 category
    fcst_class = classify_ceiling(fcst_height)
    obs_class = classify_ceiling(obs_height)
    
    return abs(fcst_class - obs_class) <= 1

# =========================================================
# DATA LOADING AND VALIDATION
# =========================================================

def load_and_validate_data():
    """Load TAF and METAR data with validation."""
    print("=" * 70)
    print("TAF VERIFICATION - WMO/ICAO Annex 3 Criteria")
    print("=" * 70)
    
    # Load files
    print("\n[STEP 1] Loading data files...")
    
    if not os.path.exists(taf_file):
        raise FileNotFoundError(f"TAF file not found: {taf_file}")
    if not os.path.exists(metar_file):
        raise FileNotFoundError(f"METAR file not found: {metar_file}")
    
    taf_df = pd.read_csv(taf_file)
    metar_df = pd.read_csv(metar_file)
    
    print(f"  TAF records loaded: {len(taf_df)}")
    print(f"  METAR records loaded: {len(metar_df)}")
    
    # Validate required columns
    taf_required = ['st_date', 'end_date', 'wind_dir', 'wind_spd', 'vis', 'weather', 'clouds']
    metar_required = ['DATETIME', 'WIND_DIR', 'WIND_SPEED', 'VISIBILITY', 'CLOUDS']
    
    missing_taf = [c for c in taf_required if c not in taf_df.columns]
    missing_metar = [c for c in metar_required if c not in metar_df.columns]
    
    if missing_taf:
        raise ValueError(f"Missing TAF columns: {', '.join(missing_taf)}")
    if missing_metar:
        raise ValueError(f"Missing METAR columns: {', '.join(missing_metar)}")
    
    print("  ✓ All required columns present")
    
    return taf_df, metar_df

def prepare_dataframes(taf_df, metar_df):
    """Parse and normalize datetime and numeric fields."""
    print("\n[STEP 2] Parsing and normalizing data...")
    
    # TAF datetimes
    taf_df['st_date'] = pd.to_datetime(taf_df['st_date'], dayfirst=True, errors='coerce')
    taf_df['end_date'] = pd.to_datetime(taf_df['end_date'], dayfirst=True, errors='coerce')
    
    # METAR datetimes
    metar_df['DATETIME'] = pd.to_datetime(metar_df['DATETIME'], errors='coerce')
    
    # Remove rows with invalid datetimes
    taf_df = taf_df.dropna(subset=['st_date', 'end_date'])
    metar_df = metar_df.dropna(subset=['DATETIME'])
    
    print(f"  TAF periods after datetime parsing: {len(taf_df)}")
    print(f"  METAR observations after datetime parsing: {len(metar_df)}")
    
    return taf_df, metar_df

# =========================================================
# MAIN VERIFICATION LOOP
# =========================================================

def verify_taf_against_metar():
    """Main verification function."""
    
    # Load data
    try:
        taf_df, metar_df = load_and_validate_data()
        taf_df, metar_df = prepare_dataframes(taf_df, metar_df)
    except Exception as e:
        print(f"\n[ERROR] Data loading failed: {e}")
        return
    
    print("\n[STEP 3] Matching METAR observations to TAF periods...")
    print("-" * 70)
    
    all_verifications = []
    taf_periods_processed = 0
    metars_matched = 0
    
    # Process each TAF period
    for idx, taf_row in taf_df.iterrows():
        taf_start = taf_row['st_date']
        taf_end = taf_row['end_date']
        
        # Find METARs within this TAF period
        mask = (metar_df['DATETIME'] >= taf_start) & (metar_df['DATETIME'] <= taf_end)
        matching_metars = metar_df[mask]
        
        if matching_metars.empty:
            continue
        
        taf_periods_processed += 1
        
        # Parse TAF forecast elements
        taf_wind_dir = parse_wind_direction(taf_row.get('wind_dir'))
        taf_wind_spd = safe_float(taf_row.get('wind_spd'))
        taf_vis = parse_visibility(taf_row.get('vis'))
        taf_weather = parse_weather_phenomena(taf_row.get('weather'))
        taf_clouds = parse_cloud_field(taf_row.get('clouds'))
        taf_ceiling = get_ceiling(taf_clouds)
        
        # Process each matching METAR
        for _, metar_row in matching_metars.iterrows():
            metars_matched += 1
            
            # Parse METAR observed elements
            obs_wind_dir = parse_wind_direction(metar_row.get('WIND_DIR'))
            obs_wind_spd = safe_float(metar_row.get('WIND_SPEED'))
            obs_vis = parse_visibility(metar_row.get('VISIBILITY'))
            obs_weather = combine_metar_weather(metar_row)
            obs_clouds = parse_cloud_field(metar_row.get('CLOUDS'))
            obs_ceiling = get_ceiling(obs_clouds)
            
            # Perform verification checks
            wd_ok = wind_direction_accurate(taf_wind_dir, obs_wind_dir, obs_wind_spd)
            ws_ok = wind_speed_accurate(taf_wind_spd, obs_wind_spd)
            vis_ok = visibility_accurate_icao(taf_vis, obs_vis)
            wx_ok = weather_accurate(taf_weather, obs_weather)
            cloud_ceiling_ok = clouds_accurate_ceiling(taf_clouds, obs_clouds)
            cloud_detail_ok = clouds_accurate_amount_and_height(taf_clouds, obs_clouds)
            
            # Record verification result
            verification_row = {
                # TAF period info
                'TAF_START': taf_start,
                'TAF_END': taf_end,
                'TAF_GROUP': taf_row.get('group', 'INITIAL'),
                
                # Observation time
                'OBS_DATETIME': metar_row['DATETIME'],
                
                # Wind - Forecast
                'FCST_WIND_DIR': taf_wind_dir,
                'FCST_WIND_SPD_KT': taf_wind_spd,
                
                # Wind - Observed
                'OBS_WIND_DIR': obs_wind_dir,
                'OBS_WIND_SPD_KT': obs_wind_spd,
                
                # Wind - Verification
                'WIND_DIR_ACCURATE': wd_ok,
                'WIND_SPD_ACCURATE': ws_ok,
                
                # Visibility - Forecast
                'FCST_VIS_M': taf_vis,
                'FCST_VIS_CLASS': classify_visibility(taf_vis),
                
                # Visibility - Observed
                'OBS_VIS_M': obs_vis,
                'OBS_VIS_CLASS': classify_visibility(obs_vis),
                
                # Visibility - Verification
                'VIS_ACCURATE': vis_ok,
                
                # Weather - Forecast
                'FCST_WEATHER': ', '.join(sorted(taf_weather)),
                
                # Weather - Observed
                'OBS_WEATHER': ', '.join(sorted(obs_weather)),
                
                # Weather - Verification
                'WEATHER_ACCURATE': wx_ok,
                
                # Clouds - Forecast
                'FCST_CLOUDS': taf_row.get('clouds', ''),
                'FCST_CEILING_FT': taf_ceiling,
                'FCST_CEILING_CLASS': classify_ceiling(taf_ceiling),
                
                # Clouds - Observed
                'OBS_CLOUDS': metar_row.get('CLOUDS', ''),
                'OBS_CEILING_FT': obs_ceiling,
                'OBS_CEILING_CLASS': classify_ceiling(obs_ceiling),
                
                # Clouds - Verification
                'CEILING_ACCURATE': cloud_ceiling_ok,
                'CLOUDS_ACCURATE_DETAIL': cloud_detail_ok,
            }
            
            all_verifications.append(verification_row)
        
        # Progress indicator
        if taf_periods_processed % 10 == 0:
            print(f"  Processed {taf_periods_processed} TAF periods, {metars_matched} METAR matches...")
    
    print(f"\n  ✓ Completed: {taf_periods_processed} TAF periods, {metars_matched} total verifications")
    
    # Create results DataFrame
    if not all_verifications:
        print("\n[WARNING] No matching METAR observations found for any TAF period!")
        return pd.DataFrame()
    
    results_df = pd.DataFrame(all_verifications)
    
    # Calculate and display accuracy statistics
    print_verification_statistics(results_df)
    
    # Save results
    save_results(results_df)
    
    return results_df

# =========================================================
# STATISTICS AND REPORTING
# =========================================================

def print_verification_statistics(results_df):
    """Calculate and print verification statistics."""
    print("\n" + "=" * 70)
    print("VERIFICATION STATISTICS (WMO/ICAO Criteria)")
    print("=" * 70)
    
    total = len(results_df)
    
    if total == 0:
        print("No verifications to analyze.")
        return
    
    print(f"\nTotal verifications: {total}")
    
    # Wind Direction
    wd_count = results_df['WIND_DIR_ACCURATE'].notna().sum()
    wd_accurate = results_df['WIND_DIR_ACCURATE'].sum()
    wd_pct = (wd_accurate / wd_count * 100) if wd_count > 0 else 0
    print(f"\nWind Direction:")
    print(f"  Verified: {wd_count} | Accurate: {wd_accurate} | Accuracy: {wd_pct:.1f}%")
    
    # Wind Speed
    ws_count = results_df['WIND_SPD_ACCURATE'].notna().sum()
    ws_accurate = results_df['WIND_SPD_ACCURATE'].sum()
    ws_pct = (ws_accurate / ws_count * 100) if ws_count > 0 else 0
    print(f"\nWind Speed (±10 kt):")
    print(f"  Verified: {ws_count} | Accurate: {ws_accurate} | Accuracy: {ws_pct:.1f}%")
    
    # Visibility
    vis_count = results_df['VIS_ACCURATE'].notna().sum()
    vis_accurate = results_df['VIS_ACCURATE'].sum()
    vis_pct = (vis_accurate / vis_count * 100) if vis_count > 0 else 0
    print(f"\nVisibility (ICAO Annex 3 criteria):")
    print(f"  Verified: {vis_count} | Accurate: {vis_accurate} | Accuracy: {vis_pct:.1f}%")
    
    # Weather
    wx_count = results_df['WEATHER_ACCURATE'].notna().sum()
    wx_accurate = results_df['WEATHER_ACCURATE'].sum()
    wx_pct = (wx_accurate / wx_count * 100) if wx_count > 0 else 0
    print(f"\nWeather Phenomena (occurrence-based):")
    print(f"  Verified: {wx_count} | Accurate: {wx_accurate} | Accuracy: {wx_pct:.1f}%")
    
    # Ceiling
    ceiling_count = results_df['CEILING_ACCURATE'].notna().sum()
    ceiling_accurate = results_df['CEILING_ACCURATE'].sum()
    ceiling_pct = (ceiling_accurate / ceiling_count * 100) if ceiling_count > 0 else 0
    print(f"\nCeiling (ICAO threshold categories):")
    print(f"  Verified: {ceiling_count} | Accurate: {ceiling_accurate} | Accuracy: {ceiling_pct:.1f}%")
    
    # Clouds (detailed)
    cloud_count = results_df['CLOUDS_ACCURATE_DETAIL'].notna().sum()
    cloud_accurate = results_df['CLOUDS_ACCURATE_DETAIL'].sum()
    cloud_pct = (cloud_accurate / cloud_count * 100) if cloud_count > 0 else 0
    print(f"\nClouds (amount + height matching):")
    print(f"  Verified: {cloud_count} | Accurate: {cloud_accurate} | Accuracy: {cloud_pct:.1f}%")
    
    # Overall
    print(f"\n" + "-" * 70)
    print("OVERALL SUMMARY:")
    accuracy_cols = ['WIND_DIR_ACCURATE', 'WIND_SPD_ACCURATE', 'VIS_ACCURATE', 
                     'WEATHER_ACCURATE', 'CEILING_ACCURATE']
    overall_accurate = results_df[accuracy_cols].all(axis=1).sum()
    overall_pct = (overall_accurate / total * 100)
    print(f"  All elements accurate: {overall_accurate}/{total} ({overall_pct:.1f}%)")
    print("=" * 70)

def save_results(results_df):
    """Save verification results to CSV."""
    print(f"\n[STEP 4] Saving results...")
    
    try:
        # Create output directory if needed
        output_dir = os.path.dirname(output_file)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
        
        # Save to CSV
        results_df.to_csv(output_file, index=False, encoding='utf-8')
        print(f"  ✓ Results saved to: {output_file}")
        print(f"  Total rows: {len(results_df)}")
        
    except Exception as e:
        print(f"  ✗ Failed to save results: {e}")

# =========================================================
# MAIN EXECUTION
# =========================================================

if __name__ == "__main__":
    try:
        results = verify_taf_against_metar()
        print("\n[SUCCESS] TAF verification completed!")
        
    except FileNotFoundError as e:
        print(f"\n[ERROR] {e}")
        exit(1)
    except Exception as e:
        print(f"\n[ERROR] Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        exit(1)

TAF VERIFICATION - WMO/ICAO Annex 3 Criteria

[STEP 1] Loading data files...
  TAF records loaded: 6
  METAR records loaded: 1489
  ✓ All required columns present

[STEP 2] Parsing and normalizing data...
  TAF periods after datetime parsing: 6
  METAR observations after datetime parsing: 1489

[STEP 3] Matching METAR observations to TAF periods...
----------------------------------------------------------------------

  ✓ Completed: 6 TAF periods, 30 total verifications

VERIFICATION STATISTICS (WMO/ICAO Criteria)

Total verifications: 30

Wind Direction:
  Verified: 30 | Accurate: 30 | Accuracy: 100.0%

Wind Speed (±10 kt):
  Verified: 30 | Accurate: 30 | Accuracy: 100.0%

Visibility (ICAO Annex 3 criteria):
  Verified: 30 | Accurate: 15 | Accuracy: 50.0%

Weather Phenomena (occurrence-based):
  Verified: 30 | Accurate: 30 | Accuracy: 100.0%

Ceiling (ICAO threshold categories):
  Verified: 30 | Accurate: 30 | Accuracy: 100.0%

Clouds (amount + height matching):
  Verified: 30 | Accu