In [1]:
import pandas as pd
import re
pd.set_option('display.max_rows', None)

# Read the composite warning file and keep only its non-blank lines, each
# with surrounding whitespace removed.
with open("AERODROM WARNING COMPOSITE 0F SEPTEMBER 2023.txt", "r", encoding="utf-8") as f:
    # filter(None, ...) drops the empty strings left after stripping blank lines.
    lines = list(filter(None, map(str.strip, f)))

# ---------------------------------------------------------------------------
# Parse the warning bulletins held in `lines` into `data`: one dict per
# warning, keyed by the column names used for the final table.
#
# Layout the loop relies on (taken from the indexing below; not verified
# against the raw file): a line containing "WARNING", one line that is
# skipped, a header line ("<station> <issue time> ... VALID <from>/<to>"),
# and then a single weather line with wind, gust, thunderstorm and
# FCST/OBS text.
# ---------------------------------------------------------------------------

# Compass point -> bearing in degrees (every value is a multiple of 10).
# Defined once instead of being rebuilt for every bulletin.
wind_dir_dict = {
    "N": 0,
    "NNE": 20,
    "NE": 50,
    "ENE": 70,
    "E": 90,
    "ESE": 110,
    "SE": 140,
    "SSE": 160,
    "S": 180,
    "SSW": 200,
    "SW": 230,
    "WSW": 250,
    "W": 270,
    "WNW": 290,
    "NW": 320,
    "NNW": 340,
}

_VALIDITY_RE = re.compile(r"VALID\s*(\d{6,8})/(\d{6,8})")
_WIND_SPEED_RE = re.compile(r"SFC WSPD (\d+KT)")
_GUST_RE = re.compile(r"MAX(\d+)")
_WIND_DIR_RE = re.compile(r"FROM\s+([A-Z]+)")

# (phrase to look for, code to record), checked in this order so that the
# more specific phrase wins: "HVY TSRA" before "TSRA", "TSRA" before "TS".
# A bare "TSRA"/"TS" also covers the "MOD ..." variants.
# NOTE(review): these are plain substring tests, so any word containing
# "TS" (or "OBS" below) would also match. Left as-is because the raw file
# is not visible here; tighten to word boundaries if false positives show up.
_SIG_WX_RULES = (
    ("HVY TSRA", "+TSRA"),
    ("FBL TSRA", "-TSRA"),
    ("TSRA", "TSRA"),
    ("HVY TS", "+TS"),
    ("FBL TS", "-TS"),
    ("TS", "TS"),
)


def _first_group(pattern, text):
    """Return group 1 of the first match of `pattern` in `text`, or ""."""
    found = pattern.search(text)
    return found.group(1) if found else ""


def _parse_header(main_line):
    """Extract station, issue time and validity window from a header line.

    Returns a dict with the keys "Station", "Issue date/time",
    "Validity from" and "Validity To"; fields that cannot be found are "".
    """
    parts = main_line.split()
    validity = _VALIDITY_RE.search(main_line)
    validity_from, validity_to = validity.groups() if validity else ("", "")
    return {
        "Station": parts[0] if parts else "",
        # A header consisting of a single token used to raise IndexError;
        # the issue time is now left blank like any other missing field.
        "Issue date/time": f"{parts[1]}Z" if len(parts) > 1 else "",
        "Validity from": validity_from,
        "Validity To": validity_to,
    }


def _parse_weather(wx_line):
    """Extract wind, gust, thunderstorm code and FCST/OBS from a weather line.

    Returns a dict with the keys "Wind dir (deg)", "Wind Speed", "Gust",
    "Significant Wx" and "FCST/OBS"; fields that cannot be found are "".
    """
    gust_digits = _first_group(_GUST_RE, wx_line)
    compass_point = _first_group(_WIND_DIR_RE, wx_line)

    if "FCST" in wx_line:
        fcst_obs = "FCST"
    elif "OBS" in wx_line:  # also true for "OBSD"
        fcst_obs = "OBS"
    else:
        fcst_obs = ""

    return {
        # Unknown or missing compass points map to "" (converted to <NA> later).
        "Wind dir (deg)": wind_dir_dict.get(compass_point, ""),
        "Wind Speed": _first_group(_WIND_SPEED_RE, wx_line),
        "Gust": f"{gust_digits}KT" if gust_digits else "",
        "Significant Wx": next(
            (code for phrase, code in _SIG_WX_RULES if phrase in wx_line), ""
        ),
        "FCST/OBS": fcst_obs,
    }


data = []
i = 0
while i < len(lines):
    if "WARNING" in lines[i]:
        # Header sits two lines below the marker, the weather line right after it.
        if i + 3 >= len(lines):
            # Truncated bulletin at the end of the file: no complete record left.
            break
        data.append({**_parse_header(lines[i + 2]), **_parse_weather(lines[i + 3])})
        # Resume scanning after the weather line (the trailing += 1 steps past it).
        i += 3
    i += 1


# Build the table with an explicit column list: if no warnings were parsed,
# the frame still has the expected (empty) columns, so the conversion below
# does not fail with a KeyError.
df = pd.DataFrame(
    data,
    columns=[
        "Station",
        "Issue date/time",
        "Validity from",
        "Validity To",
        "Wind dir (deg)",
        "Wind Speed",
        "Gust",
        "Significant Wx",
        "FCST/OBS",
    ],
)

# Missing directions are stored as ""; coerce them to <NA> and keep the
# remaining values in a nullable integer column.
df["Wind dir (deg)"] = pd.to_numeric(df["Wind dir (deg)"], errors="coerce").astype("Int64")


In [2]:
# Show the parsed warnings table. display.max_rows was set to None in the
# first cell, so every row is rendered.
df

Unnamed: 0,Station,Issue date/time,Validity from,Validity To,Wind dir (deg),Wind Speed,Gust,Significant Wx,FCST/OBS
0,VABB,040730Z,40800,41200,250.0,17KT,27KT,,FCST
1,VABB,060715Z,60730,61130,270.0,17KT,27KT,,FCST
2,VABB,062300Z,62300,70300,250.0,17KT,27KT,,OBS
3,VABB,100330Z,100400,100800,270.0,17KT,27KT,,FCST
4,VABB,100730Z,100800,101200,270.0,17KT,27KT,,FCST
5,VABB,110630Z,110700,111100,,17KT,27KT,,FCST
6,VABB,111030Z,111100,111500,250.0,17KT,27KT,,FCST
7,VABB,111300Z,111330,111730,250.0,17KT,27KT,TSRA,FCST
8,VABB,120800Z,120830,121230,250.0,17KT,27KT,,FCST
9,VABB,161030Z,161100,161500,250.0,17KT,27KT,,FCST


In [14]:
# Summary statistics for the parsed table.
# NOTE(review): the saved output looks stale. Its execution count (14) is out
# of sequence, it lists a "Wind dir" column, and it shows count/unique/top/freq
# for "Wind dir (deg)", none of which matches the nine-column frame with an
# Int64 direction column built in the first cell. Re-run after Restart & Run All.
df.describe()

Unnamed: 0,Station,Issue date/time,Validity from,Validity To,Wind dir,Wind dir (deg),Wind Speed,Gust,Significant Wx,FCST/OBS
count,80,80,80,80,80,80,80,80,80.0,80
unique,2,55,55,55,12,11,3,3,6.0,3
top,VABB,180745,180815,181215,SW,230,17KT,27KT,,FCST
freq,56,2,2,2,20,20,65,63,34.0,63


In [3]:
# Column names, non-null counts and dtypes of the parsed table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Station          80 non-null     object
 1   Issue date/time  80 non-null     object
 2   Validity from    80 non-null     object
 3   Validity To      80 non-null     object
 4   Wind dir (deg)   66 non-null     Int64 
 5   Wind Speed       80 non-null     object
 6   Gust             80 non-null     object
 7   Significant Wx   80 non-null     object
 8   FCST/OBS         80 non-null     object
dtypes: Int64(1), object(8)
memory usage: 5.8+ KB


In [None]:
# NOTE(review): unexecuted repeat of the earlier `df` display cell.
df