In [None]:
!pip install pandas beautifulsoup4

In [31]:
import pandas as pd
from bs4 import BeautifulSoup
import zipfile
import os

def _extract_gridcode_from_table(html_soup):
    """Extract gridcode from HTML table format."""
    for td in html_soup.find_all('td'):
        text = td.get_text(strip=True)
        if text.lower() == 'gridcode':
            next_td = td.find_next_sibling('td')
            if next_td:
                return next_td.get_text(strip=True)
    return None

def _extract_gridcode_from_text(html_soup):
    """Extract gridcode from text format (fallback)."""
    text_content = html_soup.get_text(separator='|')
    parts = text_content.split('|')
    for i, part in enumerate(parts):
        if 'gridcode' in part.lower():
            if ':' in part:
                return part.split(':')[-1].strip()
            elif i + 1 < len(parts):
                return parts[i + 1].strip()
    return None

def _extract_gridcode(description):
    """Extract gridcode from description element."""
    if not description:
        return None
    
    html_soup = BeautifulSoup(description.text, 'html.parser')
    gridcode = _extract_gridcode_from_table(html_soup)
    return gridcode if gridcode else _extract_gridcode_from_text(html_soup)

def _extract_coordinates(placemark):
    """Extract x, y coordinates from placemark."""
    coords_tag = placemark.find('coordinates')
    if not coords_tag:
        return None, None
    
    try:
        coords_raw = coords_tag.text.strip().split()[0]
        parts = coords_raw.split(',')
        return parts[0], parts[1]
    except IndexError:
        return None, None

def _extract_placemark_data(placemark):
    """Build one output row (a dict) from a single placemark.

    Keys:
        Id: text of the placemark's ``name`` element, or ``'N/A'`` if absent.
        gridcode: value extracted from the ``description`` element.
        x, y: first two fields of the ``coordinates`` element.
    """
    name_tag = placemark.find('name')
    identifier = name_tag.text if name_tag else 'N/A'
    gridcode = _extract_gridcode(placemark.find('description'))
    x_value, y_value = _extract_coordinates(placemark)
    return {
        'Id': identifier,
        'gridcode': gridcode,
        'x': x_value,
        'y': y_value,
    }


# Main Function 
def kmz_to_csv(kmz_file_path, csv_output_path):
    """Convert the placemarks of a KMZ archive into a CSV file.

    Reads the first ``.kml`` member of the archive (extension matched
    case-insensitively), extracts one row per ``Placemark`` element and
    writes the columns ``Id, x, y, gridcode`` to ``csv_output_path``.
    The output directory is created if it does not exist.

    Args:
        kmz_file_path: Path of the KMZ (zip) archive to read.
        csv_output_path: Path of the CSV file to write.

    Returns:
        The DataFrame that was written. It has the four columns above even
        when the KML contains no placemarks.

    Raises:
        IndexError: If the archive contains no ``.kml`` member. The type
            matches what the previous list-indexing lookup raised.
    """
    columns = ['Id', 'x', 'y', 'gridcode']

    with zipfile.ZipFile(kmz_file_path, 'r') as kmz:
        kml_filename = next(
            (member for member in kmz.namelist() if member.lower().endswith('.kml')),
            None,
        )
        if kml_filename is None:
            raise IndexError(f"No .kml file found inside '{kmz_file_path}'")
        with kmz.open(kml_filename, 'r') as kml_file:
            kml_content = kml_file.read()

    # The 'xml' feature of BeautifulSoup requires lxml to be installed.
    soup = BeautifulSoup(kml_content, 'xml')
    data = [_extract_placemark_data(pm) for pm in soup.find_all('Placemark')]

    # Passing `columns` keeps the schema stable for an empty placemark list;
    # selecting columns afterwards would raise KeyError on an empty frame.
    df = pd.DataFrame(data, columns=columns)

    output_dir = os.path.dirname(csv_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(csv_output_path, index=False)
    print(f"Success! Converted '{kmz_file_path}' to '{csv_output_path}'")
    return df

In [32]:
# Replace with your KMZ file path
input_kmz = './data/raw/Flood level reports/รายงานการคาดการณ์ความลึกน้ำท่วมจากแบบจำลอง วันที่ 24 พฤศจิกายน 2568 เวลา 22.46 น Shenqi-02/bc5_20251124_2200.kmz'
# Swap only the file extension; str.replace would also rewrite any other
# '.kmz' occurrence inside the file name.
output_csv = './data/raw/flood/' + os.path.splitext(os.path.basename(input_kmz))[0] + '.csv'

if os.path.exists(input_kmz):
    df_result = kmz_to_csv(input_kmz, output_csv)
    display(df_result.head())
else:
    print(f"File {input_kmz} not found.")

Success! Converted './data/raw/Flood level reports/รายงานการคาดการณ์ความลึกน้ำท่วมจากแบบจำลอง วันที่ 24 พฤศจิกายน 2568 เวลา 22.46 น Shenqi-02/bc5_20251124_2200.kmz' to './data/raw/flood/bc5_20251124_2200.csv'


Unnamed: 0,Id,x,y,gridcode
0,1,100.3712499996667,7.299583332999997,2
1,2,100.3754166663333,7.299583332999997,3
2,3,100.3654166663333,7.298749999666663,4
3,4,100.3729166663333,7.298749999666663,2
4,5,100.3737499996667,7.298749999666663,4
