## Data Preprocessing

In [3]:
import numpy as np
import pandas as pd
import sys

In [None]:
import re

def dms_to_decimal(dms_str):
    """Convert DMS string like '25°04′34.42″N' to decimal degrees."""
    pattern = r"(\d+)°(\d+)'([\d.]+)\"?([NSEW])"
    match = re.match(pattern, dms_str.strip().replace("′", "'").replace("″", '"'))
    
    if not match:
        raise ValueError(f"Invalid DMS format: {dms_str}")
    
    degrees, minutes, seconds, direction = match.groups()
    decimal = int(degrees) + int(minutes) / 60 + float(seconds) / 3600
    
    if direction in ['S', 'W']:
        decimal *= -1
    
    return decimal

def split_lat_lon(latlon_str):
    """Split '25°04′34.42″N 121°25′08.59″E' into separate lat and lon decimal values."""
    parts = latlon_str.split()
    lat = dms_to_decimal(parts[0])
    lon = dms_to_decimal(parts[1])
    return lat, lon


In [23]:
station_list = pd.read_csv("../Raw Data/Meteorological Station List.csv")

station_list.head()

Unnamed: 0,Meteorological Station,LatitudeLongitude,File Name
0,五股 Wugu C0AI00,"25°04'34.42""N 121°25'08.59""E",逐日平均氣溫年報表_2024 五股 _ csv_1
1,斗南 Dounan C0K460,"23°40'49.72""N 120°28'12.83""E",逐日平均氣溫年報表_2024 斗南 _ csv_15
2,宜蘭 YILAN 467080,"24°45'56.85""N 121°44'54.03""E",逐日平均氣溫年報表_2024 宜蘭 _ csv_12
3,板橋 Banchiao C0AJ80,"25°00'44.75""N 121°26'26.30""E",逐日平均氣溫年報表_2024 板橋 _ csv
4,南投 Nantou C0I460,"23°54'47.55""N 120°40'45.06""E",逐日平均氣溫年報表_2024 南投 _ csv_7


In [24]:
# Apply conversion to the full DataFrame
stations_data = []

for _, row in station_list.iterrows():
    station = row['Meteorological Station']
    latlon = row['LatitudeLongitude']
    try:
        lat, lon = split_lat_lon(latlon)
        stations_data.append({'Station': station, 'Latitude': lat, 'Longitude': lon})
    except Exception as e:
        print(f"Failed to parse row: {row['Meteorological Station']}, error: {e}")

# Convert to DataFrame
stations_df = pd.DataFrame(stations_data)

stations_df.head()

Failed to parse row: 五股  Wugu C0AI00, error: Invalid DMS format: 
Failed to parse row: 斗南  Dounan C0K460, error: Invalid DMS format: 
Failed to parse row: 宜蘭  YILAN 467080, error: Invalid DMS format: 
Failed to parse row: 板橋  Banchiao C0AJ80, error: Invalid DMS format: 
Failed to parse row: 南投  Nantou C0I460, error: Invalid DMS format: 
Failed to parse row: 恆春  HENGCHUN 467590, error: Invalid DMS format: 
Failed to parse row: 苗栗  Miaoli C0E750, error: Invalid DMS format: 
Failed to parse row: 高雄  Kaohsiung 467441, error: Invalid DMS format: 
Failed to parse row: 基隆  KEELUNG 466940, error: Invalid DMS format: 
Failed to parse row: 深坑  Shenkeng C0AH80, error: Invalid DMS format: 
Failed to parse row: 新竹市東區  Dongqu Hsinshu City C0D660, error: Invalid DMS format: 
Failed to parse row: 嘉義  CHIAYI 467480, error: Invalid DMS format: 
Failed to parse row: 臺中  TAICHUNG 467490, error: Invalid DMS format: 
Failed to parse row: 臺北  TAIPEI 466920, error: Invalid DMS format: 
Failed to parse row: 臺南

Unnamed: 0,Station,Latitude,Longitude
0,員林 Yuanlin C0G650,23.948056,120.577778
1,桃園 Taoyuan C0C480,24.994167,121.315
