# PSMSL Sea Level Data Extraction
Tải dữ liệu mực nước biển trung bình tháng (bộ dữ liệu RLR) từ PSMSL, giai đoạn 1979-2013, cho 5 trạm: HonDau, HonNgu, QuyNhon, DaNang, VungTau

In [1]:
import requests
import pandas as pd
import numpy as np

# Station name -> numeric station ID; the ID is substituted into the PSMSL
# download URL when the data files are fetched.
stations = dict(
    HonDau=841,
    HonNgu=1003,
    QuyNhon=1449,
    DaNang=1475,
    VungTau=1495,
)

In [2]:
# Download the raw monthly RLR text file of every station from PSMSL.
# station_data maps station name -> response body (str).
station_data = {}
for name, sid in stations.items():
    url = f"https://psmsl.org/data/obtaining/rlr.monthly.data/{sid}.rlrdata"
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors (404, 500, ...). Without this check an error
    # page would be stored as "data" and only surface later as an empty or
    # broken table during parsing.
    response.raise_for_status()
    station_data[name] = response.text
    print(f"✓ {name}")

✓ HonDau
✓ HonNgu
✓ QuyNhon
✓ DaNang
✓ VungTau


In [3]:
# Phân tích và gộp dữ liệu
def parse_data(raw, station):
    """Turn the raw monthly text of one station into a Year x 12-month table.

    Every usable line is ``;``-separated: the first field is a decimal year
    (e.g. ``1979.0417``) and the second an integer value. The fractional
    part of the year is mapped to a month number with
    ``int(fraction * 12 + 1)``, clamped to 1..12. A value of ``-99999`` is
    treated as missing and stored as NaN. Blank lines, lines starting with
    ``;`` and lines whose first two fields cannot be parsed are skipped.

    Args:
        raw: Full text of the station file.
        station: Label written to the ``Station`` column.

    Returns:
        pandas.DataFrame with one row per year found in ``raw`` and the
        columns ``Year``, ``Station``, ``Jan`` .. ``Dec``, ``Annual_Average``
        and ``Source``. Months that do not occur in ``raw`` are NaN.
        ``Annual_Average`` is the mean of the available (non-NaN) months of
        the row. If no line can be parsed, an empty frame with the same
        columns is returned.

    Raises:
        ValueError: If the same year/month combination occurs more than once
            (raised by ``DataFrame.pivot``).
    """
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    out_columns = ['Year', 'Station'] + months + ['Annual_Average', 'Source']
    missing_flag = -99999

    records = []
    for line in raw.split('\n'):
        if not line.strip() or line.startswith(';'):
            continue
        try:
            fields = line.split(';')
            ym = float(fields[0])
            year = int(ym)
            month = max(1, min(12, int((ym - year) * 12 + 1)))
            val = int(fields[1])
        except (ValueError, IndexError, OverflowError):
            # Header or malformed line: skip it, but let unrelated exceptions
            # (e.g. KeyboardInterrupt) propagate instead of swallowing them.
            continue
        records.append((year, month, np.nan if val == missing_flag else val))

    if not records:
        return pd.DataFrame(columns=out_columns)

    # Reshape to one row per year and one column per month
    df = pd.DataFrame(records, columns=['Year', 'Month', 'Value'])
    table = df.pivot(index='Year', columns='Month', values='Value')
    # Force all 12 month columns to exist, even if a month never occurs in
    # the input; selecting a missing column would raise KeyError otherwise.
    table = table.reindex(columns=list(range(1, 13)))
    # Assigning plain labels also drops the 'Month' axis name left by pivot.
    table.columns = months
    table = table.reset_index()

    table['Station'] = station
    table['Annual_Average'] = table[months].mean(axis=1)
    table['Source'] = 'PSMSL'

    return table[out_columns]

# Parse every downloaded station file, stack the per-station tables, then
# keep only the years 1979-2013 (inclusive), ordered by station and year.
all_df = [
    parse_data(raw_text, station_name)
    for station_name, raw_text in station_data.items()
]
result = pd.concat(all_df, ignore_index=True)
in_period = result['Year'].between(1979, 2013)
result = result[in_period].sort_values(['Station', 'Year']).reset_index(drop=True)

print(f"✓ {len(result)} bản ghi (1979-2013)")
print(f"Trạm: {sorted(result['Station'].unique())}")

✓ 175 bản ghi (1979-2013)
Trạm: ['DaNang', 'HonDau', 'HonNgu', 'QuyNhon', 'VungTau']


In [4]:
# Show the first rows of the combined table and the row count per station
print("Mẫu dữ liệu:")
sample = result.head(10)
display(sample)
print("\nThống kê theo trạm:")
rows_per_station = result.groupby('Station').size()
print(rows_per_station)

Mẫu dữ liệu:


Month,Year,Station,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Annual_Average,Source
0,1979,DaNang,6885.0,6875.0,6905.0,6875.0,6885.0,6895.0,6725.0,6855.0,6945.0,7265.0,7315.0,7055.0,6956.666667,PSMSL
1,1980,DaNang,6935.0,6945.0,6795.0,6825.0,6855.0,6775.0,6785.0,6835.0,7165.0,7265.0,7275.0,7145.0,6966.666667,PSMSL
2,1981,DaNang,7105.0,6965.0,6875.0,6855.0,6895.0,6935.0,6885.0,6895.0,6935.0,7215.0,7305.0,7125.0,6999.166667,PSMSL
3,1982,DaNang,6855.0,6815.0,6745.0,6665.0,6775.0,6845.0,6815.0,6755.0,7015.0,7075.0,7085.0,7085.0,6877.5,PSMSL
4,1983,DaNang,6985.0,6825.0,6845.0,6765.0,6755.0,6775.0,6775.0,6845.0,6865.0,7235.0,7245.0,7095.0,6917.5,PSMSL
5,1984,DaNang,6985.0,6925.0,6845.0,6815.0,6855.0,6855.0,6755.0,6895.0,6845.0,7185.0,7195.0,7105.0,6938.333333,PSMSL
6,1985,DaNang,6925.0,6925.0,6755.0,6845.0,6785.0,6895.0,6775.0,6815.0,6975.0,7125.0,7165.0,7145.0,6927.5,PSMSL
7,1986,DaNang,6985.0,6925.0,6915.0,6805.0,6835.0,6795.0,6805.0,6965.0,7005.0,7195.0,7235.0,7095.0,6963.333333,PSMSL
8,1987,DaNang,6985.0,6885.0,6835.0,6835.0,6785.0,6835.0,6795.0,6845.0,6985.0,7025.0,7195.0,7125.0,6927.5,PSMSL
9,1988,DaNang,6965.0,6955.0,6935.0,6895.0,6815.0,6835.0,6805.0,6795.0,6925.0,7355.0,7225.0,7025.0,6960.833333,PSMSL



Thống kê theo trạm:
Station
DaNang     35
HonDau     35
HonNgu     35
QuyNhon    35
VungTau    35
dtype: int64


In [7]:
# Write the combined table to CSV and Excel; both files share one base path
# and differ only in their extension.
path = '/home/phamminhtien/Desktop/Sealeving/data/sea_level_data_1979-2013_psmsl'
csv_path = f'{path}.csv'
xlsx_path = f'{path}.xlsx'
result.to_csv(csv_path, index=False)
result.to_excel(xlsx_path, index=False)
print(f"✓ Đã lưu:\n  • {path}.csv\n  • {path}.xlsx")

✓ Đã lưu:
  • /home/phamminhtien/Desktop/Sealeving/data/sea_level_data_1979-2013_psmsl.csv
  • /home/phamminhtien/Desktop/Sealeving/data/sea_level_data_1979-2013_psmsl.xlsx
