In [171]:
import numpy as np
import polars as pl
import os
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
from typing import List, Tuple, Union
from datetime import datetime, timedelta
from pydantic import BaseModel
from loguru import logger
from enum import Enum
import random


class EndStatus(Enum):
    DISSIPATED = 0
    MOVE_OUT_OF_RESPONSIBILITY = 1
    MERGED = 2
    NEARLY_STATIONARY = 3


class CycloneCategory(Enum):
    BELOW_TD_OR_UNKNOWN = 0
    TROPICAL_DEPRESSION = 1  # 热带低压 (TD, 10.8-17.1m/s)
    TROPICAL_STORM = 2  # 热带风暴 (TS, 17.2-24.4 m/s)
    SEVERE_TROPICAL_STORM = 3  # 强热带风暴 (STS, 24.5-32.6 m/s)
    TYPHOON = 4  # 台风 (TY, 32.7-41.4 m/s)
    SEVERE_TYPHOON = 5  # 强台风 (STY, 41.5-50.9 m/s)
    SUPER_TYPHOON = 6  # 超强台风 (SuperTY, ≥51.0 m/s)
    EXTRATROPICAL = 9  # 变性 (The change is complete)


class HurricaneHeader(BaseModel):
    data_type: int
    country_code: int
    data_count: int
    hurricane_code: int
    china_hurricane_code: int
    end_status: EndStatus
    time_interval_hr: int
    hurricane_name: str
    dataset_record_time: datetime


class HurricaneEntry(BaseModel):
    date: datetime
    category: CycloneCategory
    latitude: float
    longitude: float
    lowest_pressure: int
    wind_speed: int


class Hurricane(BaseModel):
    header: HurricaneHeader
    entries: List[HurricaneEntry]


script_folder = Path(os.getcwd())
dataset_folder = script_folder / "CMABSTdata"

# https://tcdata.typhoon.org.cn/zjljsjj.html
# example_file = dataset_folder / "CH2022BST.txt"
example_file = dataset_folder / "CH1950BST.txt"
logger.info(f"example_file: {example_file}")


def parse_header(line: str) -> HurricaneHeader:
    entry = line.split()
    data_type = int(entry[0])
    country_code = int(entry[1])
    data_count = int(entry[2])
    hurricane_code = int(entry[3])
    try:
        china_hurricane_code = int(entry[4])
    except ValueError:
        # might be a tuple (a,b)
        codes = entry[4].split(",")
        china_hurricane_code = int(codes[0])
    hurricane_end_enum = int(entry[5])
    end_status = EndStatus(hurricane_end_enum)
    time_interval_hr = int(entry[6])
    hurricane_name = entry[7]
    dataset_record_time = entry[8]
    time_format = "%Y%m%d"
    dataset_record_time = datetime.strptime(dataset_record_time, time_format)
    return HurricaneHeader(data_type=data_type,
                           country_code=country_code,
                           data_count=data_count,
                           hurricane_code=hurricane_code,
                           china_hurricane_code=china_hurricane_code,
                           end_status=end_status,
                           time_interval_hr=time_interval_hr,
                           hurricane_name=hurricane_name,
                           dataset_record_time=dataset_record_time)


def parse_entry(line: str) -> HurricaneEntry:
    entry = line.split()
    date_str = entry[0]
    time_format = "%Y%m%d%H"
    date = datetime.strptime(date_str, time_format)
    category = int(entry[1])
    hurricane_category = CycloneCategory(category)
    latitude = float(int(entry[2])) / 10.0
    longitude = float(int(entry[3])) / 10.0
    # in hPa
    lowest_pressure = int(entry[4])
    # 2分钟平均近中心最大风速(MSW, m/s)
    # WND=9 表示 MSW < 10m/s,
    # WND=0 为缺测
    wind_speed = int(entry[5])
    # not sure about OWD
    return HurricaneEntry(date=date,
                          category=hurricane_category,
                          latitude=latitude,
                          longitude=longitude,
                          lowest_pressure=lowest_pressure,
                          wind_speed=wind_speed)


def parse_dataset(filename: str | Path):
    hurricanes: list[Hurricane] = []
    with open(filename, "r") as f:
        try:
            while True:
                # check if the line is empty
                l = f.readline()
                if not l:
                    break
                header = parse_header(l)
                count = header.data_count
                hurricane_entries = []
                for i in range(count):
                    entry = parse_entry(f.readline())
                    hurricane_entries.append(entry)
                hurricane = Hurricane(header=header, entries=hurricane_entries)
                hurricanes.append(hurricane)
        except ValueError as e:
            logger.error(f"ValueError: {e} for {filename}")
        except IndexError as e:
            logger.warning(f"IndexError: {e} for {filename}")
        except EOFError:
            logger.info(f"EOFError for {filename}")
    return hurricanes



2024-03-19 21:37:15.540 | INFO     | __main__:<module>:64 - example_file: c:\Users\cross\Desktop\code\hurrican\CMABSTdata\CH1950BST.txt


In [173]:
total_dataset: list[Hurricane] = []

for file in dataset_folder.glob("*.txt"):
    hurricanes = parse_dataset(file)
    total_dataset.extend(hurricanes)

logger.info(f"total_dataset: {len(total_dataset)}")

2024-03-19 21:37:26.885 | INFO     | __main__:<module>:7 - total_dataset: 2469


In [174]:
class FlatHurricaneEntry(BaseModel):
    name: str
    china_hurricane_code: int
    date: datetime
    category: CycloneCategory
    latitude: float
    longitude: float
    lowest_pressure: int
    wind_speed: int


def flat_hurricane_entries(
        hurricanes: list[Hurricane]) -> List[FlatHurricaneEntry]:

    def flat_one(h: Hurricane):
        name = h.header.hurricane_name
        hurricane_code = h.header.hurricane_code
        entries = h.entries
        return [
            FlatHurricaneEntry(name=name,
                               china_hurricane_code=hurricane_code,
                               date=e.date,
                               category=e.category,
                               latitude=e.latitude,
                               longitude=e.longitude,
                               lowest_pressure=e.lowest_pressure,
                               wind_speed=e.wind_speed) for e in entries
        ]

    entries = []
    for h in hurricanes:
        entries.extend(flat_one(h))
    return entries


flatten_entries = [
    e.model_dump() for e in flat_hurricane_entries(total_dataset)
]


def entry_enum_to_number(entry: dict[str, any]) -> dict[str, any]:
    entry['category'] = entry['category'].value
    return entry


flatten_entries_without_enum = [
    entry_enum_to_number(e) for e in flatten_entries
]

In [181]:
random.sample(flatten_entries_without_enum, 5)

[{'name': 'Fengshen',
  'china_hurricane_code': 7,
  'date': datetime.datetime(2008, 6, 19, 6, 0),
  'category': 2,
  'latitude': 10.2,
  'longitude': 130.0,
  'lowest_pressure': 990,
  'wind_speed': 23},
 {'name': 'Hester',
  'china_hurricane_code': 21,
  'date': datetime.datetime(1957, 10, 11, 0, 0),
  'category': 9,
  'latitude': 51.5,
  'longitude': 160.5,
  'lowest_pressure': 974,
  'wind_speed': 0},
 {'name': 'Tess',
  'china_hurricane_code': 36,
  'date': datetime.datetime(1978, 10, 31, 12, 0),
  'category': 0,
  'latitude': 13.5,
  'longitude': 146.5,
  'lowest_pressure': 1002,
  'wind_speed': 10},
 {'name': 'Trix',
  'china_hurricane_code': 4,
  'date': datetime.datetime(1957, 5, 11, 6, 0),
  'category': 4,
  'latitude': 21.8,
  'longitude': 150.4,
  'lowest_pressure': 973,
  'wind_speed': 40},
 {'name': 'Judy',
  'china_hurricane_code': 24,
  'date': datetime.datetime(1957, 10, 19, 18, 0),
  'category': 0,
  'latitude': 15.1,
  'longitude': 152.5,
  'lowest_pressure': 1000,
 

In [182]:
df = pl.DataFrame(flatten_entries_without_enum)

In [183]:
df.describe()

describe,name,china_hurricane_code,date,category,latitude,longitude,lowest_pressure,wind_speed
str,str,f64,str,f64,f64,f64,f64,f64
"""count""","""71705""",71705.0,"""71705""",71705.0,71705.0,71705.0,71705.0,71705.0
"""null_count""","""0""",0.0,"""0""",0.0,0.0,0.0,0.0,0.0
"""mean""",,17.650066,,2.821588,20.902251,134.228423,986.340004,23.758218
"""std""",,10.47291,,2.330128,9.283707,16.616481,20.931241,15.36339
"""min""","""(nameless)""",1.0,"""1949-01-13 00:...",0.0,0.5,95.0,870.0,0.0
"""max""","""Zola""",53.0,"""2022-12-13 06:...",9.0,70.1,243.9,1022.0,110.0
"""median""",,17.0,,2.0,19.3,132.5,995.0,20.0


In [188]:
#df.write_csv("hurricane.csv")

In [37]:
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.regularizers import l1, l2

# 文件夹路径
folder_path = "D:\\毕业论文\\数据集\\insert"

# 获取文件夹中所有文件
files = os.listdir(folder_path)

X_all, y_all = [], []

for file_name in files:
    file_path = os.path.join(folder_path, file_name)
    data = np.loadtxt(file_path, delimiter=' ')
    X, y = create_dataset(data, time_steps)
    X_all.extend(X)
    y_all.extend(y)

X_all = np.array(X_all)
y_all = np.array(y_all)

# 定义和编译 LSTM 模型
model = Sequential([
    LSTM(64, input_shape=(X_all.shape[1], X_all.shape[2]), kernel_regularizer=l2(0.01)),
    Dense(y_all.shape[1], kernel_regularizer=l2(0.01))
])

# 编译模型
model.compile(optimizer='adam', loss='mse')

# 训练模型
model.fit(X_all, y_all, epochs=2, batch_size=32)

# 保存模型
model.save('lstm_model.h5')


Epoch 1/2
 335/2293 [===>..........................] - ETA: 11s - loss: nan

KeyboardInterrupt: 

In [23]:
# 打印模型摘要
print(model.summary())


Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 64)                17920     
                                                                 
 dense_10 (Dense)            (None, 5)                 325       
                                                                 
Total params: 18,245
Trainable params: 18,245
Non-trainable params: 0
_________________________________________________________________
None


In [1]:
import torch.nn as nn
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    def forward(self, x):
        out, _ = self.lstm(x) # LSTM层
        out = self.fc(out[:, -1, :]) # 全连接层
        return out

ModuleNotFoundError: No module named 'torch._prims_common'