In [1]:
import os
import pandas as pd
import keras

from sklearn.preprocessing import MinMaxScaler




In [4]:
def get_data_frame():
    df = pd.read_csv(r'C:\CODE\tropical_cyclone_prediction\resource\RSMC_Best_Track_Data.csv')

    # ---------------------------

    direction_30_mapping = {'(symmetric circle)': 1, 'Northeast (NE)': 6, 'South (S)': 3,
                            'East (E)': 5, 'Southeast (SE)': 9, 'West (W)': 4, 'North (N)': 2,
                            'Northwest (NW)': 6, 'Southwest (SW)': 8}
    df['Direction of the longest radius of 30kt winds or greater'] = df[
        'Direction of the longest radius of 30kt winds or greater'].map(direction_30_mapping)

    direction_50_mapping = {'(symmetric circle)': 1, 'Northeast (NE)': 6, 'South (S)': 3,
                            'East (E)': 5, 'Southeast (SE)': 9, 'West (W)': 4, 'North (N)': 2,
                            'Northwest (NW)': 6, 'Southwest (SW)': 8,
                            'No direction (Longest radius of 50kt winds is 0)': 9}
    df['Direction of the longest radius of 50kt winds or greater'] = df[
        'Direction of the longest radius of 50kt winds or greater'].map(direction_50_mapping)

    indicator_mapping = {'#': 1, ' ': 0}
    df['Indicator of landfall or passage'] = df['Indicator of landfall or passage'].map(indicator_mapping)

    # ----------------------

    df = df.drop(['International number ID', 'Name of the storm', 'Grade'], axis=1)
    columns_to_check = [
        'Direction of the longest radius of 50kt winds or greater',
        'The longest radius of 50kt winds or greater',
        'The shortest radius of 50kt winds or greater',
        'Direction of the longest radius of 30kt winds or greater',
        'The longest radius of 30kt winds or greater',
        'The shortest radius of 30kt winds or greater'
    ]
    df = df.dropna(subset=columns_to_check)

    # -----------------------

    one_hot_encoded = pd.get_dummies(df['Direction of the longest radius of 50kt winds or greater'],
                                     prefix='Direction of the longest radius of 50kt winds or greater', dtype=int)
    df = pd.concat([df, one_hot_encoded], axis=1)

    one_hot_encoded2 = pd.get_dummies(df['Direction of the longest radius of 30kt winds or greater'],
                                      prefix='Direction of the longest radius of 30kt winds or greater', dtype=int)
    df = pd.concat([df, one_hot_encoded2], axis=1)
    df = df.drop(['Direction of the longest radius of 50kt winds or greater',
                  'Direction of the longest radius of 30kt winds or greater'], axis=1)

    # ----------------------

    df['Time of analysis'] = pd.to_datetime(df['Time of analysis'])
    df['Time of analysis'] = df['Time of analysis'].apply(lambda x: x.timestamp())

    # -----------------------
    return df

def min_max_scaler(df):
    
    
    columns_to_normalize = df.columns[:]

    # 实例化 MinMaxScaler
    scaler = MinMaxScaler()

    # 对选择的列进行归一化
    df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])

    # 打印处理后的新 DataFrame 的信息
    #df.tail()

    return df

def split_data(df):

    split_index = int(0.7 * len(df))

    # 分割DataFrame
    train_set = df.iloc[:split_index, :]
    test_set = df.iloc[split_index:, :]

    train_x = train_set.drop(["Latitude of the center", "Longitude of the center"], axis=1)
    train_y = train_set.loc[:, ["Latitude of the center", "Longitude of the center"]]
    test_x = test_set.drop(["Latitude of the center", "Longitude of the center"], axis=1)
    test_y = test_set.loc[:, ["Latitude of the center", "Longitude of the center"]]
    # 给训练集的标签打上时间序列
    first_column = train_x.iloc[:, 0]
    train_y.insert(0, 'Time of analysis', first_column)
    

    return train_x, train_y, test_x, test_y

df = get_data_frame()
df_min_max = min_max_scaler(df)


In [6]:
df_min_max

Unnamed: 0,Time of analysis,Latitude of the center,Longitude of the center,Central pressure,Maximum sustained wind speed,The longest radius of 50kt winds or greater,The shortest radius of 50kt winds or greater,The longest radius of 30kt winds or greater,The shortest radius of 30kt winds or greater,Indicator of landfall or passage,...,Direction of the longest radius of 50kt winds or greater_8.0,Direction of the longest radius of 50kt winds or greater_9.0,Direction of the longest radius of 30kt winds or greater_1.0,Direction of the longest radius of 30kt winds or greater_2.0,Direction of the longest radius of 30kt winds or greater_3.0,Direction of the longest radius of 30kt winds or greater_4.0,Direction of the longest radius of 30kt winds or greater_5.0,Direction of the longest radius of 30kt winds or greater_6.0,Direction of the longest radius of 30kt winds or greater_8.0,Direction of the longest radius of 30kt winds or greater_9.0
25272,0.000000,0.084536,0.722877,0.857143,0.142857,0.000000,0.00,0.126506,0.208333,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25273,0.000015,0.094845,0.715802,0.900000,0.142857,0.000000,0.00,0.126506,0.208333,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25274,0.000030,0.105155,0.707547,0.928571,0.142857,0.000000,0.00,0.126506,0.208333,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25275,0.000044,0.113402,0.699292,0.942857,0.095238,0.000000,0.00,0.096386,0.166667,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25276,0.000059,0.119588,0.689858,0.942857,0.095238,0.000000,0.00,0.096386,0.166667,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68727,0.999941,0.468041,0.413915,0.750000,0.238095,0.246154,0.24,0.301205,0.250000,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
68728,0.999956,0.496907,0.428066,0.785714,0.190476,0.246154,0.24,0.337349,0.250000,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
68729,0.999970,0.513402,0.445755,0.785714,0.190476,0.215385,0.20,0.337349,0.250000,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
68730,0.999985,0.544330,0.464623,0.785714,0.190476,0.184615,0.16,0.337349,0.250000,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
df_min_max.to_csv(r'C:\CODE\tropical_cyclone_prediction\resource\normalized_data.csv')

In [35]:
x_train, y_train, text_x, test_y = split_data(df_min_max)

# ----- For test -------
test_head_y = test_y.loc[text_x.index[-1000:-995]]

test_head_x = text_x.loc[text_x.index[-1000:-995]]

test_tail_y = test_y.loc[text_x.index[-995:-990]]

test_tail_x = text_x.loc[text_x.index[-995:-990]]

ValueError: could not convert string to float: 'PATSY'