In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam, AdamW
import matplotlib.pyplot as plt

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    df = df.dropna().reset_index(drop=True)
    #df['datetime'] = pd.to_datetime(df['year'] + ' ' + df['month'])
    df = df.sort_index()
    return df

def create_sequences_for_storm(storm_data, seq_length):
    features = ['lat', 'long', 'wind', 'pressure']
    sequences = []
    targets = []
    for i in range(len(storm_data) - seq_length):
        seq = storm_data.iloc[i:i+seq_length][features].values
        target = storm_data.iloc[i+seq_length][['lat', 'long']].values
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences, dtype=np.float32), np.array(targets, dtype=np.float32)

def build_model(seq_length, n_features):
    model = Sequential([
        LSTM(64, activation='relu', input_shape=(seq_length, n_features), return_sequences=True),
        LSTM(32, activation='relu'),
        Dense(2)
    ])
    model.compile(optimizer=AdamW(learning_rate=0.001), loss='mse', metrics=["accuracy"])
    return model

def train_model(model, X_train, y_train, epochs=50, batch_size=32):
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, 
                        validation_split=0.2, verbose=1)
    return history

def evaluate_model(model, X_test, y_test):
    mse = model.evaluate(X_test, y_test, verbose=0)
    print(f"Mean Squared Error on test set: {mse}")

def plot_predictions(y_true, y_pred, storm_name):
    plt.figure(figsize=(12, 6))
    plt.plot(y_true[:, 0], y_true[:, 1], label='Actual Path', marker='o')
    plt.plot(y_pred[:, 0], y_pred[:, 1], label='Predicted Path', marker='x')
    plt.legend()
    plt.title(f'Actual vs Predicted Path for {storm_name}')
    plt.xlabel('Latitude')
    plt.ylabel('Longitude')
    plt.show()

def predict_next_position(model, last_sequence, scaler):
    next_pred = model.predict(np.array([last_sequence]))
    return scaler.inverse_transform(next_pred)[0]

def determine_sequence_length(storm_data):
    n_points = len(storm_data)
    if n_points <= 10:
        return max(2, n_points // 2)
    elif n_points <= 20:
        return n_points // 3
    else:
        return min(n_points // 4, 10)  # Cap at 10 for very long storms

def process_storm(storm_data):
    features = ['lat', 'long', 'wind', 'pressure']
    
    seq_length = determine_sequence_length(storm_data)
    print(f"Using sequence length of {seq_length}")
    
    scaler = MinMaxScaler()
    storm_data[features] = scaler.fit_transform(storm_data[features])
    
    X, y = create_sequences_for_storm(storm_data, seq_length)
    
    if len(X) < 2:
        print(f"Not enough data points for storm {storm_data['name'].iloc[0]} after sequencing")
        return None, None
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = build_model(seq_length, len(features))
    history = train_model(model, X_train, y_train)
    
    evaluate_model(model, X_test, y_test)
    
    y_pred = model.predict(X_test)
    
    # Inverse transform predictions and actual values
    y_pred_inv = scaler.inverse_transform(y_pred)
    y_test_inv = scaler.inverse_transform(y_test)
    
    plot_predictions(y_test_inv, y_pred_inv, storm_data['name'].iloc[0])
    
    # Predict next position
    if len(X_test) > 0:
        last_sequence = X_test[-1]
        next_position = predict_next_position(model, last_sequence, scaler)
        print(f"Predicted next position: Lat {next_position[0]:.2f}, Lon {next_position[1]:.2f}")
    else:
        print("Not enough test data to make a prediction")
    
    return model, scaler

# Main execution
file_path = 'HurriData.csv'
df = load_and_preprocess_data(file_path)

#Process each storm individually
for storm_name, storm_data in df.groupby('name'):
    print(f"\nProcessing storm: {storm_name}")
    if len(storm_data) > 3:  # Ensure we have at least 4 data points
        model, scaler = process_storm(storm_data)
    else:
        print(f"Not enough data points for storm {storm_name}")
    print("=" * 50)


Processing storm: Alex
Using sequence length of 6
Epoch 1/50


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.4167 - loss: 0.3090 - val_accuracy: 0.2500 - val_loss: 0.1891
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.4167 - loss: 0.3011 - val_accuracy: 0.2500 - val_loss: 0.1845
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.4167 - loss: 0.2937 - val_accuracy: 0.2500 - val_loss: 0.1800
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.4167 - loss: 0.2866 - val_accuracy: 0.2500 - val_loss: 0.1755
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.4167 - loss: 0.2796 - val_accuracy: 0.2500 - val_loss: 0.1712
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.3333 - loss: 0.2725 - val_accuracy: 0.2500 - val_loss: 0.1669
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

ValueError: operands could not be broadcast together with shapes (4,2) (4,) (4,2) 

In [25]:
model.save("Testmodel.h5")

NameError: name 'model' is not defined