In [None]:
import pandas as pd
import numpy as np

In [None]:
df=pd.read_csv('ibtracs.ALL.list.v04r01 (1).csv')

  df=pd.read_csv('ibtracs.ALL.list.v04r01 (1).csv')


In [None]:
mf = df[['ISO_TIME', 'LAT', 'LON', 'STORM_SPEED', 'STORM_DIR']].copy()

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def preprocess_cyclone_data(df, task='path'):
    """
    Preprocesses cyclone data for LSTM and ML models.

    Parameters:
        df (DataFrame): Raw cyclone dataset.
        task (str): 'path' for trajectory prediction (LSTM), 'speed_dir' for speed/direction prediction (ML).

    Returns:
        Processed DataFrame (X, y) and scalers (if LSTM).
    """
    df = df.copy()  # Avoid modifying original data

    # Convert time column to datetime
    df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

    # Set index for resampling
    df.set_index('ISO_TIME', inplace=True)

    # Handle duplicate timestamps (keep first occurrence)
    df = df[~df.index.duplicated(keep='first')]

    # Convert numeric columns before resampling
    for col in ['STORM_SPEED', 'STORM_DIR', 'LAT', 'LON']:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Convert objects to numeric before interpolating
    df = df.infer_objects(copy=False)

    # Resample data every 6 hours and interpolate missing values
    df = df.resample('6h').interpolate()

    # Reset index after resampling
    df.reset_index(inplace=True)

    # Circular encoding for wind direction
    df['dir_sin'] = np.sin(np.deg2rad(df['STORM_DIR']))
    df['dir_cos'] = np.cos(np.deg2rad(df['STORM_DIR']))

    # Interaction terms
    df['lat_lon_interaction'] = df['LAT'] * df['LON']
    df['speed_lat_interaction'] = df['STORM_SPEED'] * df['LAT']
    df['speed_lon_interaction'] = df['STORM_SPEED'] * df['LON']

    # Create lag features
    df['STORM_SPEED_LAG1'] = df['STORM_SPEED'].shift(1)
    df['LAT_LAG'] = df['LAT'].shift(1)
    df['LON_LAG'] = df['LON'].shift(1)

    # Moving Averages
    df['LAT_MA3'] = df['LAT'].rolling(window=3).mean()
    df['LON_MA3'] = df['LON'].rolling(window=3).mean()
    df['SPEED_MA3'] = df['STORM_SPEED'].rolling(window=3).mean()

    # Differences (Change over time)
    df['DIST_CHANGE'] = np.sqrt((df['LAT'] - df['LAT_LAG'])**2 + (df['LON'] - df['LON_LAG'])**2)
    df['SPEED_CHANGE'] = df['STORM_SPEED'].diff()

    # Standard deviation over 3 periods
    df['SPEED_STD3'] = df['STORM_SPEED'].rolling(window=3).std()

    # Time-based features (Extract **after** resetting index)
    df['YEAR'] = df['ISO_TIME'].dt.year
    df['MONTH'] = df['ISO_TIME'].dt.month
    df['DAY'] = df['ISO_TIME'].dt.day
    df['HOUR'] = df['ISO_TIME'].dt.hour

    # Fill missing values with forward fill
    df.ffill(inplace=True)

    # Define feature sets based on task
    if task == 'path':
        features = ['LAT', 'LON', 'STORM_SPEED', 'HOUR', 'MONTH',
                    'lat_lon_interaction', 'speed_lat_interaction', 'speed_lon_interaction',
                    'dir_sin', 'dir_cos']
        target_cols = ['LAT', 'LON']  # Predicting future position
    elif task == 'speed_dir':
        features = ['LAT', 'LON', 'HOUR', 'MONTH', 'dir_sin', 'dir_cos',
                    'STORM_SPEED_LAG1', 'LAT_LAG', 'LON_LAG',
                    'LAT_MA3', 'LON_MA3', 'SPEED_MA3', 'SPEED_STD3']
        target_cols = ['STORM_SPEED', 'STORM_DIR']  # Predicting speed and direction
    else:
        raise ValueError("Invalid task type! Choose 'path' or 'speed_dir'.")

    # Drop NaN values after feature creation
    df.dropna(inplace=True)

    # Extract features and target
    X = df[features].values
    y = df[target_cols].values

    # Scaling (for LSTM only)
    if task == 'path':
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()
        X = scaler_X.fit_transform(X)
        y = scaler_y.fit_transform(y)

        # Reshape for LSTM [samples, timesteps, features]
        X = X.reshape((X.shape[0], 1, X.shape[1]))

        return X, y, scaler_X, scaler_y  # Return scalers for inverse transformation

    return X, y  # ML models don’t need reshaping or scaling

In [None]:
from xgboost import XGBRegressor

# Create models
speed_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
dir_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

In [None]:
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import numpy as np

# Preprocess the data
X, y = preprocess_cyclone_data(df, task='speed_dir')  # X = features, y = [STORM_SPEED, STORM_DIR]

# Split into training & test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Extract target variables
y_train_speed, y_train_dir = y_train[:, 0], y_train[:, 1]  # Speed & Direction separately
y_test_speed, y_test_dir = y_test[:, 0], y_test[:, 1]

# Create XGBoost models
speed_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
dir_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Train models
speed_model.fit(X_train, y_train_speed)
dir_model.fit(X_train, y_train_dir)

# Predict on test data
y_pred_speed = speed_model.predict(X_test)
y_pred_dir = dir_model.predict(X_test)

# Evaluate performance
from sklearn.metrics import mean_absolute_error, r2_score

print("⚡ **Speed Model Performance**")
print("MAE:", mean_absolute_error(y_test_speed, y_pred_speed))
print("R² Score:", r2_score(y_test_speed, y_pred_speed))

print("\n🧭 **Direction Model Performance**")
print("MAE:", mean_absolute_error(y_test_dir, y_pred_dir))
print("R² Score:", r2_score(y_test_dir, y_pred_dir))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce')
  df = df.resample('6h').interpolate()


⚡ **Speed Model Performance**
MAE: 0.9540082789638384
R² Score: 0.93415075550222

🧭 **Direction Model Performance**
MAE: 0.10304005607331024
R² Score: 0.9999960459225579


In [None]:
import joblib

# Save the trained models
joblib.dump(speed_model, "speed_model.pkl")  # Save speed prediction model
joblib.dump(dir_model, "dir_model.pkl")  # Save direction prediction model

print("✅ Models saved successfully!")


✅ Models saved successfully!


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

def preprocess_cyclone_data_lstm(df):
    """Preprocess cyclone data for LSTM model."""

    # Ensure datetime format
    df = df.copy()  # Avoid modifying original DataFrame
    df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')

    # Extract time-based features
    df['hour'] = df['ISO_TIME'].dt.hour
    df['month'] = df['ISO_TIME'].dt.month

    # Convert numeric columns to float
    numeric_cols = ['LAT', 'LON', 'STORM_SPEED', 'STORM_DIR']
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

    # Drop rows with missing values
    df = df.dropna(subset=numeric_cols).copy()

    # Convert storm direction to sine & cosine
    df.loc[:, 'dir_sin'] = np.sin(np.deg2rad(df['STORM_DIR']))
    df.loc[:, 'dir_cos'] = np.cos(np.deg2rad(df['STORM_DIR']))

    # Interaction terms
    df.loc[:, 'lat_lon_interaction'] = df['LAT'] * df['LON']
    df.loc[:, 'speed_lat_interaction'] = df['STORM_SPEED'] * df['LAT']
    df.loc[:, 'speed_lon_interaction'] = df['STORM_SPEED'] * df['LON']

    # Define features and target
    features = [
        'LAT', 'LON', 'STORM_SPEED', 'hour', 'month',
        'lat_lon_interaction', 'speed_lat_interaction', 'speed_lon_interaction',
        'dir_sin', 'dir_cos'
    ]
    target_cols = ['LAT', 'LON']

    # Extract values
    X = df[features].values
    y = df[target_cols].values

    # Feature scaling
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Reshape for LSTM input (samples, timesteps=1, features)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

    return X_scaled, y_scaled, scaler_X, scaler_y

# Preprocess data for LSTM
X_scaled, y_scaled, scaler_X, scaler_y = preprocess_cyclone_data_lstm(df)

# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Build LSTM Model
model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(32, activation='relu'),
    Dense(2)  # Predicting (LAT, LON)
])

# Compile Model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Define Early Stopping (optional)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train Model
history = model.fit(
    X_train, y_train,
    epochs=50, batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1, callbacks=[early_stopping]
)

# Predict
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Display Sample Predictions
y_test_actual = scaler_y.inverse_transform(y_test)
for i in range(5):
    print(f"Actual: {y_test_actual[i]}, Predicted: {y_pred[i]}")

  df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
  super().__init__(**kwargs)


Epoch 1/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 4ms/step - loss: 0.0163 - mae: 0.0309 - val_loss: 7.3949e-05 - val_mae: 0.0060
Epoch 2/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 4ms/step - loss: 3.6736e-05 - mae: 0.0041 - val_loss: 1.6908e-05 - val_mae: 0.0028
Epoch 3/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 4ms/step - loss: 2.6474e-05 - mae: 0.0035 - val_loss: 1.9108e-05 - val_mae: 0.0033
Epoch 4/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 4ms/step - loss: 2.1969e-05 - mae: 0.0032 - val_loss: 7.4142e-06 - val_mae: 0.0019
Epoch 5/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 4ms/step - loss: 1.9582e-05 - mae: 0.0030 - val_loss: 7.0120e-06 - val_mae: 0.0020
Epoch 6/50
[1m17936/17936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 4ms/step - loss: 1.5041e-05 - mae: 0.0026 - val_loss: 7.4416e-06 - val_mae: 0.0020
Epoch 7/50
[1

KeyboardInterrupt: 

In [None]:
joblib.dump(scaler_X, "scaler_X.pkl")
joblib.dump(scaler_y, "scaler_y.pkl")


In [None]:
import numpy as np
import pandas as pd
import joblib  # For saving/loading scalers
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

def preprocess_cyclone_data_lstm(df, scaler_X=None, scaler_y=None, training=True):
    """Preprocess cyclone data for LSTM training and inference."""

    df = df.copy()  # Avoid modifying the original DataFrame
    df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')

    # Extract time-based features
    df['hour'] = df['ISO_TIME'].dt.hour.fillna(0).astype(int)
    df['month'] = df['ISO_TIME'].dt.month.fillna(0).astype(int)

    # Convert numeric columns to float
    numeric_cols = ['LAT', 'LON', 'STORM_SPEED', 'STORM_DIR']
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Fill missing values with median instead of dropping data
    df.fillna(df.median(numeric_only=True), inplace=True)

    # Convert storm direction to sine & cosine
    df['dir_sin'] = np.sin(np.deg2rad(df['STORM_DIR']))
    df['dir_cos'] = np.cos(np.deg2rad(df['STORM_DIR']))

    # Interaction terms
    df['lat_lon_interaction'] = df['LAT'] * df['LON']
    df['speed_lat_interaction'] = df['STORM_SPEED'] * df['LAT']
    df['speed_lon_interaction'] = df['STORM_SPEED'] * df['LON']

    # Define feature columns
    features = [
        'LAT', 'LON', 'STORM_SPEED', 'hour', 'month',
        'lat_lon_interaction', 'speed_lat_interaction', 'speed_lon_interaction',
        'dir_sin', 'dir_cos'
    ]
    target_cols = ['LAT', 'LON']

    # Extract feature values
    X = df[features].values
    y = df[target_cols].values if training else None

    # Load or fit scalers
    if training:
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()
        X_scaled = scaler_X.fit_transform(X)
        y_scaled = scaler_y.fit_transform(y)

        # Save scalers for later inference
        joblib.dump(scaler_X, "scaler_X.pkl")
        joblib.dump(scaler_y, "scaler_y.pkl")

    else:
        scaler_X = joblib.load("scaler_X.pkl")
        X_scaled = scaler_X.transform(X)
        y_scaled = None

    # Reshape for LSTM input (samples, timesteps=1, features)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

    return X_scaled, y_scaled, scaler_X, scaler_y

# Load dataset (Ensure `df` is available)
X_scaled, y_scaled, scaler_X, scaler_y = preprocess_cyclone_data_lstm(df)

# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Build LSTM Model
model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(32, activation='relu'),
    Dense(2)  # Predicting (LAT, LON)
])

# Compile Model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Define Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train Model
history = model.fit(
    X_train, y_train,
    epochs=50, batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1, callbacks=[early_stopping]
)

# Save trained model for later inference
model.save("cyclone_lstm_model.h5")

# Predict
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Display Sample Predictions
y_test_actual = scaler_y.inverse_transform(y_test)
for i in range(5):
    print(f"Actual: {y_test_actual[i]}, Predicted: {y_pred[i]}")

Epoch 1/50


  super().__init__(**kwargs)


[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0727 - mae: 0.1120 - val_loss: 1.0720e-04 - val_mae: 0.0076
Epoch 2/50
[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 8.4055e-05 - mae: 0.0066 - val_loss: 6.6684e-05 - val_mae: 0.0065
Epoch 3/50
[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - loss: 5.0780e-05 - mae: 0.0052 - val_loss: 8.8522e-05 - val_mae: 0.0071
Epoch 4/50
[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - loss: 4.1799e-05 - mae: 0.0046 - val_loss: 3.0547e-05 - val_mae: 0.0039
Epoch 5/50
[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 3.0355e-05 - mae: 0.0040 - val_loss: 2.0953e-05 - val_mae: 0.0038
Epoch 6/50
[1m2527/2527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 2.2312e-05 - mae: 0.0035 - val_loss: 8.9374e-06 - val_mae: 0.0023
Epoch 7/50
[1m2527/2527[0m [32m━━━



[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
Actual: [17.6 87.2], Predicted: [17.537634 86.9107  ]
Actual: [18.2 84.6], Predicted: [18.213432 84.32055 ]
Actual: [ 11.5 111.7], Predicted: [ 11.43135 111.57744]
Actual: [ 25.  -60.4], Predicted: [ 24.967434 -60.16016 ]
Actual: [-14.8  76. ], Predicted: [-14.761486  75.89571 ]


In [None]:
# Load trained model and scalers
model = load_model("cyclone_lstm_model.h5")
scaler_X = joblib.load("scaler_X.pkl")
scaler_y = joblib.load("scaler_y.pkl")

# Load new cyclone data (`new_df` should be a Pandas DataFrame)
X_new, _, _, _ = preprocess_cyclone_data_lstm(new_df, training=False)

# Predict
y_pred_scaled = model.predict(X_new)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Print predicted locations
print("Predicted cyclone locations (LAT, LON):", y_pred)

NameError: name 'load_model' is not defined