In [2]:
import pandas as pd
import numpy as np
import joblib
import geojson

# 1. Load your best model and the scaler
# NOTE: joblib.load unpickles the file contents, so only point these paths
# at artifacts that come from a trusted source.
model_path = 'best_shark_model_lightgbm.pkl'
scaler_path = 'shark_model_scaler.pkl'

print("Loading the saved model and scaler...")
model = joblib.load(model_path)
scaler = joblib.load(scaler_path)
print("Model and scaler loaded successfully.")

# 2. Create a Grid for the North Atlantic
print("Creating prediction grid...")
grid_resolution = 0.5 # Degrees
min_lon, max_lon = -80, -60
min_lat, max_lat = 30, 50

# Axis coordinates; np.arange is half-open, so the max bound itself is excluded.
lons = np.arange(min_lon, max_lon, grid_resolution)
lats = np.arange(min_lat, max_lat, grid_resolution)

# One row per grid cell. The 2-D grids have shape (len(lats), len(lons)) and
# are flattened row-major, so longitude varies fastest.
lon_grid, lat_grid = np.meshgrid(lons, lats)
grid_df = pd.DataFrame({'lon': lon_grid.ravel(), 'lat': lat_grid.ravel()})

# 3. Engineer the SAME features for the grid that the model was trained on
print("Engineering features for the grid...")
month = 8  # Predict for August
hour = 22  # Predict for 10 PM

# Time features: hour and month are encoded as sine/cosine pairs so the
# values wrap around (hour 23 sits next to hour 0, December next to January).
hour_angle = 2 * np.pi * hour/24.0
month_angle = 2 * np.pi * month/12.0
cyclical_features = {
    'hour_sin': np.sin(hour_angle),
    'hour_cos': np.cos(hour_angle),
    'month_sin': np.sin(month_angle),
    'month_cos': np.cos(month_angle),
}
for column_name, column_value in cyclical_features.items():
    grid_df[column_name] = column_value

# The saved scaler expects to see the raw 'hour' and 'month' columns too,
# so they are re-created here as constants for every grid cell.
grid_df['hour'] = hour
grid_df['month'] = month

# Every remaining feature gets a constant 0.0 placeholder. The three location
# features in this list are overwritten with real values further down.
# NOTE(review): these zeros are set BEFORE scaling, so they are only "neutral"
# if the training data was centred near 0 for these columns - confirm.
# NOTE(review): 'night' stays 0.0 although hour = 22 - confirm this matches
# how 'night' / 'dawn_dusk' were defined at training time.
placeholder_features = [
    'speed_avg_roll', 'speed_std_roll', 'angle_avg_roll', 'angle_std_roll',
    'speed_percentile', 'is_slow', 'speed_change', 'high_turn_angle',
    'angle_consistency', 'speed_angle_ratio', 'movement_efficiency',
    'turning_intensity', 'lat_rounded', 'lon_rounded', 'distance_from_center',
    'dawn_dusk', 'night', 'speed_z_score', 'angle_z_score'
]
for placeholder_name in placeholder_features:
    grid_df[placeholder_name] = 0.0

# Recreate location-based features with actual values
grid_df['lat_rounded'] = grid_df['lat'].round(2)
grid_df['lon_rounded'] = grid_df['lon'].round(2)

# Approximate center of the data; distance is a plain Euclidean distance in
# degrees between each grid cell and that center.
lat_mean_approx = 40.0
lon_mean_approx = -70.0
lat_offset = grid_df['lat'] - lat_mean_approx
lon_offset = grid_df['lon'] - lon_mean_approx
grid_df['distance_from_center'] = np.sqrt(lat_offset**2 + lon_offset**2)

# Get the feature names IN THE CORRECT ORDER from the scaler object
try:
    # For newer scikit-learn versions
    X_train_columns = scaler.get_feature_names_out()
except AttributeError:
    # For older scikit-learn versions
    X_train_columns = scaler.feature_names_in_

# Fail early, with a readable message, if the grid lacks a training feature.
missing_columns = [col for col in X_train_columns if col not in grid_df.columns]
if missing_columns:
    raise KeyError(f"Grid is missing features expected by the scaler: {missing_columns}")

# Build the model input as a SEPARATE frame, in exactly the training column
# order. grid_df itself is left untouched so it still carries 'lon'/'lat' for
# the export step even when those are not training features, and so the
# 'probability' column below is not assigned onto a sliced copy.
X_grid = grid_df[X_train_columns]

# 4. Scale the Grid Features
print("Scaling grid features...")
grid_scaled = scaler.transform(X_grid)

# 5. Make Predictions
print("Predicting foraging probability on the grid...")
# Column 1 of predict_proba is taken as the probability of the positive class.
probabilities = model.predict_proba(grid_scaled)[:, 1]
grid_df['probability'] = probabilities
print("Prediction complete.")

# 6. Export to GeoJSON
print("Exporting results to hotspots.geojson...")
probability_threshold = 0.6  # You can adjust this threshold

# Pick the hotspot cells with one vectorized comparison instead of testing
# every row inside a Python-level iterrows() loop.
hotspots = grid_df[grid_df['probability'] > probability_threshold]

# One GeoJSON point feature per hotspot cell; GeoJSON positions are
# (longitude, latitude). itertuples yields the column values by name.
features = [
    geojson.Feature(
        geometry=geojson.Point((cell.lon, cell.lat)),
        properties={'probability': round(float(cell.probability), 4)},
    )
    for cell in hotspots.itertuples(index=False)
]

feature_collection = geojson.FeatureCollection(features)
with open('hotspots.geojson', 'w', encoding='utf-8') as f:
    geojson.dump(feature_collection, f)

print("\n✅ Phase 3 Complete! Your final, high-performance 'hotspots.geojson' is ready.")

Loading the saved model and scaler...
Model and scaler loaded successfully.
Creating prediction grid...
Engineering features for the grid...
Scaling grid features...
Predicting foraging probability on the grid...




Prediction complete.
Exporting results to hotspots.geojson...

✅ Phase 3 Complete! Your final, high-performance 'hotspots.geojson' is ready.
