# 🛸 UFO Forecast Analysis — Predicting Optimal Conditions

This notebook builds on the enriched UFO sightings dataset and analyzes future weather conditions to identify the most favorable locations for potential sightings in the next 7 days.

### Objectives:
- Score past sightings based on visibility, precipitation, and duration
- Filter for fall months (September–November), keep the 1,000 most frequently reported locations, and select the top 500 sightings by visibility score
- Fetch 7-day weather forecasts for those locations using Open-Meteo
- Calculate a predictive "UFO Weather Optimal" score for each day
- Identify and rank the best locations based on forecasted conditions
- Enrich results with city and country names via reverse geocoding
- Visualize the top locations using an interactive heat map

All utility functions are modularized in `forecast_utils.py` for clarity and reuse.

In [4]:
# ============================================
# 🔭 UFO Forecast Analysis — Notebook 2
# ============================================

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from time import sleep
from tqdm import tqdm
from forecast_utils import (
    get_forecast,
    enrich_forecast_df,
    pivot_forecast,
    get_location_info,
    weather_map_en,
    weather_visibility_scores,
)

# 1️⃣ + 2️⃣ Read the enriched UFO dataset (sightings with historical weather) and
# derive two columns from each row's weather code:
#   - weather_description: looked up in weather_map_en, "Unknown" when there is no match
#   - weather_visibility_score: looked up in weather_visibility_scores, 50 when there is no match
url = "https://raw.githubusercontent.com/martin-paz-y/ufo_project/refs/heads/main/ufo_merged.csv"
ufo_weather_full = pd.read_csv(url).assign(
    weather_description=lambda df: df["weather_code"].map(weather_map_en).fillna("Unknown"),
    weather_visibility_score=lambda df: df["weather_code"].map(weather_visibility_scores).fillna(50),
)

# 3️⃣ Precipitation penalty on a 0–100 scale:
# precipitation_sum is capped to [0, 10], scaled by 10, then bounded to [0, 100].
capped_precipitation = ufo_weather_full["precipitation_sum"].clip(0, 10)
ufo_weather_full["precipitation_penalty"] = (capped_precipitation * 10).clip(0, 100)

# 4️⃣ Clear-night flag: 1 when the sighting is labelled "night" (case-insensitive)
# AND its weather code is 0, 1 or 2; 0 otherwise.
night_mask = ufo_weather_full["lighting_conditions"].str.lower() == "night"
clear_code_mask = ufo_weather_full["weather_code"].isin([0, 1, 2])
ufo_weather_full["is_clear_night"] = np.where(night_mask & clear_code_mask, 1, 0)

# 5️⃣ Duration score (log-normalized 0–100)
# score = log(duration + 1) / log(max_duration + 1) * 100, rounded to 2 decimals.
# When the duration column is absent, every row gets the neutral score 50.
if "duration (seconds)" in ufo_weather_full.columns:
    # Coerce to numeric so stray text values become NaN instead of breaking np.log.
    duration_seconds = pd.to_numeric(ufo_weather_full["duration (seconds)"], errors="coerce")
    # A negative duration is invalid input; treat it as missing rather than
    # taking the log of a non-positive number.
    duration_seconds = duration_seconds.where(duration_seconds >= 0)

    max_duration = duration_seconds.max() + 1
    log_max_duration = np.log(max_duration)

    if np.isfinite(log_max_duration) and log_max_duration > 0:
        ufo_weather_full["duration_score"] = (
            np.log(duration_seconds + 1) / log_max_duration * 100
        ).round(2)
    else:
        # All durations are zero or missing: the normaliser would be 0 or NaN,
        # so fall back to the same neutral score used when the column is absent.
        ufo_weather_full["duration_score"] = 50
else:
    ufo_weather_full["duration_score"] = 50

# Shared 0–100 components used by both scores below.
visibility_component = ufo_weather_full["weather_visibility_score"]
dryness_component = 100 - ufo_weather_full["precipitation_penalty"]
clear_night_component = ufo_weather_full["is_clear_night"] * 100

# 6️⃣ Composite UFO Visibility Score:
# 40% weather visibility, 20% dryness, 20% clear night, 20% duration.
ufo_weather_full["UFO_Visibility_Score"] = (
    visibility_component * 0.40
    + dryness_component * 0.20
    + clear_night_component * 0.20
    + ufo_weather_full["duration_score"] * 0.20
).round(2)

# 7️⃣ Weather-only optimal score (no duration term):
# 60% weather visibility, 20% dryness, 20% clear night.
ufo_weather_full["UFO_weather_optimal"] = (
    visibility_component * 0.60
    + dryness_component * 0.20
    + clear_night_component * 0.20
).round(2)

# 8️⃣ Keep sightings dated September–November, then restrict them to the
# 1000 (latitude, longitude) pairs with the most fall sightings.
ufo_weather_full["date"] = pd.to_datetime(ufo_weather_full["date"], errors="coerce")
in_fall = ufo_weather_full["date"].dt.month.isin([9, 10, 11])
ufo_fall = ufo_weather_full[in_fall]

sightings_per_location = ufo_fall.groupby(["latitude", "longitude"]).size()
busiest_locations = sightings_per_location.sort_values(ascending=False).head(1000)
top_1000_coords = busiest_locations.reset_index()[["latitude", "longitude"]]

# The inner merge keeps only fall sightings located at one of those coordinates.
ufo_top_1000_fall = pd.merge(
    ufo_fall,
    top_1000_coords,
    on=["latitude", "longitude"],
    how="inner",
)

# 9️⃣ The 500 sightings with the highest UFO_Visibility_Score (copied so that
# later edits do not touch the filtered frame).
top_500 = ufo_top_1000_fall.nlargest(500, "UFO_Visibility_Score").copy()

# 🔟 Forecast window: today plus the following six days (7 calendar days, inclusive).
# "Today" is captured once so both bounds are derived from the same instant,
# even if the cell runs across midnight.
forecast_start = datetime.today()
start_date = forecast_start.strftime("%Y-%m-%d")
end_date = (forecast_start + timedelta(days=6)).strftime("%Y-%m-%d")

# 1️⃣1️⃣ Fetch the forecast once per unique location.
# Several of the top-500 sightings share the same coordinates; requesting each
# coordinate pair only once avoids redundant API calls (and one second of sleep
# per duplicate) as well as duplicated rows in the forecast table.
forecast_coords = top_500[["latitude", "longitude"]].drop_duplicates()

forecast_data = []
for lat, lon in tqdm(
    forecast_coords.itertuples(index=False, name=None),
    total=len(forecast_coords),
    desc="Fetching forecasts...",
):
    forecast = get_forecast(lat, lon, start_date, end_date)
    # Skip locations for which the helper returned nothing usable.
    if forecast and "daily" in forecast:
        daily = forecast["daily"]
        for i, day in enumerate(daily["time"]):
            forecast_data.append({
                "latitude": lat,
                "longitude": lon,
                "date": day,
                "temp_max": daily["temperature_2m_max"][i],
                "temp_min": daily["temperature_2m_min"][i],
                "precipitation_sum": daily["precipitation_sum"][i],
                "weather_code": daily["weather_code"][i]
            })
    # Pause between requests to stay gentle on the forecast API.
    sleep(1)

# 1️⃣2️⃣ Turn the collected daily records into a DataFrame and pass it through
# enrich_forecast_df (helper from forecast_utils).
forecast_df = enrich_forecast_df(pd.DataFrame(forecast_data))

# 1️⃣3️⃣ Reshape with pivot_forecast (helper from forecast_utils); the displayed
# result has one row per location with per-day scores and a mean score.
forecast_pivot_sorted = pivot_forecast(forecast_df)


# 1️⃣4️⃣ Reverse geocoding: attach city and country to every location row.
def _city_and_country(location_row):
    """Look up city/country for one row via get_location_info(latitude, longitude)."""
    return get_location_info(location_row["latitude"], location_row["longitude"])


tqdm.pandas(desc="Fetching city and country...")
forecast_pivot_sorted[["city", "country"]] = forecast_pivot_sorted.progress_apply(
    _city_and_country,
    axis=1,
)

# 1️⃣5️⃣ Show the first ten rows
forecast_pivot_sorted.head(10)

Fetching city and country...: 100%|██████████| 331/331 [05:33<00:00,  1.01s/it]


Unnamed: 0,latitude,longitude,optimal_day_1,optimal_day_2,optimal_day_3,optimal_day_4,optimal_day_5,optimal_day_6,optimal_day_7,mean_optimal_score,city,country
46,32.74,-97.11,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,Arlington,United States
40,32.45,-99.73,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,Abilene,United States
44,32.73,-114.62,94.0,94.0,100.0,100.0,100.0,100.0,100.0,98.285714,Winterhaven,United States
67,33.44,-112.36,94.0,94.0,100.0,100.0,100.0,100.0,100.0,98.285714,Goodyear,United States
33,31.95,-98.74,100.0,100.0,100.0,100.0,100.0,100.0,88.0,98.285714,Sidney,United States
62,33.37,-112.58,94.0,94.0,94.0,100.0,100.0,100.0,100.0,97.428571,Buckeye,United States
64,33.41,-111.91,94.0,88.0,100.0,100.0,100.0,100.0,88.0,95.714286,Tempe,United States
80,33.63,-112.33,88.0,88.0,94.0,94.0,100.0,100.0,100.0,94.857143,Surprise,United States
73,33.54,-112.19,94.0,94.0,94.0,94.0,94.0,94.0,100.0,94.857143,Glendale,United States
52,32.88,-111.76,94.0,88.0,100.0,100.0,100.0,94.0,88.0,94.857143,Casa Grande,United States


In [5]:
import folium
from folium.plugins import HeatMap

# 1️⃣ Keep only rows with complete coordinates and score.
# folium's HeatMap rejects data containing NaN, so incomplete rows are dropped first.
heat_points = forecast_pivot_sorted[
    ["latitude", "longitude", "mean_optimal_score"]
].dropna()

# 2️⃣ Centre the map on the mean position of the plotted locations
map_center = [
    heat_points["latitude"].mean(),
    heat_points["longitude"].mean()
]

# 3️⃣ Create the base map
heatmap = folium.Map(location=map_center, zoom_start=4)

# 4️⃣ Prepare [latitude, longitude, weight] triples for the HeatMap.
# Leaflet.heat clamps point intensity to its `max` option (1.0 by default), so
# raw scores near 100 would all saturate and the score would have no visible
# effect. Rescale to 0–1 instead.
# NOTE(review): assumes mean_optimal_score is on a 0–100 scale, as the displayed
# results suggest — confirm against pivot_forecast.
heat_weights = heat_points["mean_optimal_score"].clip(0, 100) / 100
heat_data = (
    heat_points[["latitude", "longitude"]]
    .assign(weight=heat_weights)
    .to_numpy()
    .tolist()
)

# 5️⃣ Add the heat layer to the map
HeatMap(
    heat_data,
    radius=15,
    max_zoom=6,
    blur=10,
    min_opacity=0.4
).add_to(heatmap)

# 6️⃣ Display the map
heatmap