In [1]:
pip install langchain_openai langchain langchain_community

Collecting langchain_openai
  Downloading langchain_openai-0.3.7-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.19-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.39 (from langchain_openai)
  Downloading langchain_core-0.3.41-py3-none-any.whl.metadata (5.9 kB)
Collecting openai<2.0.0,>=1.58.1 (from langchain_openai)
  Downloading openai-1.65.3-py3-none-any.whl.metadata (27 kB)
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting langchain
  Downloading langchain-0.3.20-py3-none-any.whl.metadata (7.7 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.6 (from 

In [18]:
import requests
import numpy as np
import json
from scipy.interpolate import interp1d
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

OPENAI_API_KEY = "sk..."


def fetch_windborne_data(hours=24):
    all_data = {}
    for h in range(hours):
        url = f"https://a.windbornesystems.com/treasure/{h:02d}.json"
        response = requests.get(url)
        if response.status_code == 200:
            try:
                data = response.json()
                if isinstance(data, list):
                    all_data[h] = np.array(data)
            except:
                continue
    
    if not all_data:
        return None
    
    num_balloons = len(next(iter(all_data.values())))
    complete_data = np.full((hours, num_balloons, 3), np.nan)
    
    for h, data in all_data.items():
        complete_data[h] = data
    
    # Fill missing values with column mean
    for b in range(num_balloons):
        for d in range(3):
            valid_mask = ~np.isnan(complete_data[:, b, d])
            valid_times = np.where(valid_mask)[0]
            if len(valid_times) > 0:
                mean_value = np.nanmean(complete_data[:, b, d])
                complete_data[:, b, d] = np.where(np.isnan(complete_data[:, b, d]), mean_value, complete_data[:, b, d])
    
    return complete_data

all_positions = fetch_and_interpolate_data()

if all_positions is not None:
    print("Data Shape (after full interpolation):", all_positions.shape)
    print("Any NaN remaining?", np.isnan(all_positions).any())
else:
    print("No valid data available.")


Error decoding JSON from https://a.windbornesystems.com/treasure/01.json: Extra data: line 6 column 6 (char 93)
Failed to fetch https://a.windbornesystems.com/treasure/06.json, status code: 404
Data Shape (after full interpolation): (9, 1000, 3)
Any NaN remaining? False


In [44]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, r2_score

def process_sequences(data, time_steps=5):
    """Prepare LSTM sequences for training, ensuring correct order."""
    num_balloons = data.shape[1]  # Number of balloons
    X, y = [], []
    scalers = [MinMaxScaler() for _ in range(num_balloons)]  # Store scalers for inverse transformation

    for i in range(num_balloons):
        balloon_data = data[:, i, :][::-1]  # Reverse order: [08, 07, ..., 00]
        balloon_data = scalers[i].fit_transform(balloon_data)  # Normalize

        # Generate time-step sequences
        for j in range(len(balloon_data) - time_steps):
            X.append(balloon_data[j:j+time_steps])  # Input sequence
            y.append(balloon_data[j+time_steps])    # Next step

    return np.array(X), np.array(y), scalers

# Prepare LSTM sequences
time_steps = 5
X, y, scalers = process_sequences(all_positions, time_steps)
print("LSTM Input Shape:", X.shape)  # Expected: (num_samples, time_steps, 3)

# Define Early Stopping
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Define LSTM Model
model = Sequential([
    LSTM(64, return_sequences=True, activation="relu", input_shape=(time_steps, 3)),
    LSTM(32, return_sequences=False, activation="relu"),
    Dense(16, activation="relu"),
    Dense(3)  # Predicts (lat, lon, alt)
])

model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=1.0), loss="mse")

# Train Model
history = model.fit(X, y, epochs=100, batch_size=16, validation_split=0.1, callbacks=[early_stop])

y_pred = model.predict(X)
mse = mean_squared_error(y.reshape(-1, 3), y_pred.reshape(-1, 3))
r2 = r2_score(y.reshape(-1, 3), y_pred.reshape(-1, 3))

print(f"Mean Squared Error (MSE): {mse:.6f}")
print(f"R² Score: {r2:.6f}")

def predict_next_position(model, all_positions, scalers, time_steps=5):
    """Predict the next step based on the latest known time steps, with correct time order."""
    num_balloons = len(scalers)
    latest_sequence = []

    for i in range(num_balloons):
        balloon_data = all_positions[:, i, :][::-1]  # Reverse order to match training
        balloon_data_scaled = scalers[i].transform(balloon_data)
        latest_sequence.append(balloon_data_scaled[-time_steps:])

    latest_sequence = np.array(latest_sequence).reshape(1, time_steps, 3)
    predicted_next_scaled = model.predict(latest_sequence)

    predicted_next_real = np.array([
        scalers[i].inverse_transform(predicted_next_scaled.reshape(1000, -1)) for i in range(num_balloons)
    ])

    return predicted_next_real

predicted_next_position = predict_next_position(model, all_positions, scalers, time_steps)
print("Predicted Next Positions (Lat, Lon, Alt):")
print(predicted_next_position)

def predict_next_position(model, all_positions, scalers, time_steps=5):
    """Predict the next step using the latest `04-00` data from each balloon."""
    num_balloons = len(scalers)
    latest_sequence = []

    for i in range(num_balloons):
        balloon_data = all_positions[:, i, :][::-1]
        balloon_data_scaled = scalers[i].transform(balloon_data)
        latest_sequence.append(balloon_data_scaled[:time_steps])

    latest_sequence = np.array(latest_sequence)

    predicted_next_scaled = []
    for i in range(num_balloons):
        balloon_input = latest_sequence[i].reshape(1, time_steps, 3)
        predicted = model.predict(balloon_input, verbose=0)
        predicted_next_scaled.append(predicted)

    predicted_next_real = np.array([
        scalers[i].inverse_transform(predicted_next_scaled[i]) for i in range(num_balloons)
    ])

    return predicted_next_real

# Predict the next time step
predicted_next_position = predict_next_position(model, all_positions, scalers, time_steps)

print("Predicted Next Positions (Lat, Lon, Alt):")
print(predicted_next_position)


LSTM Input Shape: (4000, 5, 3)
Epoch 1/100


  super().__init__(**kwargs)


[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.2199 - val_loss: 0.0631
Epoch 2/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0463 - val_loss: 0.0108
Epoch 3/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0122 - val_loss: 0.0130
Epoch 4/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0111 - val_loss: 0.0095
Epoch 5/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0098 - val_loss: 0.0086
Epoch 6/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0085 - val_loss: 0.0065
Epoch 7/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0083 - val_loss: 0.0059
Epoch 8/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0074 - val_loss: 0.0058
Epoch 9/100
[1m225/225[0m [32m━━━━━━━━━━━

ValueError: cannot reshape array of size 15000 into shape (1,5,3)

In [57]:
template = """You are an operational analyst for a weather balloon company. Analyze the data of 
balloon positions over 24H: {all_positions}, LSTM prediction: {LSTM prediction}, pred next position: {pred next position}

Extract 3 key insights, such as:
- Spatial clusters indicating wind patterns
- Anomalies (e.g., balloons stuck in one area)
- Suggestions for optimizing future launches
Format the response as a bullet-point report."""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4-turbo")  # Let users input their own key

# Generate insights
chain = prompt | model
report = chain.invoke({"all_positions": str(all_positions), "LSTM prediction": str(y_pred), "pred next position": str(predicted_next_position)})
print(report.content)

Here are three key insights derived from the JSON data of balloon positions over a 24-hour period along with an LSTM predictive model's output for the next position of balloons.

### 1. Identification of Spatial Clusters and Wind Patterns
- **Persistent Geographic Areas:** There are notable clusters around coordinates (-43 to -35 longitude, -95 to -100 latitude) and (63 longitude, -10 to -14 latitude), suggesting regular wind patterns in these regions that repeatedly carry the balloons on predictable paths. These clusters might indicate wind currents that consistently move in a particular direction, which can influence planning balloon trajectories for data coverage and longevity of the balloon's flight.
- **Variable Altitudes:** Altitudes in the regions with longitude from -3 to 63 and latitude ranging from -62 to 88 show significant fluctuations ranging from as low as approximately 3 meters to as high as 22 meters. This variation in altitude might reflect different atmospheric layers

In [None]:
# Save results
output = {
    "mse": mse,
    "r2": r2,
    "predicted_next_position": predicted_next_position.tolist(),
    "insights": report.content
}
with open("docs/output.json", "w") as f:
    json.dump(output, f, indent=4)

html_output = f"""
<html><head><title>Windborne Balloon Analysis</title></head>
<body>
    <h1>Windborne Balloon Analysis</h1>
    <p><strong>Mean Squared Error (MSE):</strong> {mse:.6f}</p>
    <p><strong>R² Score:</strong> {r2:.6f}</p>
    <h2>Predicted Next Position:</h2>
    <pre>{predicted_next_position.tolist()}</pre>
    <h2>Insights:</h2>
    <p>{report.content}</p>
</body></html>
"""
with open("docs/index.html", "w") as f:
    f.write(html_output)

print("✅ Output saved in docs/index.html and docs/output.json")
