In [5]:
# step-1  Install packages
!pip install stable-baselines3[extra] gymnasium numpy==1.26.4 pandas==2.2.2 matplotlib shimmy>=1.0 torch==2.5.0 torchvision torchaudio google-generativeai --upgrade --no-cache-dir


In [6]:
# step -2 - Import Dependencies
import requests
import time
import json
import gymnasium as gym
import numpy as np
import pandas as pd
import torch
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from scipy.spatial.distance import euclidean
import joblib
from flask import Flask, request, jsonify
from datetime import datetime
import csv
import matplotlib.pyplot as plt
from datetime import datetime

In [7]:
#step-3  - fetch and process live flight data
def get_live_flight_data():
    url = "https://opensky-network.org/api/states/all"

    for attempt in range(2):  # Retry twice
        try:
            response = requests.get(url, timeout=3)
            if response.status_code == 200:
                print(" Fetched live flight data!")
                return response.json()
            else:
                print(f" API Error {response.status_code}, retrying...")
        except requests.exceptions.RequestException as e:
            print(f" Request failed: {e}, retrying...")
            time.sleep(1)

    print(" API unreachable after retries.")
    return None

# Process and Clean the Data
def process_flight_data():
    live_data = get_live_flight_data()

    if live_data and "states" in live_data:
        df_raw = pd.DataFrame(live_data["states"])

        #  Check raw data shape
        print(f" Raw Data Retrieved: {df_raw.shape[0]} rows")

        if df_raw.empty:
            print(" No live data returned from OpenSky API.")
            return pd.DataFrame()

        #  Extract relevant columns (adjust index positions if needed)
        df = df_raw.iloc[:, [0, 1, 2, 3, 5, 6, 7, 9, 10, 11]].copy()
        df.columns = [
            "icao24", "callsign", "origin_country", "time_position",
            "longitude", "latitude", "altitude", "velocity", "heading", "vertical_rate"
        ]

        #  Replace NaNs with default values temporarily for debugging
        df["altitude"].fillna(0, inplace=True)
        df["velocity"].fillna(0, inplace=True)

        #  Convert numeric columns explicitly
        num_cols = ["altitude", "velocity", "longitude", "latitude", "heading", "vertical_rate"]
        df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)

        #  Filter high-altitude flights (OPTIONAL in early tests)
        df_filtered = df[df["altitude"] > 10000]

        if df_filtered.empty:
            print(" No flights found above 10,000 ft. Returning all flights for testing.")
            df_filtered = df.head(10)  # Return at least 10 flights for testing

        print(f" Processed {df_filtered.shape[0]} flights (after filtering).")
        return df_filtered.reset_index(drop=True)

    print(" No valid states in API response.")
    return pd.DataFrame()

df = process_flight_data()


 Fetched live flight data!
 Raw Data Retrieved: 7224 rows
 Processed 2481 flights (after filtering).
   icao24  callsign  origin_country  time_position  longitude  latitude  \
0  408120  VIR128K   United Kingdom   1.741655e+09   -70.7427   42.2980   
1  a7b08d  LXJ595     United States   1.741655e+09   -99.2319   39.6446   
2  a3b87f  DAL1207    United States   1.741655e+09   -86.9189   38.8773   
3  880453  AIQ8274         Thailand   1.741655e+09   100.1117   14.0696   
4  a34e6a  ENY4275    United States   1.741655e+09   -89.6520   36.4660   

   altitude  velocity  heading  vertical_rate  
0  10584.18    273.10    63.97           3.90  
1  13106.40    194.30   263.16          -0.33  
2  10363.20    222.02   226.88           0.00  
3  11574.78    219.75   193.13           0.33  
4  10668.00    238.63    44.83           0.00  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["altitude"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["velocity"].fillna(0, inplace=True)


In [8]:
# step 4 - Assess Collision Risk
from scipy.spatial.distance import cdist

def check_live_collision_risk(df, threshold_distance=500):
    if df.empty:
        print(" No aircraft data available for collision check.")
        return []

    #  Convert positions to NumPy array for fast vectorized operations
    positions = df[["latitude", "longitude", "altitude"]].to_numpy()

    #  Compute all pairwise distances in one step
    distances = cdist(positions, positions, metric="euclidean")

    warnings = []
    num_aircraft = len(df)

    for i in range(num_aircraft):
        for j in range(i + 1, num_aircraft):
            if distances[i, j] < threshold_distance:
                warnings.append({
                    "Aircraft 1": df.iloc[i]["icao24"],
                    "Aircraft 2": df.iloc[j]["icao24"],
                    "Distance": distances[i, j]
                })

    return warnings


#  Run collision risk check only if df is not empty
if not df.empty:
    collision_warnings = check_live_collision_risk(df)
    if collision_warnings:
        print(" Potential Collisions Detected!")
        for warning in collision_warnings:
            print(warning)
    else:
        print(" No immediate collision risks detected.")
else:
    print(" No flight data available. Skipping collision check.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
{'Aircraft 1': 'c03069', 'Aircraft 2': '0d0ff8', 'Distance': 291.75269194730794}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'a5fef4', 'Distance': 47.78271229011598}
{'Aircraft 1': 'c03069', 'Aircraft 2': '06a19d', 'Distance': 127.59272210032199}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'a0b59d', 'Distance': 32.73977106227226}
{'Aircraft 1': 'c03069', 'Aircraft 2': '8013da', 'Distance': 336.7505137472838}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'ac90a8', 'Distance': 13.000691583142805}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'c04ddc', 'Distance': 41.32712302120728}
{'Aircraft 1': 'c03069', 'Aircraft 2': '885156', 'Distance': 411.35832840730325}
{'Aircraft 1': 'c03069', 'Aircraft 2': '7808ac', 'Distance': 345.3897678637854}
{'Aircraft 1': 'c03069', 'Aircraft 2': '0ac9f0', 'Distance': 305.16624486828596}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'a9b06f', 'Distance': 306.87990615713284}
{'Aircraft 1': 'c03069', 'Aircraft 2': 'aca665', 

In [1]:
# Calculate Real Distances and TTC

def process_live_flight_data_with_features():
    """
    Fetch, preprocess, and extract features from live flight data.
    Calculates real distances and time-to-collision (TTC).
    """
    live_data = get_live_flight_data()

    if not live_data or "states" not in live_data:
        print(" No live data available from API.")
        return pd.DataFrame()

    #  Convert to DataFrame
    columns = [
        "icao24", "callsign", "origin_country", "time_position", "last_contact",
        "longitude", "latitude", "altitude", "on_ground", "velocity",
        "heading", "vertical_rate", "sensors", "baro_altitude", "squawk",
        "spi", "position_source"
    ]

    df = pd.DataFrame(live_data["states"], columns=columns)

    if df.empty:
        print(" Live data returned an empty DataFrame.")
        return df

    #  Define numeric columns
    numeric_cols = ["altitude", "velocity", "heading", "longitude", "latitude"]

    #  Convert numeric columns to float (coerce errors)
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")

    #  Drop invalid rows
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(subset=numeric_cols, inplace=True)

    #  Filter for high altitude flights (>10,000 ft)
    df = df[df["altitude"] > 10000]

    if df.empty:
        print(" No high-altitude flights found.")
        return df

    #  Calculate distances and TTC between all pairs
    distances = []
    ttcs = []

    for i in range(len(df)):
        for j in range(i + 1, len(df)):
            ac1 = df.iloc[i]
            ac2 = df.iloc[j]

            pos1 = (ac1["latitude"], ac1["longitude"], ac1["altitude"])
            pos2 = (ac2["latitude"], ac2["longitude"], ac2["altitude"])

            distance = euclidean(pos1, pos2)

            rel_speed = abs(ac1["velocity"] - ac2["velocity"])
            ttc = distance / rel_speed if rel_speed > 0 else float('inf')

            distances.append(distance)
            ttcs.append(ttc)

    #  Assign min distance and TTC across all comparisons
    df["distance"] = min(distances) if distances else 0.0
    df["time_to_collision"] = min(ttcs) if ttcs else float('inf')
    df["vertical_rate_diff"] = 0.0  # Placeholder (optional)

    #  Convert everything to float32
    df[["distance", "time_to_collision", "vertical_rate_diff"]] = df[
        ["distance", "time_to_collision", "vertical_rate_diff"]
    ].astype(np.float32)

    print(f" Processed {len(df)} high-altitude flights with distance and TTC features.")
    return df


    #  Filter only numeric columns needed for the model
    df_filtered = df_cleaned[numeric_cols].astype(np.float32)



ModuleNotFoundError: No module named 'stable_baselines3'

In [None]:

#   Define Custom Collision Avoidance Environment
class CollisionAvoidanceEnv(gym.Env):
    def __init__(self, df, max_steps=50):  # Reduced max_steps for quicker testing
        super(CollisionAvoidanceEnv, self).__init__()
        self.df = df.astype(np.float32).values
        self.current_step = 0
        self.max_steps = max_steps

        # Action space (3 discrete actions)
        self.action_space = gym.spaces.Discrete(3)
        # Observation space (6 values)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        self.current_step = 0
        if len(self.df) == 0:
            print(" Empty flight data. Returning zeros.")
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
            return obs, {}
        obs = self._get_observation()
        return obs, {}

    def _get_observation(self):
        if self.current_step >= len(self.df):
            print(" Out of bounds. Returning zeros.")
            return np.zeros(self.observation_space.shape, dtype=np.float32)
        row = self.df[self.current_step]
        return np.array([
            row[0],  # Distance
            row[1] - row[2],  # Altitude diff
            row[3] - row[4],  # Velocity diff
            row[5] - row[6],  # Heading diff
            row[7] - row[8],  # Vertical rate diff
            row[9] if len(row) > 9 else 0.0  # Time to collision
        ], dtype=np.float32)

    def step(self, action):
        self.current_step += 1

        if self.current_step >= len(self.df):
            print(" Step exceeds dataset length. Ending episode.")
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
            reward = -100
            done = True
            truncated = False
            return obs, reward, done, truncated, {}

        distance = self.df[self.current_step][0]

        # Reward logic for testing
        if distance < 100:
            reward = -100  # Immediate collision danger!
        elif distance > 300:
            reward = 100    # Safe zone
        elif action == 1:
            reward = 50 - (self.current_step * 0.5)  # Climb reward
        elif action == 2:
            reward = 30 - (self.current_step * 0.5)  # Turn reward
        else:
            reward = -10  # Neutral / Penalty

        done = self.current_step >= self.max_steps or distance > 300
        truncated = False
        obs = self._get_observation()

        return obs, reward, done, truncated, {}



# #  Step 3: Create Environment
# env = DummyVecEnv([lambda: CollisionAvoidanceEnv(df_cleaned, max_steps=50)])

# # Define PPO Model
# device = "cuda" if torch.cuda.is_available() else "cpu"
# print(f"🚀 Using device: {device}")

# ppo_model = PPO(
#     "MlpPolicy",
#     env,
#     verbose=1,
#     learning_rate=0.0003,
#     batch_size=32,
#     n_steps=512,
#     n_epochs=5,
#     gamma=0.99,
#     gae_lambda=0.95,
#     clip_range=0.2,
#     device=device
)

# # Train PPO Model
# print("\n🚀 Starting PPO training (Quick Test Version)...")
# ppo_model.learn(total_timesteps=2000)
# ppo_model.save("ppo_quick_test_model")
# print("\n PPO Model Training Completed and Saved!")

# # Evaluate PPO Model
# obs = env.reset()
# done = [False]
# step_counter = 0
# max_eval_steps = 50  # Quick evaluation step limit

# print("\n🚀 Starting PPO Evaluation...")
# while not done[0] and step_counter < max_eval_steps:
#     action, _ = ppo_model.predict(obs, deterministic=True)
#     obs, reward, done, info = env.step(action)

#     if step_counter % 10 == 0:
#         print(f"Step {step_counter + 1}: Action: {action}, Reward: {reward}, Done: {done[0]}")

#     step_counter += 1

# if step_counter >= max_eval_steps:
#     print("\n Max evaluation steps reached! Potential infinite loop avoided.")
# else:
#     print("\n PPO Evaluation Completed Successfully!")

In [None]:
# Prepare Dataset for Training

df_cleaned = pd.DataFrame({
    "distance": np.random.uniform(0, 1000, 500),
    "altitude": np.random.uniform(10000, 40000, 500),
    "velocity": np.random.uniform(200, 600, 500),
    "heading": np.random.uniform(0, 360, 500),
    "latitude": np.random.uniform(-90, 90, 500),
    "longitude": np.random.uniform(-180, 180, 500),
    "vertical_rate": np.random.uniform(-50, 50, 500),
    "baro_altitude": np.random.uniform(10000, 40000, 500),
    "col9": np.random.uniform(0, 100, 500),
    "col10": np.random.uniform(0, 100, 500)
}).astype(np.float32)

print(f"✅ Cleaned Data Sample:\n{df_cleaned.head()}")

In [None]:
#  Check if DQN model exists before loading
import os
dqn_model_path = "/content/collision_avoidance_dqn_retrained.zip"

if os.path.exists(dqn_model_path):
    print(" DQN model found. Loading now...")
    dqn_model = DQN.load(dqn_model_path.replace(".zip", ""), env=env, device=device)

else:
    print(" DQN Model Not Found! Training a new one...")

    #  Train DQN model
    dqn_model = DQN(
        "MlpPolicy",
        env,
        verbose=1,
        learning_rate=0.0005,
        batch_size=64,
        buffer_size=50000,
        learning_starts=1000,
        gamma=0.99,
        train_freq=4,
        device=device
    )

    print("\n🚀 Training the DQN Model...")
    dqn_model.learn(total_timesteps=10000)

    #  Save trained DQN model
    dqn_model.save(dqn_model_path.replace(".zip", ""))
    print(f"\n DQN Model Training Completed. Model saved at {dqn_model_path}")


In [None]:
#  Step 7: Evaluate DQN and PPO Models

import os

#  Ensure Environment Exists Before Evaluating Models
if 'env' in locals():
    #  Check if DQN Model Exists
    dqn_model_path = "/content/collision_avoidance_dqn_retrained.zip"

    if os.path.exists(dqn_model_path):
        print(f" DQN Model Found at: {dqn_model_path}. Loading model...")

        #  Load the DQN Model
        try:
            dqn_model = DQN.load(dqn_model_path.replace(".zip", ""), env=env, device=device)
            print(" DQN Model Loaded Successfully!")

            #  Evaluate DQN Model
            dqn_mean_reward, dqn_std_reward = evaluate_policy(
                dqn_model,
                env,
                n_eval_episodes=5,
                deterministic=True
            )
            print(f"\n📊 DQN Model Evaluation Results:")
            print(f"➡️ Average Reward: {dqn_mean_reward:.2f} ± {dqn_std_reward:.2f}")

        except Exception as e:
            print(f" Error loading or evaluating DQN model: {e}")

    else:
        print(f" DQN Model Not Found at: {dqn_model_path}. Skipping DQN evaluation.")

    #  Evaluate PPO Model
    try:
        ppo_mean_reward, ppo_std_reward = evaluate_policy(
            ppo_model,
            env,
            n_eval_episodes=5,
            deterministic=True
        )
        print(f"\n📊 PPO Model Evaluation Results:")
        print(f"➡️ Average Reward: {ppo_mean_reward:.2f} ± {ppo_std_reward:.2f}")

    except Exception as e:
        print(f" Error during PPO Model Evaluation: {e}")

else:
    print(" No environment available. Skipping model evaluations.")


In [None]:
#  Reset environment
obs = env.reset()
done = [False]  # DummyVecEnv returns a list of booleans

while not done[0]:
    action, _ = ppo_model.predict(obs, deterministic=True)

    # Step: 4 values from DummyVecEnv
    obs, reward, done, info = env.step(action)

    # Logging what’s happening inside
    print(f"Action: {action}, Reward: {reward}, Done: {done[0]}")


print(" Evaluation Completed!")

In [None]:
obs = env.reset()

done = [False]  # DummyVecEnv returns a list of dones

while not done[0]:
    action, _ = ppo_model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)

    print(f"Action: {action}, Reward: {reward}, Done: {done[0]}")

print(" Evaluation Completed!")


In [None]:
distance = obs[0][0]  # Assuming DummyVecEnv wrapping
print(f"Distance: {distance}, Obs: {obs}")


In [None]:
# step -8 Code to Validate PPO and DQN Models these are the evaluation steps after performing caluclations to input the real data
import os
from stable_baselines3 import PPO, DQN
import torch

# Define the device (GPU if available)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f" Device in use: {device}")

# Validate PPO Model
ppo_model_path = "/content/ppo_quick_test_model"
if os.path.exists(ppo_model_path + ".zip"):
    print(f" Found PPO model at {ppo_model_path}")
    try:
        ppo_model = PPO.load(ppo_model_path, device=device)
        print(" PPO model loaded successfully!")
    except Exception as e:
        print(f" PPO model failed to load! Error: {e}")
else:
    print(f" PPO model not found at {ppo_model_path}")

# Validate DQN Model
dqn_model_path = "/content/collision_avoidance_dqn_retrained"
if os.path.exists(dqn_model_path + ".zip"):
    print(f" Found DQN model at {dqn_model_path}")
    try:
        dqn_model = DQN.load(dqn_model_path, device=device)
        print(" DQN model loaded successfully!")
    except Exception as e:
        print(f" DQN model failed to load! Error: {e}")
else:
    print(f" DQN model not found at {dqn_model_path}")


In [None]:
# step - 9  Add Logging to CSV/JSON & Visualization for Model Performance
# Logging Evaluation Results to CSV
import pandas as pd
from datetime import datetime

# Create a CSV log file
log_file = "model_evaluation_log.csv"

# Initialize log file with headers (do this once)
if not os.path.exists(log_file):
    log_df = pd.DataFrame(columns=["Timestamp", "Model", "Mean Reward", "Std Reward"])
    log_df.to_csv(log_file, index=False)

def log_evaluation(model_name, mean_reward, std_reward):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    new_entry = pd.DataFrame([[timestamp, model_name, mean_reward, std_reward]],
                             columns=["Timestamp", "Model", "Mean Reward", "Std Reward"])
    new_entry.to_csv(log_file, mode='a', header=False, index=False)
    print(f" Logged {model_name} evaluation to {log_file}")


In [None]:
# Log PPO results
log_evaluation("PPO", ppo_mean_reward, ppo_std_reward)

# Log DQN results
log_evaluation("DQN", dqn_mean_reward, dqn_std_reward)


In [None]:
# step-10 loading the both models and evaluating and then plotting the results

# Reload the environment and models
def make_env():
    # Replace this with your actual environment creation logic
    return CollisionAvoidanceEnv(df_cleaned.copy(), max_steps=100)

env = DummyVecEnv([make_env])

# Load PPO and DQN models (update paths if different)
ppo_model = PPO.load("/content/ppo_quick_test_model", env=env)
dqn_model = DQN.load("/content/collision_avoidance_dqn_retrained", env=env)

print(" PPO and DQN Models Loaded Successfully!")


# ---------
#  Step 2: Run Evaluations and Log Results
# ---------

logs = []

num_iterations = 5          # Run 5 evaluation cycles (adjust as needed)
n_eval_episodes = 5         # Evaluate over 5 episodes each time

for iteration in range(num_iterations):
    print(f"\n🚀 Running Evaluation Iteration {iteration + 1}/{num_iterations}...")

    # Evaluate PPO
    ppo_mean_reward, ppo_std_reward = evaluate_policy(ppo_model, env, n_eval_episodes=n_eval_episodes)
    print(f"PPO Mean Reward: {ppo_mean_reward}, Std: {ppo_std_reward}")

    # Evaluate DQN
    dqn_mean_reward, dqn_std_reward = evaluate_policy(dqn_model, env, n_eval_episodes=n_eval_episodes)
    print(f"DQN Mean Reward: {dqn_mean_reward}, Std: {dqn_std_reward}")

    # Log timestamped results
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    logs.append({
        "Timestamp": timestamp,
        "Model": "PPO",
        "Mean Reward": ppo_mean_reward,
        "Std Reward": ppo_std_reward
    })
    logs.append({
        "Timestamp": timestamp,
        "Model": "DQN",
        "Mean Reward": dqn_mean_reward,
        "Std Reward": dqn_std_reward
    })

    # Optional wait between evaluations
    time.sleep(3)



log_df = pd.DataFrame(logs)
log_file = "model_evaluation_log.csv"
log_df.to_csv(log_file, index=False)

print(f"\n Evaluation logs saved to: {log_file}")


def plot_model_performance(log_file):
    df = pd.read_csv(log_file)

    if df.empty:
        print(" No log data to plot.")
        return

    # Separate PPO and DQN data
    ppo_data = df[df["Model"] == "PPO"]
    dqn_data = df[df["Model"] == "DQN"]

    plt.figure(figsize=(12, 6))

    # Plot PPO Mean Rewards
    plt.plot(ppo_data["Timestamp"], ppo_data["Mean Reward"], label="PPO Mean Reward", marker='o')
    plt.fill_between(ppo_data["Timestamp"],
                     ppo_data["Mean Reward"] - ppo_data["Std Reward"],
                     ppo_data["Mean Reward"] + ppo_data["Std Reward"],
                     alpha=0.2)

    # Plot DQN Mean Rewards
    plt.plot(dqn_data["Timestamp"], dqn_data["Mean Reward"], label="DQN Mean Reward", marker='o')
    plt.fill_between(dqn_data["Timestamp"],
                     dqn_data["Mean Reward"] - dqn_data["Std Reward"],
                     dqn_data["Mean Reward"] + dqn_data["Std Reward"],
                     alpha=0.2)

    plt.xlabel("Timestamp")
    plt.ylabel("Mean Reward")
    plt.title("PPO vs DQN Model Evaluation Over Time")
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Call the plotting function
plot_model_performance(log_file)

In [None]:
# step -11 Setting Up Hyperparameter Tuning
# We'll test different values for:
# learning_rate
# gamma
# batch_size
# n_steps
# And compare performance.


#  PPO Hyperparameter Options
learning_rates = [0.0001, 0.0003, 0.0005]
gammas = [0.95, 0.98, 0.99]
batch_sizes = [32, 64, 128]
n_steps_options = [512, 1024, 2048]

#  Store best config and reward
best_mean_reward = -float('inf')
best_params = {}

#  Hyperparameter tuning loop
for lr in learning_rates:
    for gamma in gammas:
        for batch_size in batch_sizes:
            for n_steps in n_steps_options:

                print(f"\n🚀 Training PPO with lr={lr}, gamma={gamma}, batch_size={batch_size}, n_steps={n_steps}")

                #  Re-create environment each time (important for stability)
                env = DummyVecEnv([lambda: CollisionAvoidanceEnv(df_cleaned, max_steps=100)])

                #  Train PPO with current hyperparameters
                ppo_model = PPO(
                "MlpPolicy",
                env,
                verbose=1,
                learning_rate=1e-3,      # Faster learning for quick testing
                batch_size=32,           # Smaller batch, faster updates
                n_steps=512,             # Faster training loop
                n_epochs=5,              # Fewer epochs to speed up
                gamma=0.95,              # Less "memory" of old rewards
                gae_lambda=0.9,          # Lower bias/variance tradeoff
                clip_range=0.2,
                device=device
            )

                #  Train the PPO Model fast
                ppo_model.learn(total_timesteps=5000)

                #  Evaluate
                mean_reward, std_reward = evaluate_policy(ppo_model, env, n_eval_episodes=5)
                print(f"📊 Mean Reward: {mean_reward:.2f} ± {std_reward:.2f}")

                #  Update best if current is better
                if mean_reward > best_mean_reward:
                    best_mean_reward = mean_reward
                    best_params = {
                        'learning_rate': lr,
                        'gamma': gamma,
                        'batch_size': batch_size,
                        'n_steps': n_steps
                    }

print("\n Hyperparameter tuning completed!")
print(f"🏆 Best Mean Reward: {best_mean_reward:.2f}")
print(f"🔧 Best Hyperparameters: {best_params}")


In [None]:
# !pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
    #  Assume df_cleaned is your prepared dataset
# Example dummy dataset
df_cleaned = pd.DataFrame({
    "distance": np.random.uniform(0, 1000, 500),
    "altitude": np.random.uniform(10000, 40000, 500),
    "velocity": np.random.uniform(200, 600, 500),
    "heading": np.random.uniform(0, 360, 500),
    "latitude": np.random.uniform(-90, 90, 500),
    "longitude": np.random.uniform(-180, 180, 500),
    "vertical_rate": np.random.uniform(-50, 50, 500),
    "baro_altitude": np.random.uniform(10000, 40000, 500),
    "col9": np.random.uniform(0, 100, 500),
    "col10": np.random.uniform(0, 100, 500)
}).astype(np.float32)

#  Create environment and wrap in DummyVecEnv for compatibility
env = DummyVecEnv([lambda: ImprovedCollisionAvoidanceEnv(df_cleaned.copy(), max_steps=100)])

#  Define device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {device}")

#  Create PPO Model
ppo_model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=0.0003,
    batch_size=64,
    n_steps=1024,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    device=device
)

#  Train the PPO Model
print("🚀 Training PPO Model...")
ppo_model.learn(total_timesteps=20000)

#  Save Model
ppo_model.save("/content/ppo_train")
print(" PPO Model Training Completed and Saved.")



🚀 Using device: cuda
Using cuda device
🚀 Training PPO Model...
-----------------------------
| time/              |      |
|    fps             | 610  |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 1024 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 547         |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.010527509 |
|    clip_fraction        | 0.0771      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 6.29e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 2.72e+04    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00997    |
|    value_loss           | 5.77e+04    |
-------------

In [None]:
# here as we are getting the mean reward the constant value , we are trying to chnage the environment an then do the next steps


In [None]:
# test cases to validate our environment
#  Create environment and wrap in DummyVecEnv for compatibility
env = DummyVecEnv([lambda: ImprovedCollisionAvoidanceEnv(max_steps=100)])

#  Define device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {device}")

#  Create PPO Model
ppo_model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=0.0003,
    batch_size=64,
    n_steps=1024,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    device=device
)

#  Train the PPO Model
print("🚀 Training PPO Model...")
ppo_model.learn(total_timesteps=20000)

#  Save Model
ppo_model.save("/content/ppo_improved_collision_avoidance")
print(" PPO Model Training Completed and Saved.")



TypeError: ImprovedCollisionAvoidanceEnv.__init__() missing 1 required positional argument: 'df'

In [96]:
#  Load the trained PPO model and set up environment
#  Reload trained PPO model for testing
env = DummyVecEnv([lambda: ImprovedCollisionAvoidanceEnv(max_steps=100)])
ppo_model = PPO.load("/content/ppo_balanced_actions_model", env=env)

#  Reset environment
obs = env.reset()

#  Test loop
print("🚀 Starting PPO Testing...")

for step in range(20):
    action, _states = ppo_model.predict(obs, deterministic=True)

    #  DummyVecEnv returns 4 values (obs, reward, done, info)
    obs, reward, done, info = env.step(action)

    print(f"Step {step+1}: Action: {action}, Reward: {reward}, Done: {done}")

    #  If done, reset environment
    if done[0]:
        print(" Episode finished. Resetting environment.")
        obs = env.reset()

print(" PPO Model Testing Completed.")



TypeError: ImprovedCollisionAvoidanceEnv.__init__() missing 1 required positional argument: 'df'

In [95]:
# saving the best PPO Model
print("\n🚀 Retraining PPO Model with Best Hyperparameters...")

#  Re-create environment for final training
env = DummyVecEnv([lambda: CollisionAvoidanceEnv(df_cleaned, max_steps=100)])

ppo_model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=best_params['learning_rate'],
    batch_size=best_params['batch_size'],
    n_steps=best_params['n_steps'],
    n_epochs=10,
    gamma=best_params['gamma'],
    gae_lambda=0.95,
    clip_range=0.2,
    device=device
)

#  Train longer
ppo_model.learn(total_timesteps=20000)

#  Save
ppo_model.save("/content/collision_avoidance_ppo_tuned")
print("\n Best PPO Model Trained and Saved!")



🚀 Retraining PPO Model with Best Hyperparameters...


ValueError: could not convert string to float: 'TEST'

In [97]:
#  validating the model
mean_reward, std_reward = evaluate_policy(ppo_model, env, n_eval_episodes=10)
print(f" Final Model Evaluation - Mean Reward: {mean_reward:.2f} ± {std_reward:.2f}")



✅ Final Model Evaluation - Mean Reward: 3910.00 ± 0.00


In [98]:
# loading the tuned ppo model
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Load your environment (same structure used during training)
env = DummyVecEnv([make_env()])  # Assuming make_env() is already defined

# Load the optimized PPO model
ppo_model = PPO.load("/content/ppo_balanced_actions_model", env=env)

print(" Optimized PPO Model Loaded Successfully!")

ValueError: could not convert string to float: 'TEST'

In [None]:
#  exporting results and saving in csv format
import pandas as pd

results = []

obs = env.reset()

for step in range(50):
    action, _ = ppo_model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)

    results.append({
        "Step": step + 1,
        "Action": action[0],
        "Reward": reward[0],
        "Done": done[0]
    })

    if done[0]:
        obs = env.reset()

# Save the logs
df_results = pd.DataFrame(results)
df_results.to_csv("ppo_evaluation_results.csv", index=False)

print(" Evaluation Results Exported to ppo_evaluation_results.csv")

✅ Evaluation Results Exported to ppo_evaluation_results.csv


In [None]:
# Assuming your PPO model is already loaded
obs = env.reset()

for i in range(20):  # testing loop
    action, _ = ppo_model.predict(obs, deterministic=True)

    # Extract current state from obs[0] if using DummyVecEnv
    obs_values = obs[0]  # Single environment obs
    current_state = {
        'altitude': obs_values[0],
        'heading': obs_values[2]  # adjust according to your observation indices
    }

    # Translate to instruction
    instruction = action_to_instruction(action, current_state)

    print(f"Step {i+1}: Action: {action}, Instruction: {instruction}")

    # Step the environment
    obs, reward, done, info = env.step(action)

    if done[0]:  # If DummyVecEnv returns done as a list
        obs = env.reset()

Step 1: Action: [1], Instruction: Climb 500 ft to 861.9795837402344 ft altitude.
Step 2: Action: [1], Instruction: Climb 500 ft to 1274.7431640625 ft altitude.
Step 3: Action: [1], Instruction: Climb 500 ft to 1376.9959106445312 ft altitude.
Step 4: Action: [1], Instruction: Climb 500 ft to 709.4632873535156 ft altitude.
Step 5: Action: [1], Instruction: Climb 500 ft to 554.4168968200684 ft altitude.
Step 6: Action: [1], Instruction: Climb 500 ft to 897.8636169433594 ft altitude.
Step 7: Action: [1], Instruction: Climb 500 ft to 704.7793273925781 ft altitude.
Step 8: Action: [1], Instruction: Climb 500 ft to 885.1084899902344 ft altitude.
Step 9: Action: [1], Instruction: Climb 500 ft to 1070.0892944335938 ft altitude.
Step 10: Action: [1], Instruction: Climb 500 ft to 734.3873901367188 ft altitude.
Step 11: Action: [1], Instruction: Climb 500 ft to 838.216552734375 ft altitude.
Step 12: Action: [1], Instruction: Climb 500 ft to 787.8282775878906 ft altitude.
Step 13: Action: [1], Inst

In [None]:
#  actions to human readbale instructions
# Example function to map action to pilot-friendly instructions
def action_to_instruction(action, current_state):
    altitude = current_state.get('altitude', 10000)
    heading = current_state.get('heading', 180)  # example default heading

    if action == 0:
        return "Maintain current heading and altitude."
    elif action == 1:
        # Example climb instruction: increase by 500 ft
        new_altitude = altitude + 500
        return f"Climb 500 ft to {new_altitude} ft altitude."
    elif action == 2:
        # Example turn: adjust heading by 20 degrees right
        new_heading = (heading + 20) % 360
        return f"Turn 20 degrees to the right. New heading {new_heading} degrees."
    else:
        return "No valid action determined."

In [1]:
# -------------------- Imports --------------------
import google.generativeai as genai
import numpy as np

#  Step 1: Configure your Gemini API Key
genai.configure(api_key="AIzaSyCIssaMyDw3j57a0FQgaxUfFTaC4V-reNc")  # Replace with your actual key

#  Step 2: Initialize Gemini 1.5 Pro model (or available latest)
try:
    model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")  # Correct model identifier
except Exception as e:
    print(f" Error initializing Gemini model: {e}")

# -------------------- Function for LLM Explanation --------------------
def explain_action_with_gemini(action, observation):
    action_map = {
        0: "Hold current course",
        1: "Climb to a higher altitude",
        2: "Turn to avoid collision"
    }

    # Ensure action is an int, even if PPO returns ndarray
    action = int(action)

    # Build human-readable instruction
    instruction = f"Action suggested: {action_map.get(action, 'Unknown')}\n"

    # Convert observation to standard Python list (Gemini prefers JSON-serializable inputs)
    observation_list = observation.tolist() if isinstance(observation, np.ndarray) else observation

    # Flight state details
    state_details = (
        f"Current flight state:\n"
        f"Distance to nearby aircraft: {observation_list[0]:.2f} units\n"
        f"Altitude difference: {observation_list[1]:.2f} ft\n"
        f"Velocity difference: {observation_list[2]:.2f} knots\n"
        f"Heading difference: {observation_list[3]:.2f} degrees\n"
        f"Vertical rate difference: {observation_list[4]:.2f} ft/min\n"
        f"Time to potential collision: {observation_list[5]:.2f} seconds"
    )

    # Construct prompt
    prompt = (
        f"{instruction}\n"
        f"{state_details}\n\n"
        "Explain in simple terms to a human pilot why this action was chosen to ensure flight safety."
    )

    # Call Gemini to get the explanation
    try:
        response = model.generate_content([prompt])

        # Extract the explanation text
        explanation = response.text
        return explanation

    except Exception as e:
        print(f" Gemini API Error: {str(e)}")
        return " No explanation available."

# -------------------- Example Usage --------------------

#  Example balanced action + observation (replace with real PPO output in practice)
sample_action = 1  # Example action (Climb)
sample_observation = np.array([250.0, 500.0, -30.0, 10.0, -5.0, 60.0])  # Example observation array

#  Call the LLM explainability function
explanation = explain_action_with_gemini(sample_action, sample_observation)

#  Display explanation
print("🧠 LLM Explanation from Gemini 1.5 Pro:\n")
print(explanation)

🧠 LLM Explanation from Gemini 1.5 Pro:

There's another aircraft dangerously close, just 250 units away, and 500 feet below you.  You're also closing in on each other speed-wise (the negative velocity difference means you're catching up to them).  Climbing to a higher altitude is the quickest way to increase the separation between your aircraft and the other one, preventing a potential collision in the next minute.

