# Imports

Penser à copier le .env dans `jupyter:/home/jovyan/.env`

Formalisme du fichier .env

OPENSKY_USERNAME=<mon_user>

OPENSKY_PASSWORD=<mon_pwd>

In [None]:
!pip install python-dotenv

In [None]:
import requests
from kafka import KafkaProducer
import json
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os
import time

In [None]:
# Read the environment variables from the .env file copied into the container
load_dotenv('/home/jovyan/.env')

# OpenSky credentials, looked up in the process environment
USERNAME = os.getenv('OPENSKY_USERNAME')
PASSWORD = os.getenv('OPENSKY_PASSWORD')

# OpenSky REST endpoints; the `{}` placeholders are filled in with str.format() by the cells below
OPENSKY_ALL_STATES_URL = "https://opensky-network.org/api/states/all"
OPENSKY_ALL_FLIGHTS_URL = "https://opensky-network.org/api/flights/all?begin={}&end={}"
OPENSKY_ARRIVAL_URL = "https://opensky-network.org/api/flights/arrival?airport={}&begin={}&end={}"

# Fonction pour envoyer les données OpenSky à Kafka

In [None]:
# Connection settings handed to the Kafka client
kafka_config = {
    'bootstrap_servers': 'kafka1:9092',  # Point this at your own Kafka broker
}

# Shared producer: each message value is serialized to JSON and encoded as UTF-8 bytes
producer = KafkaProducer(
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
    **kafka_config
)


In [None]:

def _extract_flight_records(data):
    """Return the dict entries of *data* that contain an 'icao24' key.

    Anything other than a list yields an empty list. Entries that are not
    dicts are skipped individually, so a stray null or scalar in the payload
    cannot raise while the key is being looked up.
    """
    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict) and 'icao24' in item]


def send_opensky_to_kafka(topic, url, username, password, timeout=30):
    """Fetch a JSON list from an OpenSky endpoint and publish each record to Kafka.

    Args:
        topic: Kafka topic the records are sent to.
        url: Fully formatted OpenSky URL to query.
        username: User name for HTTP basic authentication.
        password: Password for HTTP basic authentication.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. The outcome is reported on stdout. Request failures, non-200
        responses and undecodable bodies are reported and skipped rather than
        raised, so a calling loop can move on to its next request.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, auth=(username, password), timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response whose body is not valid JSON.
        print(f"Failed to decode response: {exc}")
        return

    flights = _extract_flight_records(data)

    # Publish each record to Kafka
    for flight in flights:
        producer.send(topic, value=flight)

    # Ensure all messages are sent
    producer.flush()
    print(f"Sent {len(flights)} records.")

In [None]:
# Field names of an OpenSky flight record, stored as an identity mapping (name -> name)
flights = {
    field: field
    for field in (
        "icao24",
        "firstSeen",
        "estDepartureAirport",
        "lastSeen",
        "estArrivalAirport",
        "estDepartureAirportHorizDistance",
        "estDepartureAirportVertDistance",
        "estArrivalAirportHorizDistance",
        "estArrivalAirportVertDistance",
        "departureAirportCandidatesCount",
        "arrivalAirportCandidatesCount",
    )
}

# Field names of an OpenSky state vector, stored as an identity mapping (name -> name)
states = {
    field: field
    for field in (
        "icao24",
        "callsign",
        "origin_country",
        "time_position",
        "last_contact",
        "longitude",
        "latitude",
        "baro_altitude",
        "on_ground",
        "velocity",
        "true_track",
        "vertical_rate",
        "sensors",
        "geo_altitude",
        "squawk",
        "spi",
        "position_source",
        "category",
    )
}

# Text format used to parse the initial date below
date_format = "%Y-%m-%d %H:%M:%S"

# Start of the first retrieval window
initial_date_str = "2025-01-25 16:00:00"

# Length of each retrieval window, in minutes
step = 60

# First window as datetimes: [initial_date, initial_date + step]
initial_date = datetime.strptime(initial_date_str, date_format)
end_date = initial_date + timedelta(minutes=step)

# Same window as integer epoch seconds
start, end = (int(moment.timestamp()) for moment in (initial_date, end_date))




In [None]:
# Publish ten consecutive windows of OpenSky flights to Kafka

for i in range(10):
    print(f"{start}-{end}")
    send_opensky_to_kafka(
        "opensky-flights",
        OPENSKY_ALL_FLIGHTS_URL.format(start, end),
        USERNAME,
        PASSWORD,
    )
    # Pause between two requests
    time.sleep(5)
    # Slide the window forward: the previous end becomes the new beginning
    initial_date, end_date = end_date, end_date + timedelta(minutes=step)
    start, end = int(initial_date.timestamp()), int(end_date.timestamp())

In [None]:
# Publish the arrivals of the last seven days to Kafka, one airport at a time

# Airport codes to query
airports = ["EDDF", "EDDM", "EDDB", "LOWS", "LFPG", "EHAM", "LEMD", "LIRF", "LEBL"]

# Query window: from one week ago up to now, as integer epoch seconds
today = datetime.now()
initial_date = today - timedelta(weeks=1)
start, end = (int(bound.timestamp()) for bound in (initial_date, today))

for airport in airports:
    print(OPENSKY_ARRIVAL_URL.format(airport, start, end))
    send_opensky_to_kafka(
        "opensky-arrivals",
        OPENSKY_ARRIVAL_URL.format(airport, start, end),
        USERNAME,
        PASSWORD,
    )
    # Pause between two requests
    time.sleep(5)
    

## Envoyer les vols filtrés sur kafka

In [None]:
TOPIC_FILTERED_FLIGHTS = "opensky_filtered_flights"

# Timestamp (seconds since the epoch) captured once, when this cell is executed
current_datetime = datetime.timestamp(datetime.now())


def _state_to_message(state):
    """Convert one raw state vector into a message dict, or None if it must be skipped.

    A vector is skipped when it is empty, has fewer than the 17 positional
    fields read below, or lacks any of the essential values (icao24, callsign,
    origin country, time position, altitude).
    """
    if not state or len(state) < 17:
        return None

    icao24 = state[0]
    # The callsign is stripped of surrounding whitespace; non-string or empty values count as missing.
    callsign = state[1].strip() if state[1] and isinstance(state[1], str) else None
    origin_country = state[2]
    time_position = state[3]
    altitude = state[7]

    # time_position and altitude are compared to None explicitly because 0 is a legitimate value.
    if not icao24 or not callsign or not origin_country or time_position is None or altitude is None:
        return None

    return {
        "icao24": icao24,
        "callsign": callsign,
        "origin_country": origin_country,
        "time_position": time_position,
        "last_contact": state[4],
        "longitude": state[5],
        "latitude": state[6],
        "altitude": altitude,
        "on_ground": state[8],
        "velocity": state[9],
        "heading": state[10],
        "vertical_rate": state[11],
        "sensors": state[12],
        "geo_altitude": state[13],
        "squawk": state[14],
        "spi": state[15],
        "position_source": state[16],
    }


def send_filtered_flights_data(url, username, password, timeout=30):
    """Fetch state vectors for the stored timestamp and send the valid ones to Kafka.

    Each message is keyed by the flight's callsign, so repeated data for the
    same flight is published under the same key.

    Args:
        url: OpenSky states endpoint to query.
        username: User name for HTTP basic authentication.
        password: Password for HTTP basic authentication.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. The outcome is reported on stdout. Request failures, non-200
        responses and undecodable bodies are reported and skipped rather than
        raised.
    """
    # NOTE(review): current_datetime is set once when the cell runs and is never
    # advanced here, so every call asks for the same snapshot. Confirm whether
    # the "simulated time" was meant to move forward between calls.
    # The value is truncated to whole seconds; presumably the API expects an
    # integer timestamp. Verify against the OpenSky documentation.
    params = {"time": int(current_datetime)}

    try:
        # Include authentication in the GET request; the timeout avoids hanging forever.
        response = requests.get(url, auth=(username, password), params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response whose body is not valid JSON.
        print(f"Failed to decode response: {exc}")
        return

    # "states" can be present but null; dict.get's default only covers a missing key.
    states = (data.get("states") if isinstance(data, dict) else None) or []

    sent = 0
    for state in states:
        message = _state_to_message(state)
        if message is None:
            continue

        # Use callsign as the key (Kafka keys are sent as bytes)
        producer.send(TOPIC_FILTERED_FLIGHTS, key=message["callsign"].encode('utf-8'), value=message)
        print(f"Sent to FILTERED_FLIGHTS: {message}")
        sent += 1

    producer.flush()
    # Report what was actually published, not the size of the raw response.
    print(f"Sent {sent} records.")

## Thread qui envoie les données à Kafka

In [None]:
import threading
import time

# Flag used to ask the background loop to stop
stop_event = threading.Event()


def run_kafka_producer():
    """Send filtered flight data to Kafka every 5 seconds until stop_event is set."""
    while not stop_event.is_set():
        try:
            send_filtered_flights_data(OPENSKY_ALL_STATES_URL, USERNAME, PASSWORD)
        except Exception as exc:
            # Thread boundary: report the failure and keep polling, otherwise the
            # daemon thread would die without the notebook user noticing.
            print(f"Kafka producer iteration failed: {exc}")
        # Unlike time.sleep, wait() returns as soon as the flag is set, so a stop
        # request does not have to sit out the rest of the pause.
        stop_event.wait(5)


# Run the producer in a separate thread
producer_thread = threading.Thread(target=run_kafka_producer, daemon=True)
producer_thread.start()

print("Kafka producer is running in the background! Use `stop_kafka_producer()` to stop it.")

## Stop kafka thread

In [None]:
def stop_kafka_producer():
    """Ask the background producer loop to exit and block until its thread has ended."""
    print("Stopping Kafka producer...")
    # Raise the shared flag first, then wait (without a time limit) for the thread to finish.
    stop_event.set()
    producer_thread.join(timeout=None)
    print("Kafka producer stopped.")


# Run this cell to shut the background producer down
stop_kafka_producer()