### Import libraries

In [1]:
from pyspark.sql import functions as F
import os
import sys

# Make the shared helper modules importable: the BaseUtils directory is
# looked up under the parent of the current working directory.
path = os.path.join(os.path.dirname(os.getcwd()), 'BaseUtils')
base_utils_dir = os.path.abspath(path)
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if base_utils_dir not in sys.path:
    sys.path.append(base_utils_dir)

# Import only the name this notebook uses rather than a wildcard, so it is
# clear where HDFS_IO comes from and nothing else leaks into the namespace.
from hdfs_io import HDFS_IO

# HDFS helper object; a later cell reads its base_url and user_path
# attributes to build the output location.
hdfs_obj = HDFS_IO()

In [2]:
# Instantiate the FlightRadar24 API client

from FlightRadar24 import FlightRadar24API
fr_api = FlightRadar24API()

### Get Zones

In [3]:
# Fetch the zone definitions from the API client. The recorded output of the
# next cell shows a dict keyed by zone name whose values hold bounding-box
# corners (tl_y, tl_x, br_y, br_x) and, for some zones, nested 'subzones'.
zones = fr_api.get_zones()

In [4]:
# Show the container type first, then the raw payload on its own line.
print(type(zones), zones, sep='\n')

<class 'dict'>
{'europe': {'tl_y': 72.57, 'tl_x': -16.96, 'br_y': 33.57, 'br_x': 53.05, 'subzones': {'poland': {'tl_y': 56.86, 'tl_x': 11.06, 'br_y': 48.22, 'br_x': 28.26}, 'germany': {'tl_y': 57.92, 'tl_x': 1.81, 'br_y': 45.81, 'br_x': 16.83}, 'uk': {'tl_y': 62.61, 'tl_x': -13.07, 'br_y': 49.71, 'br_x': 3.46, 'subzones': {'london': {'tl_y': 53.06, 'tl_x': -2.87, 'br_y': 50.07, 'br_x': 3.26}, 'ireland': {'tl_y': 56.22, 'tl_x': -11.71, 'br_y': 50.91, 'br_x': -4.4}}}, 'spain': {'tl_y': 44.36, 'tl_x': -11.06, 'br_y': 35.76, 'br_x': 4.04}, 'france': {'tl_y': 51.07, 'tl_x': -5.18, 'br_y': 42.17, 'br_x': 8.9}, 'ceur': {'tl_y': 51.39, 'tl_x': 11.25, 'br_y': 39.72, 'br_x': 32.55}, 'scandinavia': {'tl_y': 72.12, 'tl_x': -0.73, 'br_y': 53.82, 'br_x': 40.67}, 'italy': {'tl_y': 47.67, 'tl_x': 5.26, 'br_y': 36.27, 'br_x': 20.64}}}, 'northamerica': {'tl_y': 75, 'tl_x': -180, 'br_y': 3, 'br_x': -52, 'subzones': {'na_n': {'tl_y': 72.82, 'tl_x': -177.97, 'br_y': 41.92, 'br_x': -52.48}, 'na_c': {'tl_y':

In [5]:
# Keep only the top-level zone names, each wrapped in a one-field record
# ({'name': <zone>}) so the list can be turned into a DataFrame later on.
zones_tuples = [dict(name=zone_name) for zone_name in zones.keys()]
print(zones_tuples)

[{'name': 'europe'}, {'name': 'northamerica'}, {'name': 'southamerica'}, {'name': 'oceania'}, {'name': 'asia'}, {'name': 'africa'}, {'name': 'atlantic'}, {'name': 'maldives'}, {'name': 'northatlantic'}]


### Start Spark

In [6]:
from pyspark.sql import SparkSession
from pyspark.sql import types as T

# Obtain a Spark session bound to the master at spark://spark-master:7077
# (getOrCreate hands back an already-active session if there is one).
# NOTE(review): spark.rpc.message.maxSize is set to "1024" — confirm the unit
# and that this limit is actually needed for this small dataset.
spark = (
    SparkSession.builder
    .appName("LocalSpark")
    .master("spark://spark-master:7077")
    .config("spark.rpc.message.maxSize", "1024")
    .getOrCreate()
)

print("Spark version:", spark.version)

Spark version: 3.5.0


#### Create zones dataframe

In [7]:
# Explicit schema for the zones DataFrame: a single nullable string
# column called "name".
zones_schema = T.StructType().add("name", T.StringType(), True)

In [8]:
# Build the zones DataFrame from the list of {'name': ...} records, applying
# the explicit schema instead of letting Spark infer one.
zones_df = spark.createDataFrame(zones_tuples, zones_schema)

#### Store to HDFS

In [9]:
# Output location for the zones dataset: base_url + user_path + a fixed
# 'FlightRadarApi/zones' suffix, all taken from the HDFS helper object.
destination_path = hdfs_obj.base_url + hdfs_obj.user_path + 'FlightRadarApi/zones'
# Disabled alternative that omits the user_path component:
# destination_path = hdfs_obj.base_url + 'FlightRadarApi/zones'
print(destination_path)

hdfs://namenode:9000/user/jovyan/FlightRadarApi/zones


In [10]:
# Sanity check before writing: number of rows (one per top-level zone name).
zones_df.count()

9

In [11]:
# Persist the zones as Parquet at destination_path; 'overwrite' replaces
# whatever was previously stored at that location.
zones_df.write.mode('overwrite').parquet(destination_path)

In [12]:
# Shut down the Spark session now that the data has been written.
spark.stop()