### Import libraries

In [1]:
from pyspark.sql import functions as F
import os
import sys

# Make the sibling BaseUtils/ directory (one level above the current working
# directory) importable so the local `hdfs_io` helper module can be found.
path = os.path.dirname(os.getcwd()) + '/BaseUtils/'
base_utils_dir = os.path.abspath(path)
# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
if base_utils_dir not in sys.path:
    sys.path.append(base_utils_dir)

# Import only the name this notebook uses instead of `import *`, so it is
# clear where HDFS_IO comes from and the namespace is not polluted.
from hdfs_io import HDFS_IO

# Helper object that exposes the HDFS `base_url` and `user_path` used later
# to build the output location.
hdfs_obj = HDFS_IO()

In [2]:
# Instantiate the FlightRadar24 API client

from FlightRadar24 import FlightRadar24API
fr_api = FlightRadar24API()

### Get Airlines

In [3]:
# Retrieve the airline records through the FlightRadar24 client; each record
# exposes 'Name', 'Code' and 'ICAO' keys.
airlines = fr_api.get_airlines()

In [4]:
# Peek at the first record to see which fields it carries.
print(airlines[0])

{'Name': '21 Air', 'Code': '2I', 'ICAO': 'CSB'}


In [5]:
# Exploratory check: list the record's attributes to confirm it offers the
# usual dict methods (keys, items, get, ...).
print(dir(airlines[0]))

['__class__', '__class_getitem__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__ior__', '__iter__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__ror__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']


In [6]:
# Re-key every airline record from the API's capitalised field names to the
# lowercase column names used by the DataFrame schema. Despite the variable
# name, each element is a dict, not a tuple.
airlines_tuples = [
    {
        column: record[api_key]
        for column, api_key in (('name', 'Name'), ('code', 'Code'), ('icao', 'ICAO'))
    }
    for record in airlines
]

### Start Spark

In [7]:
from pyspark.sql import SparkSession, types as T

# Build (or reuse) a session against the standalone master at
# spark-master:7077. spark.rpc.message.maxSize is set to 1024
# (MiB, per the Spark configuration reference).
spark = (
    SparkSession.builder
    .appName("LocalSpark")
    .master("spark://spark-master:7077")
    .config("spark.rpc.message.maxSize", "1024")
    .getOrCreate()
)

# Confirm the session is up by reporting the Spark version.
print("Spark version:", spark.version)

Spark version: 3.5.0


#### Create airlines dataframe

In [8]:
# Explicit schema: three nullable string columns, in output order.
airlines_schema = T.StructType([
    T.StructField(column, T.StringType(), True)
    for column in ("name", "code", "icao")
])

In [9]:
# Turn the list of airline dicts into a Spark DataFrame using the explicit
# schema instead of letting Spark infer the column types.
airlines_df = spark.createDataFrame(data=airlines_tuples, schema=airlines_schema)

In [10]:
# Preview the DataFrame; with no arguments show() prints the first 20 rows.
airlines_df.show()

+------------------+----+----+
|              name|code|icao|
+------------------+----+----+
|            21 Air|  2I| CSB|
|      247 Aviation|    | EMC|
|   2Excel Aviation|    | BRO|
|         4 Airways|    | DAK|
|       40-Mile Air|  Q5| MLA|
|  748 Air Services|  FE| IHO|
|             9 Air|  AQ| JYH|
|        Abakan Air|  S5| NKP|
|          ABS Jets|    | ABP|
|Abu Dhabi Aviation|    | BAR|
|           ABX Air|  GB| ABX|
|     Acass Ireland|    | SON|
|      Advanced Air|  AN| WSN|
|   Aegean Airlines|  A3| AEE|
|        Aer Lingus|  EI| EIN|
|     Aer Lingus UK|  EG| EUK|
|         AerCaribe|  JK| ACL|
|              Aero|  5E| BLK|
|              Aero|  N2| NIG|
|        Aero Asahi|    | AKF|
+------------------+----+----+
only showing top 20 rows



#### Store to HDFS

In [11]:
# HDFS target directory for the airlines Parquet dataset. The parts are joined
# with no separator, so this relies on `user_path` already ending in '/'.
destination_path = "".join([hdfs_obj.base_url, hdfs_obj.user_path, 'FlightRadarApi/airlines'])
print(destination_path)

hdfs://namenode:9000/user/jovyan/FlightRadarApi/airlines


In [12]:
# Sanity check: number of airline rows about to be written.
airlines_df.count()

2202

In [13]:
# Persist the airlines DataFrame as Parquet, replacing any existing data at
# the destination.
# NOTE(review): the recorded run of this cell failed with a Py4JError
# ("Answer from Java side is empty"), i.e. the connection to the JVM gateway
# was lost during the write. The root cause is not visible in this notebook -
# check the driver/executor logs before relying on this output.
airlines_df.write.mode('overwrite').parquet(destination_path)

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 52250)
ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 516, in send_command
    raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 539, in send_command
    raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/socketserver.py

Py4JError: An error occurred while calling o43.parquet

In [None]:
# Stop the Spark session created earlier in this notebook.
spark.stop()