## Import necessary packages

In [2]:
from pyspark.sql import SparkSession
import requests
import pandas as pd

### Load data from API

In [3]:
def load_data(base_url, timeout=30):
    """Fetch launch records from an API endpoint into a pandas DataFrame.

    Args:
        base_url: URL of the endpoint; its response body must be a JSON
            list of launch objects.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled connection.

    Returns:
        pandas.DataFrame with the columns ``flight_number``, ``name``,
        ``date_utc``, ``details``, ``rocket``, ``success`` and ``with_crew``
        (one row per launch). The columns are present even when the API
        returns an empty list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If a launch object lacks one of the expected fields.
    """
    # Query the API for the launch data.
    response = requests.get(base_url, timeout=timeout)

    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()

    # Decode the JSON payload into Python objects.
    launches_data = response.json()

    # Keep only the fields the analysis needs.
    launches = [{
        "flight_number": int(launch["flight_number"]),
        "name": launch["name"],
        "date_utc": pd.to_datetime(launch["date_utc"]),
        "details": launch["details"],
        "rocket": launch["rocket"],
        "success": launch["success"],
        # A launch counts as crewed when its crew field is non-empty.
        "with_crew": bool(launch["crew"]),
    } for launch in launches_data]

    # Fixing the column list keeps the schema stable for an empty result.
    columns = [
        "flight_number", "name", "date_utc", "details",
        "rocket", "success", "with_crew",
    ]
    df_launches = pd.DataFrame(launches, columns=columns)
    return df_launches

### Initialize Spark and create a Spark DataFrame

In [4]:
# Build the Spark session for this notebook (getOrCreate hands back the
# session the builder resolves to).
spark = (
    SparkSession.builder
    .appName("SpaceX Data Processing Lab")
    .getOrCreate()
)

In [5]:
# Constants
BASE_URL = "https://api.spacexdata.com/v5/launches"

# Fetch the launch data from the API as a pandas DataFrame
df_launches = load_data(BASE_URL)

# Convert the pandas DataFrame into a Spark DataFrame
launches_df = spark.createDataFrame(df_launches)

# Display the rows and the column types
launches_df.show()
print(launches_df.dtypes)

+-------------+--------------------+-------------------+--------------------+--------------------+-------+---------+
|flight_number|                name|           date_utc|             details|              rocket|success|with_crew|
+-------------+--------------------+-------------------+--------------------+--------------------+-------+---------+
|            1|           FalconSat|2006-03-24 22:30:00|Engine failure at...|5e9d0d95eda69955f...|  false|    false|
|            2|             DemoSat|2007-03-21 01:10:00|Successful first ...|5e9d0d95eda69955f...|  false|    false|
|            3|         Trailblazer|2008-08-03 03:34:00|Residual stage 1 ...|5e9d0d95eda69955f...|  false|    false|
|            4|              RatSat|2008-09-28 23:15:00|Ratsat was carrie...|5e9d0d95eda69955f...|   true|    false|
|            5|            RazakSat|2009-07-13 03:35:00|                NULL|5e9d0d95eda69955f...|   true|    false|
|            6|Falcon 9 Test Flight|2010-06-04 18:45:00|        

### Filter launches with crews

In [6]:
# Keep only the rows whose with_crew flag is true, then preview them.
launches_with_crew_df = launches_df.filter(launches_df["with_crew"])
launches_with_crew_df.show()

+-------------+--------------------+-------------------+--------------------+--------------------+-------+---------+
|flight_number|                name|           date_utc|             details|              rocket|success|with_crew|
+-------------+--------------------+-------------------+--------------------+--------------------+-------+---------+
|           94|CCtCap Demo Missi...|2020-05-30 19:22:00|SpaceX will launc...|5e9d0d95eda69973a...|   true|     true|
|          107|              Crew-1|2020-11-16 00:27:00|SpaceX will launc...|5e9d0d95eda69973a...|   true|     true|
|          123|              Crew-2|2021-04-23 09:49:00|SpaceX launches t...|5e9d0d95eda69973a...|   true|     true|
|          135|        Inspiration4|2021-09-16 00:02:00|Inspiration4 is t...|5e9d0d95eda69973a...|   true|     true|
|          136|              Crew-3|2021-11-11 02:03:00|SpaceX will launc...|5e9d0d95eda69973a...|   true|     true|
|          156|                Ax-1|2022-04-08 15:17:00|Axiom Mi

### Calculate success rate

In [7]:
# Count the crewed launches and how many of them succeeded.
crewed_total = launches_with_crew_df.count()
crewed_successful = launches_with_crew_df.filter(launches_with_crew_df.success).count()

if crewed_total:
    success_rate = (crewed_successful / crewed_total) * 100
    print(f"Rate of successful launches with crews -> {success_rate}%")
else:
    # No crewed launches: the rate is undefined, so avoid dividing by zero.
    success_rate = None
    print("No launches with crews found; success rate is undefined")

Rate of successful launches with crews -> 100.0%


In [8]:
# Stop the Spark session now that the analysis is finished.
spark.stop()