In [1]:
import os

# Step up one directory so that relative paths used by later cells resolve from
# the parent folder (presumably the project root -- confirm against the repo layout).
# NOTE(review): the target is relative to the current directory, so running this
# cell a second time in the same kernel moves up one more level.
os.chdir("../")
# Display the resulting working directory as the cell output.
%pwd

'd:\\DHIRAJ\\Data_Science\\Jupyter_Workspace\\Projects\\SpaceX-Falcon-9-first-stage-Landing-Prediction'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Attributes:
        root_dir: Directory that holds the artifacts of this stage.
        data_url: Address of the launches endpoint. It is handed to
            ``requests.get`` as a single URL, hence ``str`` rather than ``list``.
        data_path: Location the extracted CSV file is written to.
    """

    root_dir: Path
    data_url: str
    data_path: Path

In [3]:
from SpaceXF9LandingPred.constants import *
from SpaceXF9LandingPred.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        # read_yaml is a project helper; its result is accessed by attribute below.
        self.config_file = read_yaml(config_filepath)
        self.params_file = read_yaml(params_filepath)

        create_directories([self.config_file.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the stage directory and return the data-ingestion settings.

        Returns:
            A ``DataIngestionConfig`` filled from the ``data_ingestion`` section
            of the configuration file.
        """
        section = self.config_file.data_ingestion
        create_directories([section.root_dir])

        # DataIngestionConfig declares both locations as Path, so convert the
        # raw configuration values instead of passing them through unchanged.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            data_url=section.data_url,
            data_path=Path(section.data_path),
        )

In [5]:
import requests
import pandas as pd
import numpy as np
import datetime

from SpaceXF9LandingPred.logging import logger


class DataIngestion:
    """Downloads launch records from the SpaceX REST API and stores them as CSV.

    ``fetch_data`` is the entry point. The ``get*`` methods each walk one column
    of the launches table, look up the referenced resource and append the
    extracted values to per-column lists kept on the instance. Every ``get*``
    method appends exactly one entry per input row so that all column lists
    stay the same length.
    """

    # Common prefix of the per-resource lookup endpoints.
    API_BASE_URL = "https://api.spacexdata.com/v4"
    # Seconds to wait on a request before giving up instead of blocking forever.
    REQUEST_TIMEOUT = 30
    # Launches dated after this day are dropped from the extract.
    CUTOFF_DATE = datetime.date(2025, 7, 7)

    # Per-column accumulator attributes, in the column order of the output table.
    _COLUMN_ATTRS = (
        'BoosterVersion', 'PayloadMass', 'Orbit', 'LaunchSite', 'Outcome',
        'Flights', 'GridFins', 'Reused', 'Legs', 'LandingPad', 'Block',
        'ReusedCount', 'Serial', 'Longitude', 'Latitude',
    )

    def __init__(self, config: "DataIngestionConfig"):
        """Store the stage configuration and start with empty column lists.

        Args:
            config: Object providing ``data_url`` and ``data_path``.
        """
        self.config = config
        self._reset_state()

    def _reset_state(self):
        """Empty every column list and forget previously fetched responses."""
        for attr in self._COLUMN_ATTRS:
            setattr(self, attr, [])
        self._json_cache = {}

    def _get_json(self, url):
        """Return the decoded JSON body of ``url``, fetching each URL only once.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        if url not in self._json_cache:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Fail here rather than with a KeyError on an error payload later.
            response.raise_for_status()
            self._json_cache[url] = response.json()
        return self._json_cache[url]

    def _lookup(self, resource, identifier):
        """Fetch ``<API_BASE_URL>/<resource>/<identifier>`` as JSON."""
        return self._get_json(f"{self.API_BASE_URL}/{resource}/{identifier}")

    def getBoosterVersion(self, data):
        """Append the rocket name for every entry of ``data['rocket']``.

        A missing (falsy) id contributes ``None`` so rows stay aligned.
        """
        for rocket_id in data['rocket']:
            if rocket_id:
                self.BoosterVersion.append(self._lookup('rockets', rocket_id)['name'])
            else:
                self.BoosterVersion.append(None)

    def getLaunchSite(self, data):
        """Append longitude, latitude and name for every entry of ``data['launchpad']``.

        A missing (falsy) id contributes ``None`` to all three lists.
        """
        for pad_id in data['launchpad']:
            if pad_id:
                pad = self._lookup('launchpads', pad_id)
                self.Longitude.append(pad['longitude'])
                self.Latitude.append(pad['latitude'])
                self.LaunchSite.append(pad['name'])
            else:
                self.Longitude.append(None)
                self.Latitude.append(None)
                self.LaunchSite.append(None)

    def getPayloadData(self, data):
        """Append mass and orbit for every entry of ``data['payloads']``.

        A missing (falsy) id contributes ``None`` to both lists.
        """
        for payload_id in data['payloads']:
            if payload_id:
                payload = self._lookup('payloads', payload_id)
                self.PayloadMass.append(payload['mass_kg'])
                self.Orbit.append(payload['orbit'])
            else:
                self.PayloadMass.append(None)
                self.Orbit.append(None)

    def getCoreData(self, data):
        """Append core details and landing fields for every entry of ``data['cores']``.

        Each entry is a mapping; when its ``'core'`` id is ``None`` the block,
        reuse count and serial are recorded as ``None``.
        """
        for core in data['cores']:
            if core['core'] is not None:
                details = self._lookup('cores', core['core'])
                self.Block.append(details['block'])
                self.ReusedCount.append(details['reuse_count'])
                self.Serial.append(details['serial'])
            else:
                self.Block.append(None)
                self.ReusedCount.append(None)
                self.Serial.append(None)
            self.Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
            self.Flights.append(core['flight'])
            self.GridFins.append(core['gridfins'])
            self.Reused.append(core['reused'])
            self.Legs.append(core['legs'])
            self.LandingPad.append(core['landpad'])

    def fetch_data(self):
        """Download the launches, enrich them and write the result to CSV.

        Keeps launches with exactly one core and one payload dated on or before
        ``CUTOFF_DATE``, resolves the referenced resources, removes rows whose
        booster is "Falcon 1", renumbers ``FlightNumber`` from 1 and writes the
        table to ``self.config.data_path``.

        Raises:
            requests.RequestException: If any API request fails or times out.
        """
        # Start clean so a second call does not append to earlier results.
        self._reset_state()

        launches = pd.json_normalize(self._get_json(self.config.data_url))
        launches = launches[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

        launches = launches[launches['cores'].map(len) == 1]
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the downloaded table.
        launches = launches[launches['payloads'].map(len) == 1].copy()

        # Unwrap the single-element lists.
        launches['cores'] = launches['cores'].map(lambda items: items[0])
        launches['payloads'] = launches['payloads'].map(lambda items: items[0])

        launches['date'] = pd.to_datetime(launches['date_utc']).dt.date
        launches = launches[launches['date'] <= self.CUTOFF_DATE]

        self.getBoosterVersion(launches)
        self.getLaunchSite(launches)
        self.getPayloadData(launches)
        self.getCoreData(launches)

        launch_dict = {
            'FlightNumber': list(launches['flight_number']),
            'Date': list(launches['date']),
        }
        launch_dict.update((name, getattr(self, name)) for name in self._COLUMN_ATTRS)

        data_falcon = pd.DataFrame(launch_dict)

        # Copy before renumbering to avoid assigning into a filtered view.
        data_falcon9 = data_falcon[data_falcon["BoosterVersion"] != "Falcon 1"].copy()
        data_falcon9['FlightNumber'] = list(range(1, len(data_falcon9) + 1))

        logger.info(f"Total {len(data_falcon9)} records extracted from the API")

        data_falcon9.to_csv(self.config.data_path, index=False)
        logger.info("Data file has been created in artifacts")

In [6]:
# Run the data-ingestion stage end to end: load the configuration, build the
# stage settings, then download the launches and write the CSV file.
try:
    config=ConfigurationManager()
    data_ingestion_config=config.get_data_ingestion_config()
    data_ingestion=DataIngestion(config=data_ingestion_config)
    data_ingestion.fetch_data()
except Exception:
    # Record the failure in the project log, then re-raise unchanged; a bare
    # `raise` keeps the original traceback.
    logger.exception("Data ingestion stage failed")
    raise

[2025-08-11 20:01:52,750: INFO: common: yaml file : config\config.yaml loaded successfully]
[2025-08-11 20:01:52,756: INFO: common: yaml file : params.yaml loaded successfully]
[2025-08-11 20:01:52,758: INFO: common: created directory at : artifacts]
[2025-08-11 20:01:52,760: INFO: common: created directory at : artifacts/data_ingestion]


KeyboardInterrupt: 