In [None]:
import json
import logging
import os
from io import BytesIO
from time import sleep
from typing import Self

from minio import Minio
from requests import get

# Configure root logging (INFO and above) to write to NeoPipeline.log, then
# create the module-level logger used throughout the notebook.
logging.basicConfig(level=logging.INFO, filename='NeoPipeline.log')
logger = logging.getLogger(__name__)

In [None]:
# Connect to Minio blob storage
def create_minio_client(endpoint='localhost:9000', 
                        access_key='minioadmin', 
                        secret_key='minioadmin', 
                        secure=False):
    """Create a Minio client and verify the connection by listing buckets.

    Args:
        endpoint: Address of the Minio server, passed to ``Minio(endpoint=...)``.
        access_key: Access key passed to the ``Minio`` constructor.
        secret_key: Secret key passed to the ``Minio`` constructor.
        secure: Passed to ``Minio(secure=...)``.

    Returns:
        A ``(client, connection_status)`` tuple. ``connection_status`` is True
        when ``client.list_buckets()`` succeeded and False otherwise. ``client``
        is ``None`` when the client object itself could not be constructed;
        if only the bucket listing failed, the constructed client is still
        returned alongside ``False``.
    """
    # Bound up-front so the failure path never touches an unassigned local
    # when the Minio constructor itself raises.
    client = None

    try:
        # Initialize client
        client = Minio(endpoint=endpoint,
                       access_key=access_key,
                       secret_key=secret_key,
                       secure=secure)

        # verify connection
        client.list_buckets()
    except Exception as e:
        # Best-effort by design: callers branch on the returned status flag.
        logger.error(f"Failed to connect to Minio at {endpoint}: {e}")
        return client, False

    logger.info(f"Successfully connected to Minio at {endpoint}")
    return client, True

    
# Build the shared Minio client once with the default settings; the second
# value is the connection flag returned by create_minio_client.
(minio_client, cur_connection_status) = create_minio_client()

In [None]:
# Set request parameters for NASA API request
# The API key is read from the NASA_API_KEY environment variable so that no
# credential is stored in the notebook. NASA's public, rate-limited DEMO_KEY
# is used when the variable is not set.
api_key_param = os.environ.get('NASA_API_KEY', 'DEMO_KEY')
api_uri_param = 'https://api.nasa.gov/neo/rest/v1/feed?'
start_date_param = '2025-05-02'
end_date_param = '2025-05-09'
bucket_name_param = 'neo'
# Pipeline stage to run: 'bronze', 'silver' or 'gold' (the cases NeoApiClient handles).
mode = 'silver'

In [None]:
class NeoApiClient:
    def __init__(self, 
                 api_key,
                 api_uri,
                 start_date, 
                 end_date, 
                 storage, 
                 connection_status,
                 bucket_name,
                 mode):
        
        self.key = api_key
        self.api_uri = api_uri
        self.start_date = start_date
        self.end_date = end_date
        self.storage = storage
        self.connection_status = connection_status
        self.bucket_name = bucket_name
        self.mode = mode
        self.data = None

        
    def extract(self) -> Self:
        
        full_uri = f'{self.api_uri}start_date={self.start_date}&end_date={self.end_date}&api_key={self.key}'
       
        match self.mode:
            
            case 'bronze':
                HttpResponse = get(full_uri)

                # Convert JSON to bytes
                json_bytes = json.dumps(HttpResponse.json()).encode('utf-8')

                # Create a BytesIO object
                self.data = BytesIO(json_bytes)

                return self
            
            case 'silver':
              
                obj_name = f'bronze/{self.bucket_name}-{self.start_date}_{self.end_date}.json'
                
                if self.connection_status:

                    try:
                        blob = self.storage.get_object(self.bucket_name, obj_name)
                        logger.info(f"{obj_name} retrieved from '{self.bucket_name}' bucket")
                        data: str = blob.data.decode('utf-8')
                        self.data = json.loads(data)

                    finally:
                        blob.close()
                        blob.release_conn()
                    

                    return self
                else:
                    logger.error("Connection Error: No files extracted")
                    return self
                
            case 'gold':
              
                obj_name = f'silver/{self.bucket_name}-{self.start_date}_{self.end_date}.json'
                
                if self.connection_status:

                    try:
                        blob = self.storage.get_object(self.bucket_name, obj_name)
                        logger.info(f"{obj_name} retrieved from '{self.bucket_name}' bucket")
                        data: str = blob.data.decode('utf-8')
                        self.data = json.loads(data)

                    finally:
                        blob.close()
                        blob.release_conn()
                    

                    return self
                else:
                    logger.error("Connection Error: No files extracted")
                    return self

                
    def transform(self) -> Self:
        match self.mode:
            case 'bronze':
                logger.info("No transformation implemented")
            
            case 'silver':
                logger.info("No transformation implemented")
            
            case 'gold':
                logger.info("Final data transformation complete")
                
                        
    def load(self) -> None:
        match self.mode:
            case 'bronze':
                obj_name = f'{self.mode}/{self.bucket_name}-{self.start_date}_{self.end_date}.json'

                if self.connection_status:
                    self.storage.put_object(
                        self.bucket_name, 
                        obj_name, 
                        self.data,
                        length=len(self.data.getvalue()),
                        content_type='application/json'
                    )

                    logger.info(
                        "JSON file successfully uploaded as object",
                        obj_name, "to bucket", bucket,
                    )
                else:
                    logger.error("Connection Error: No files loaded")
            
            case 'silver':
                obj_name = f'{self.mode}/{self.bucket_name}-{self.start_date}_{self.end_date}.parquet'
                logger.info(
                    "Parquet file successfully uploaded, with Iceberg table format as object",
                    obj_name, ", to bucket", bucket,
                )

            case 'gold':
                obj_name = f'{self.mode}/{self.bucket_name}-{self.start_date}_{self.end_date}.parquet'
                logger.info(
                    "Parquet file successfully uploaded, with Iceberg table format as object",
                    obj_name, ", to bucket", bucket,
                )
                


In [None]:

# Assemble the pipeline client from the request parameters and the shared
# Minio connection created earlier in the notebook.
neo_client = NeoApiClient(
    api_key=api_key_param,
    api_uri=api_uri_param,
    start_date=start_date_param,
    end_date=end_date_param,
    storage=minio_client,
    connection_status=cur_connection_status,
    bucket_name=bucket_name_param,
    mode=mode,
)

# Run the extract -> transform -> load stages for the configured mode.
neo_client.extract().transform().load()
