In [14]:
import sys
from typing import Tuple
import numpy as np
from pandas import DataFrame
from src.forest.constant import *
from src.forest.exception import ForestException
from src.forest.logger import logging
from src.forest.utils.main_utils import MainUtils
from src.forest.entity.config_entity import DatabaseConfig
from src.forest.configuration.mongo_operations import MongoDBOperation

In [15]:
class DataIngestion:
    """
    Data ingestion component: pulls the configured MongoDB collection into a
    pandas DataFrame.

    Collaborators are created in ``__init__``: ``MongoDBOperation`` performs the
    fetch and ``DatabaseConfig`` supplies the database and collection names.
    """

    def __init__(self):
        self.utils = MainUtils()

        self.mongo_op = MongoDBOperation()

        self.mongo_config = DatabaseConfig()

    def get_data_from_mongodb(self) -> DataFrame:
        """
        Method Name :   get_data_from_mongodb
        Description :   This method fetches the collection named by the database config
                        from mongodb as a dataframe and replaces every cell equal to the
                        string "na" with NaN

        Output      :   The dataframe is returned
        On Failure  :   Raise a ForestException chained to the original exception

        Version     :   1.2
        Revisions   :   moved setup to cloud
        """
        logging.info("Entered get_data_from_mongodb method of Data_Ingestion class")

        try:
            logging.info("Getting the dataframe from mongodb")

            df = self.mongo_op.get_collection_as_dataframe(
                self.mongo_config.DATABASE_NAME, self.mongo_config.COLLECTION_NAME
            )

            # The literal string "na" marks a missing value in the stored data;
            # normalise it to NaN so pandas treats it as missing.
            df = df.replace("na", np.nan)

            logging.info("Got the dataframe from mongodb")

            logging.info(
                "Exited the get_data_from_mongodb method of Data_Ingestion class"
            )

            return df

        except Exception as e:
            raise ForestException(e, sys) from e

    def initiate_data_ingestion(self) -> DataFrame:
        """
        Method Name :   initiate_data_ingestion
        Description :   This method initiates the data ingestion components of training pipeline
                        by loading the dataframe from mongodb

        Output      :   The ingested dataframe is returned (no train/test split is
                        performed by this method)
        On Failure  :   Raise a ForestException; one raised by get_data_from_mongodb
                        is propagated unchanged

        Version     :   1.2
        Revisions   :   moved setup to cloud
        """
        logging.info("Entered initiate_data_ingestion method of Data_Ingestion class")

        try:
            df = self.get_data_from_mongodb()

            logging.info(
                "Exited the initiate_data_ingestion method of Data_Ingestion class"
            )

            # Return the frame instead of printing all of it: a notebook renders
            # the returned value, and callers can actually use the result.
            return df

        except ForestException:
            # Already wrapped by get_data_from_mongodb; wrapping again would
            # nest one ForestException inside another.
            raise
        except Exception as e:
            raise ForestException(e, sys) from e
    

In [16]:
# Build the ingestion component; its constructor creates the MongoDB operation
# and database-config helpers it needs.
data_ingestion = DataIngestion()
# Kick off ingestion, which fetches the configured collection from MongoDB.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so the
# saved run never completed — presumably the MongoDB fetch stalled; confirm
# connectivity before re-running.
data_ingestion.initiate_data_ingestion()

KeyboardInterrupt: 