In [1]:
# Display the kernel's current working directory.
%pwd

'c:\\Users\\Asus\\Desktop\\Krish Naik MLOps Course\\SayGenix AI Technologies\\Predict_Flat_Rent_Dhaka\\research'

In [2]:
import os
os.chdir("../")
%pwd

'c:\\Users\\Asus\\Desktop\\Krish Naik MLOps Course\\SayGenix AI Technologies\\Predict_Flat_Rent_Dhaka'

In [3]:
# Configuration

from pathlib import Path

from src.utils.common import read_yml


class ConfigurationManager:
    """Load the project configuration and expose per-stage settings.

    Creating an instance reads the configuration file with ``read_yml`` and
    makes sure the artifacts root directory exists on disk.
    """

    # Keys that must be present in the "data_ingestion" section.
    _INGESTION_KEYS = ("root_dir", "ingested_data_file")

    def __init__(self, config_filepath):
        """Read the configuration and create the artifacts root directory.

        Args:
            config_filepath: Location of the configuration file; passed
                unchanged to ``read_yml``.
        """
        self.config = read_yml(config_filepath)

        # create artifacts root directory ("artifacts" when the key is absent)
        artifacts_root = self.config.get("artifacts_root", "artifacts")
        Path(artifacts_root).mkdir(parents=True, exist_ok=True)

    def get_data_ingestion_config(self):
        """Return the data-ingestion settings with their values as ``Path`` objects.

        Returns:
            dict: ``{"root_dir": Path, "ingested_data_file": Path}`` built from
            the "data_ingestion" section of the loaded configuration.

        Raises:
            ValueError: If the "data_ingestion" section, or one of its required
                keys, is missing or empty.
        """
        ingestion_config = self.config.get("data_ingestion")
        if not ingestion_config:
            # Without this check the failure would be an opaque
            # "'NoneType' object has no attribute 'get'".
            raise ValueError("configuration has no 'data_ingestion' section")

        missing = [key for key in self._INGESTION_KEYS if not ingestion_config.get(key)]
        if missing:
            raise ValueError(
                "'data_ingestion' configuration is missing required key(s): "
                + ", ".join(missing)
            )

        return {key: Path(ingestion_config.get(key)) for key in self._INGESTION_KEYS}

In [None]:
# Component
from pymongo.mongo_client import MongoClient
import pandas as pd
from dotenv import load_dotenv

load_dotenv()

class DataIngestion:
    """Copy a MongoDB collection into a CSV file.

    The target database and collection are read from the ``DATABASE`` and
    ``COLLECTION`` environment variables, the connection string from
    ``MONGODB_URI``. Output locations come from the ingestion config mapping.
    """

    def __init__(self, data_ingestion_config):
        """Store the ingestion settings and create the ingestion root directory.

        Args:
            data_ingestion_config: Mapping providing "root_dir" and
                "ingested_data_file" (read with ``.get``).
        """
        self.data_ingestion_config = data_ingestion_config

        # create ingestion artifacts root dir
        Path(self.data_ingestion_config.get("root_dir")).mkdir(parents=True, exist_ok=True)

    def ingest_data(self):
        """Fetch every document of the configured collection and save it as CSV.

        The CSV is written to the configured "ingested_data_file" path (without
        the DataFrame index); the parent directory is created when needed.

        Raises:
            ValueError: If ``DATABASE`` or ``COLLECTION`` is unset or empty.
        """
        mongodb_uri = os.getenv("MONGODB_URI")
        database_name = os.getenv("DATABASE")
        collection_name = os.getenv("COLLECTION")

        # Fail early with a readable message instead of an error raised from
        # inside pymongo after a client has already been created.
        # NOTE(review): MONGODB_URI is passed through unchanged; when it is
        # unset MongoClient receives None and presumably falls back to its
        # default host - confirm that is intended.
        unset = [
            name
            for name, value in (("DATABASE", database_name), ("COLLECTION", collection_name))
            if not value
        ]
        if unset:
            raise ValueError(
                "missing required environment variable(s): " + ", ".join(unset)
            )

        client = MongoClient(mongodb_uri)
        try:
            collection = client[database_name][collection_name]
            # Materialise the cursor while the connection is still open.
            df = pd.DataFrame(list(collection.find()))
        finally:
            # Always release the connection, including when the query fails.
            client.close()

        print(f"loaded data from mongodb. dataframe shape: {df.shape}")

        df_save_path = Path(self.data_ingestion_config.get("ingested_data_file"))

        # make parent directory
        df_save_path.parent.mkdir(parents=True, exist_ok=True)

        # save dataframe
        df.to_csv(df_save_path, index=False)
        

In [None]:
# pipeline:
import sys
from src.utils.constant import CONFIG_FILE_PATH
from src.utils.exception import CustomException

# Run the ingestion stage end to end: load the configuration, build the
# ingestion component from its settings, then pull the data and write the CSV.
try:
    config = ConfigurationManager(config_filepath = CONFIG_FILE_PATH)
    ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(ingestion_config)
    data_ingestion.ingest_data()

except Exception as e:
    # Chain explicitly so the traceback marks the original error as the direct
    # cause of the CustomException rather than as an error raised while
    # handling it.
    raise CustomException(str(e), sys) from e

loaded data from mongodb. dataframe shape: (28787, 6)


In [6]:
from src.utils.constant import CONFIG_FILE_PATH
