In [1]:
# Move the kernel's working directory up one level; presumably this makes the
# project root the cwd so that `src...` imports and relative config paths resolve.
# NOTE: the target is relative, so every re-execution of this cell climbs one
# more directory. Run it once per kernel session.
import os
os.chdir("../")
# Show the resulting working directory as the cell output.
!pwd

/home/aditya/network_security


In [33]:
from pydantic import BaseModel
from pathlib import Path

class DataIngestionConfig(BaseModel):
    """Validated settings for the data-ingestion step.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    and consumed by ``DataIngestion``.
    """
    ## config
    # Directory the exported CSV is written into.
    ingestion_dir: Path
    # Name of the MongoDB collection the records are read from.
    collection_name: str
    # Name of the MongoDB database that holds the collection.
    database_name: str
    # File name of the CSV created inside `ingestion_dir`.
    file_name: str
    

In [34]:
from src.NetworkSecurity.constants import *
from src.NetworkSecurity.utils.common import read_yaml,create_directories

## Loads the project's YAML files (config, params, schema) and builds validated config objects
class ConfigurationManager:
    """Loads the project's YAML files and hands out per-stage config objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # The artifacts root is requested up front, before any stage-specific
        # directory is asked for.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the settings object for the data-ingestion stage.

        Requests the stage's ingestion directory via ``create_directories``
        and copies the ``data_ingestion`` section of the loaded config into a
        ``DataIngestionConfig``.

        Returns:
            DataIngestionConfig: The ingestion settings, validated by the model.
        """
        ingestion_section = self.config.data_ingestion

        # Request the stage's output directory (e.g. artifacts/data_ingestion).
        create_directories([ingestion_section.ingestion_dir])

        return DataIngestionConfig(
            ingestion_dir=ingestion_section.ingestion_dir,
            collection_name=ingestion_section.collection_name,
            database_name=ingestion_section.database_name,
            file_name=ingestion_section.file_name,
        )

In [35]:
# Load the YAML files with the default paths, then derive the validated
# data-ingestion settings that the DataIngestion cell below consumes.
cm = ConfigurationManager()
data_ingestion_config = cm.get_data_ingestion_config()

[2025-03-30 20:55:11,903: INFO: common : Yaml File: config/config.yaml loaded successfully]
[2025-03-30 20:55:11,904: INFO: common : Yaml File: params.yaml loaded successfully]
[2025-03-30 20:55:11,905: INFO: common : Yaml File: schema.yaml loaded successfully]
[2025-03-30 20:55:11,905: INFO: common : created directory at: artifacts]
[2025-03-30 20:55:11,905: INFO: common : created directory at: artifacts/data_ingestion]


In [None]:
import os
import pandas as pd
from pymongo import MongoClient
import certifi
from dotenv import load_dotenv
from src.NetworkSecurity.logging.logger import logger
from src.NetworkSecurity.exception.exception import NetworkSecurityException
import sys

class DataIngestion:
    """Exports a MongoDB collection to a CSV file on local disk.

    The settings object given to the constructor must expose the attributes
    ``database_name``, ``collection_name``, ``ingestion_dir`` and ``file_name``.
    """

    ## gets config from ConfigManager
    def __init__(self, config):
        """Keep a reference to the ingestion settings.

        Args:
            config: Settings object providing the attributes listed on the class.
        """
        self.config = config

    ## extracts data from MongoDB and saves it as CSV
    def download_file(self):
        """Fetch every document of the configured collection and save it as CSV.

        The connection string is read from the ``MONGO_DB_URL`` environment
        variable (after loading a ``.env`` file, if one is present).

        Returns:
            str: Path of the CSV file that was written.

        Raises:
            NetworkSecurityException: If the connection string is missing, the
                collection is empty, or any step of the download/export fails.
                The original error is attached as ``__cause__``.
        """
        try:
            load_dotenv()
            mongo_db_url = os.getenv("MONGO_DB_URL")

            # Fail fast with a clear message: passing None to MongoClient would
            # silently fall back to the driver's default host.
            if not mongo_db_url:
                raise ValueError("MONGO_DB_URL environment variable is not set")

            # Context manager closes the client (and its connections) even if
            # the query raises.
            with MongoClient(mongo_db_url, tlsCAFile=certifi.where()) as client:
                # Choose Database & Collection
                db = client[self.config.database_name]
                collection = db[self.config.collection_name]

                logger.info(f"Fetching data from MongoDB collection: {self.config.collection_name}")

                # Materialise the cursor while the client is still open.
                data = list(collection.find({}, {"_id": 0}))  # Exclude the MongoDB `_id` field

            if not data:
                raise ValueError("No data found in the collection!")

            # Convert to Pandas DataFrame
            df = pd.DataFrame(data)

            # Save as CSV
            os.makedirs(self.config.ingestion_dir, exist_ok=True)
            csv_path = os.path.join(self.config.ingestion_dir, self.config.file_name)
            df.to_csv(csv_path, index=False)

            logger.info(f"Data successfully downloaded and saved to {csv_path}")

            return csv_path  # Return path for further processing

        except Exception as e:
            logger.error(f"Error during data download: {e}")
            # The exception must be raised, not merely constructed; otherwise
            # every failure is swallowed and the method returns None.
            raise NetworkSecurityException(e, sys) from e



In [37]:
# Run the ingestion with the settings built above; the value returned by
# download_file() (the CSV path) is displayed as the cell output.
di = DataIngestion(data_ingestion_config)
di.download_file()

[2025-03-30 20:55:12,948: INFO: 1474577949 : Fetching data from MongoDB collection: NetworkData]


[2025-03-30 20:55:15,582: INFO: 1474577949 : ✅ Data successfully downloaded and saved to artifacts/data_ingestion/datafromDB.csv]


'artifacts/data_ingestion/datafromDB.csv'