In [1]:
%pwd

'c:\\Users\\Raghavan\\Documents\\CropsAndWeedsSegmentation\\research'

In [2]:
import os

# Step up from research/ to the project root so that the relative paths used
# by later cells resolve. The guard keeps this cell idempotent: without it,
# every re-run of the cell would climb one more directory.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [10]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataIngestionConfig:
    """Settings consumed by the data-ingestion step.

    Groups the local output directories for every dataset split together
    with the MongoDB connection details.
    """

    # Base directory of the data-ingestion artifacts.
    root_dir: Path

    # Image / mask directories, one pair per split.
    train_img_dir: Path
    train_mask_dir: Path
    test_img_dir: Path
    test_mask_dir: Path
    val_img_dir: Path
    val_mask_dir: Path

    # Directory for the "others" images (no mask directory is declared for it).
    others_img_dir: Path

    # MongoDB connection string, database name and collection name.
    mongo_uri: str
    database_name: str
    collection_name: str



In [14]:
from src.cropsAndWeedsSegmentation.constants import *
from src.cropsAndWeedsSegmentation.utils.common import read_yaml,create_directories
class ConfigurationManager:
    """Loads the project's YAML files and builds typed configuration objects."""

    # Directory-valued keys of the `data_ingestion` config section, in the
    # order in which the directories are created.
    _DATA_INGESTION_DIR_FIELDS = (
        "root_dir",
        "train_img_dir",
        "train_mask_dir",
        "test_img_dir",
        "test_mask_dir",
        "val_img_dir",
        "val_mask_dir",
        "others_img_dir",
    )

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read the config and params YAML files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self, mongo_uri: str) -> DataIngestionConfig:
        """Create the data-ingestion directories and return their configuration.

        Args:
            mongo_uri: MongoDB connection string; stored on the result as-is.

        Returns:
            A DataIngestionConfig whose directory fields are `Path` objects,
            as the dataclass declares them, and whose database and collection
            names come from the `data_ingestion` config section.
        """
        config = self.config.data_ingestion

        # Read every directory entry once so the list handed to
        # create_directories and the dataclass fields cannot drift apart.
        directories = {
            name: getattr(config, name)
            for name in self._DATA_INGESTION_DIR_FIELDS
        }
        create_directories(list(directories.values()))

        return DataIngestionConfig(
            # The YAML values are wrapped in Path to honour the dataclass
            # annotations; Path objects are still accepted by os.path.join.
            **{name: Path(value) for name, value in directories.items()},
            mongo_uri=mongo_uri,
            database_name=config.database_name,
            collection_name=config.collection_name,
        )
    



In [17]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import certifi
from src.cropsAndWeedsSegmentation.logging.logger import logger
from src.cropsAndWeedsSegmentation.exception.exception import SegmentationException
import sys
from PIL import Image
import io
from box import ConfigBox
from typing import Tuple
import pymongo

## component
class DataIngestion:
    """Fetches image documents from MongoDB and writes them to local disk."""

    # Pillow image modes the JPEG encoder can write directly. Any other mode
    # (for example RGBA or P) is converted to RGB before being saved as .jpg.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings (paths and MongoDB details) on the instance."""
        self.config = config

    def get_image_docs_from_mongo_db(self) -> Tuple[pymongo.cursor.Cursor, pymongo.cursor.Cursor, pymongo.cursor.Cursor, pymongo.cursor.Cursor]:
        '''
        Query the configured collection once per category.

        Returns:
            Four cursors, in this order: train, val, test, others.
        '''
        # NOTE: the client is intentionally not closed here. The returned
        # cursors are consumed later by the caller and need the connection.
        client = MongoClient(self.config.mongo_uri, tlsCAFile=certifi.where(), server_api=ServerApi('1'))
        db = client[self.config.database_name]
        collection = db[self.config.collection_name]

        train_img_docs = collection.find({'category': 'train'})
        val_img_docs = collection.find({'category': 'val'})
        test_img_docs = collection.find({'category': 'test'})
        others_img_docs = collection.find({'category': 'others'})

        logger.info('All the required documents retrieved successfully!!')
        return train_img_docs, val_img_docs, test_img_docs, others_img_docs

    def store_img_docs_locally(self, docs: pymongo.cursor.Cursor, split: str) -> None:
        '''
        Decode every document's image (and mask, when present) and save it.

        Images go to root_dir/<category>/img/<filename>.jpg and masks to
        root_dir/<category>/mask/<filename>.png, where <category> and
        <filename> are read from each document.

        Args:
            docs: Documents to store. Each one is read through the keys
                "image", "filename", "category" and, optionally, "mask".
            split: Name of the split; used only in the final log message.
        '''
        for doc in docs:
            category_dir = os.path.join(self.config.root_dir, doc["category"])

            img_filepath = os.path.join(category_dir, 'img', f'{doc["filename"]}.jpg')
            # The path is derived from the document itself, so make sure the
            # directory exists instead of relying on the configured ones.
            os.makedirs(os.path.dirname(img_filepath), exist_ok=True)
            with Image.open(io.BytesIO(doc["image"])) as img:
                # Saving e.g. an RGBA or palette image as JPEG raises an
                # error, so such modes are converted to RGB first.
                jpeg_ready = img if img.mode in self._JPEG_MODES else img.convert("RGB")
                jpeg_ready.save(img_filepath)

            # A mask is optional: tolerate both a missing key and a None value.
            mask_bytes = doc.get("mask")
            if mask_bytes is not None:
                mask_filepath = os.path.join(category_dir, 'mask', f'{doc["filename"]}.png')
                os.makedirs(os.path.dirname(mask_filepath), exist_ok=True)
                with Image.open(io.BytesIO(mask_bytes)) as mask:
                    mask.save(mask_filepath)

        logger.info(f'All the images and respective masks of {split} set were loaded to {self.config.root_dir} successfully')


## Layout on disk: <root_dir>/<category>/img/<filename>.jpg and <root_dir>/<category>/mask/<filename>.png

In [18]:
##pipeline
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
mongo_uri = os.getenv('MONGO_URL')
try:
    # Fail fast: os.getenv returns None when the variable is unset, and
    # handing None on would not point the client at the intended database.
    if not mongo_uri:
        raise ValueError("Environment variable 'MONGO_URL' is not set")

    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config(mongo_uri=mongo_uri)

    data_ingestion = DataIngestion(data_ingestion_config)

    train_img_docs, val_img_docs, test_img_docs, others_img_docs = data_ingestion.get_image_docs_from_mongo_db()

    # Store each split's documents locally, in the same order as before.
    docs_by_split = (
        ('train', train_img_docs),
        ('val', val_img_docs),
        ('test', test_img_docs),
        ('others', others_img_docs),
    )
    for split, docs in docs_by_split:
        data_ingestion.store_img_docs_locally(docs, split)

except Exception as e:
    logger.error(f'Error occured : {e}')
    # Chain explicitly so the original traceback stays attached as the cause.
    raise SegmentationException(e, sys) from e


[2025-03-09 20:59:58,170: INFO: common: Yaml file: config\config.yaml loaded successfully]
[2025-03-09 20:59:58,173: INFO: common: Yaml file: params.yaml loaded successfully]
[2025-03-09 20:59:58,176: INFO: common: Created directory at: artifacts]
[2025-03-09 20:59:58,177: INFO: common: Created directory at: artifacts/data_ingestion]
[2025-03-09 20:59:58,180: INFO: common: Created directory at: artifacts/data_ingestion/train/img]
[2025-03-09 20:59:58,182: INFO: common: Created directory at: artifacts/data_ingestion/train/mask]
[2025-03-09 20:59:58,186: INFO: common: Created directory at: artifacts/data_ingestion/test/img]
[2025-03-09 20:59:58,188: INFO: common: Created directory at: artifacts/data_ingestion/test/mask]
[2025-03-09 20:59:58,192: INFO: common: Created directory at: artifacts/data_ingestion/val/img]
[2025-03-09 20:59:58,194: INFO: common: Created directory at: artifacts/data_ingestion/val/mask]
[2025-03-09 20:59:58,197: INFO: common: Created directory at: artifacts/data_in