In [1]:
import os

In [2]:
%pwd

'e:\\real\\barkalyze\\research'

In [4]:
# Move up to the parent directory only when the `src` package directory
# (imported from further down) is not visible from the current directory.
# The guard makes this cell idempotent: re-running it no longer climbs one
# level higher each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [5]:
%pwd

'e:\\real\\barkalyze'

In [14]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of the settings used by the data-ingestion stage.

    Instances are frozen: assigning to a field after construction raises.
    """

    # Directory belonging to the ingestion stage.
    root_dir: Path
    # Connection string handed to the MongoDB client.
    MONGO_URI: str
    # Local path prefix under which downloaded data is stored.
    local_data_file: Path

In [16]:
from src.bark.constants import *
from src.bark.utils.common import read_yaml, create_directories

In [9]:
import os
from pymongo import MongoClient
import gridfs

In [21]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion settings from the ``data_ingestion`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            A ``DataIngestionConfig`` whose fields are copied as-is from the
            loaded configuration (no type conversion is applied here).
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            MONGO_URI=section.MONGO_URI,
            local_data_file=section.local_data_file,
        )
      


In [24]:
class DataIngestion:
    """Copies files stored in MongoDB GridFS into label-named folders on disk."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings for later use.

        Args:
            config: Supplies ``MONGO_URI`` and ``local_data_file``.
        """
        self.config = config

    def download_file(self, collection_name) -> str:
        """Download every GridFS file of one database to the local disk.

        Each file is written to ``<output dir>/<label>/<filename>``, where the
        label is read from the file's ``label`` metadata entry and falls back
        to ``"unknown"`` when the entry (or the metadata itself) is missing.

        Args:
            collection_name: Despite its name, this is used as the MongoDB
                *database* name; it is also appended to ``local_data_file``
                to form the output directory.

        Returns:
            The output directory the files were written to.

        Raises:
            Any error from the MongoDB client, GridFS or the filesystem
            propagates unchanged to the caller.
        """
        # NOTE(review): this is plain string concatenation, not a path join,
        # so the configured value presumably ends with a path separator and
        # must be a str (a Path would raise TypeError) -- confirm against the
        # config file. Left unchanged so existing output locations stay put.
        output_dir = self.config.local_data_file + collection_name
        os.makedirs(output_dir, exist_ok=True)

        # The context manager closes the client (and its connection pool)
        # even when a download fails part-way through.
        with MongoClient(self.config.MONGO_URI) as client:
            fs = gridfs.GridFS(client[collection_name])

            for file in fs.find():
                # GridFS metadata is optional; treat a missing document as empty.
                metadata = file.metadata or {}
                label = metadata.get("label", "unknown")
                filename = file.filename

                label_dir = os.path.join(output_dir, label)
                os.makedirs(label_dir, exist_ok=True)

                output_path = os.path.join(label_dir, filename)
                with open(output_path, 'wb') as f:
                    f.write(file.read())

                print(f"[DOWNLOADED] {filename} to {label}")

        return output_dir
      
  


In [25]:
# Run the ingestion stage: build the configuration, then download the test
# and train datasets in that order. Any exception propagates as-is, so no
# try/except wrapper is needed.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file("emotion_dataset_test")
data_ingestion.download_file("emotion_dataset_train")

[2025-05-13 01:00:52,659: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-13 01:00:52,661: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-13 01:00:52,662: INFO: common: created directory at: artifacts]
[2025-05-13 01:00:52,663: INFO: common: created directory at: artifacts/data_ingestion]
[DOWNLOADED] PrivateTest_10131363.jpg to angry
[DOWNLOADED] PrivateTest_10304478.jpg to angry
[DOWNLOADED] PrivateTest_1054527.jpg to angry
[DOWNLOADED] PrivateTest_10590091.jpg to angry
[DOWNLOADED] PrivateTest_1109992.jpg to angry
[DOWNLOADED] PrivateTest_11296953.jpg to angry
[DOWNLOADED] PrivateTest_12000629.jpg to angry
[DOWNLOADED] PrivateTest_12008383.jpg to angry
[DOWNLOADED] PrivateTest_12191716.jpg to angry
[DOWNLOADED] PrivateTest_1221822.jpg to angry
[DOWNLOADED] PrivateTest_12403575.jpg to angry
[DOWNLOADED] PrivateTest_12766285.jpg to angry
[DOWNLOADED] PrivateTest_12846357.jpg to angry
[DOWNLOADED] PrivateTest_1290484.jpg to angry
[DOWNLOADE

In [6]:
from dotenv import load_dotenv


In [5]:
import os
import gridfs
from pymongo import MongoClient
from dotenv import load_dotenv

def download_file(
    db_name: str = "emotion_dataset",
    output_dir: str = r'e:\real\barkalyze\temp',
    label_key: str = "prediction",
) -> str:
    """
    Fetch every GridFS file of a MongoDB database and save it locally.

    The connection string is read from the ``MONGODB_URI`` environment
    variable (after loading a ``.env`` file). Each file is written to
    ``<output_dir>/<label>/<filename>``; colons in the filename are replaced
    with dashes so the name is valid on Windows.

    Args:
        db_name: Name of the MongoDB database holding the GridFS files.
        output_dir: Root directory for the downloaded files; created if absent.
        label_key: Metadata entry used as the sub-folder name. Files without
            metadata, or without this entry, go to ``unknown``.

    Returns:
        ``"Download complete!"`` on success. On any error the message is
        printed and ``"Download failed!"`` is returned instead of raising.
    """
    try:
        load_dotenv()
        mongo_uri = os.getenv("MONGODB_URI")
        if not mongo_uri:
            # Without this check the client would silently fall back to its
            # default host instead of reporting the misconfiguration.
            raise ValueError("MONGODB_URI environment variable is not set")

        # Create the root output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # The context manager closes the client even if a download fails.
        with MongoClient(mongo_uri) as client:
            fs = gridfs.GridFS(client[db_name])

            # Iterate through all files in GridFS
            for file in fs.find():
                # GridFS metadata is optional; treat a missing document as empty.
                metadata = file.metadata or {}
                label = metadata.get(label_key, "unknown")

                # Ensure filename compatibility (replace colons for Windows)
                safe_filename = file.filename.replace(':', '-')

                # Create label folder if it doesn't exist
                label_dir = os.path.join(output_dir, label)
                os.makedirs(label_dir, exist_ok=True)

                # Write the file to disk
                output_path = os.path.join(label_dir, safe_filename)
                with open(output_path, 'wb') as f:
                    f.write(file.read())

                print(f"[DOWNLOADED] {safe_filename} to {label}")

        return "Download complete!"

    except Exception as e:
        print(f"Error: {e}")
        return "Download failed!"


In [6]:
# Run the download; the returned status string is shown as the cell output.
download_file()

[DOWNLOADED] angry_2025-05-24T19-28-11.706100.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-12.280903.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-13.591079.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-14.076436.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-14.537324.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-14.770155.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-15.117343.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-15.555654.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-15.732492.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-16.302221.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-16.608570.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-17.317351.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-17.559972.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-18.118392.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-18.621001.jpg to angry
[DOWNLOADED] angry_2025-05-24T19-28-18.814973.jpg to angry
[DOWNLOADED] happy_2025-05-24T19-28-19.315529.jpg to hap

'Download complete!'