In [340]:
import os
os.chdir("C:/projects/Summarizer")  # switch into your project folder
print("Now CWD:", os.getcwd())
print("Config exists:", os.path.exists("config/config.yaml"))


Now CWD: C:\projects\Summarizer
Config exists: True


In [341]:
import os
os.chdir("C:/projects/Summarizer")  # <-- change to your project root
print("Now CWD:", os.getcwd())


Now CWD: C:\projects\Summarizer


In [342]:

import os
%pwd

'C:\\projects\\Summarizer'

In [343]:
os.chdir("../")
%pwd

'C:\\projects'

In [344]:
import importlib
import src.summarizer.constants as constants

# Reload the constants module to pick up changes
importlib.reload(constants)

print("CONFIG_FILE_PATH:", constants.CONFIG_FILE_PATH)
print("CONFIG exists:", constants.CONFIG_FILE_PATH.exists())
print("PARAMS_FILE_PATH:", constants.PARAMS_FILE_PATH)
print("PARAMS exists:", constants.PARAMS_FILE_PATH.exists())



PROJECT_ROOT: C:\projects\Summarizer
CONFIG_FILE_PATH: C:\projects\Summarizer\config\config.yaml
CONFIG exists: True
PARAMS_FILE_PATH: C:\projects\Summarizer\params.yaml
PARAMS exists: True
CONFIG_FILE_PATH: C:\projects\Summarizer\config\config.yaml
CONFIG exists: True
PARAMS_FILE_PATH: C:\projects\Summarizer\params.yaml
PARAMS exists: True


In [345]:
# Cell 1
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataIngestionConfig:
    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path

    
    
    

In [346]:
# Cell 2
from pathlib import Path

# Explicitly point to your project folder
PROJECT_ROOT = Path("C:/projects/Summarizer")  # <-- adjust to your actual path

CONFIG_FILE_PATH = PROJECT_ROOT / "config" / "config.yaml"
PARAMS_FILE_PATH = PROJECT_ROOT / "params.yaml"

print("CONFIG exists:", CONFIG_FILE_PATH.exists())  # should be True
print("PARAMS exists:", PARAMS_FILE_PATH.exists())  # should be True


CONFIG exists: True
PARAMS exists: True


In [347]:

from src.summarizer.utils.common import read_yaml, create_directories
from src.summarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH

from pathlib import Path


    # go up one level to Summarizer





In [348]:
import os
import urllib.request as request
import zipfile
from src.summarizer.logging import logger 


In [349]:
# Cell 4
# Cell 4


class ConfigurationManager:
    def __init__(self, config_path=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        # Read YAML files
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_filepath)

        # Convert artifacts_root to absolute Path
        self.artifacts_root = PROJECT_ROOT / self.config['artifacts_root']
        create_directories([self.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config['data_ingestion']
        
        # Absolute paths
        root_dir = self.artifacts_root / Path(config['root_dir']).name
        local_data_file = self.artifacts_root / Path(config['local_data_file']).name
        unzip_dir = self.artifacts_root / Path(config['unzip_dir']).name

        create_directories([root_dir, unzip_dir])

        return DataIngestionConfig(
            root_dir=root_dir,
            source_URL=str(config['source_URL']),
            local_data_file=local_data_file,
            unzip_dir=unzip_dir
        )


In [350]:
# Cell 6
# Cell 5
import os
import urllib.request as request
import zipfile
from src.summarizer.logging import logger

class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config
    
    def download_file(self):
        if not self.config.local_data_file.exists():
            filename, header = request.urlretrieve(
                url=self.config.source_URL,
                filename=self.config.local_data_file
            )
            logger.info(f"File downloaded to: {self.config.local_data_file}")
        else:
            logger.info(f"File already exists at: {self.config.local_data_file}")
    
    def extract_zip_file(self):
        os.makedirs(self.config.unzip_dir, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(self.config.unzip_dir)
        logger.info(f"Extracted zip file to: {self.config.unzip_dir}")


In [351]:
from src.summarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH

print("PROJECT_ROOT:", PROJECT_ROOT)
print("CONFIG_FILE_PATH:", CONFIG_FILE_PATH)
print("CONFIG exists:", CONFIG_FILE_PATH.exists())
print("PARAMS_FILE_PATH:", PARAMS_FILE_PATH)
print("PARAMS exists:", PARAMS_FILE_PATH.exists())


PROJECT_ROOT: C:\projects\Summarizer
CONFIG_FILE_PATH: C:\projects\Summarizer\config\config.yaml
CONFIG exists: True
PARAMS_FILE_PATH: C:\projects\Summarizer\params.yaml
PARAMS exists: True


In [352]:
from pathlib import Path

print(Path("C:/projects/Summarizer/config/config.yaml").exists())


True


In [353]:
from src.summarizer.constants import CONFIG_FILE_PATH

print("CONFIG_FILE_PATH:", CONFIG_FILE_PATH)
print("CONFIG exists:", CONFIG_FILE_PATH.exists())


CONFIG_FILE_PATH: C:\projects\Summarizer\config\config.yaml
CONFIG exists: True


In [357]:
config_manager = ConfigurationManager()
data_ingestion_config = config_manager.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)

data_ingestion.download_file()
data_ingestion.extract_zip_file()


[2025-09-11 20:06:01,608:  INFO:common:yaml file:C:\projects\Summarizer\config\config.yaml loaded successfully]
[2025-09-11 20:06:01,616:  INFO:common:yaml file:C:\projects\Summarizer\params.yaml loaded successfully]


TypeError: expected str, bytes or os.PathLike object, not type

In [None]:
from src.summarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH

print("PROJECT_ROOT:", CONFIG_FILE_PATH.parent.parent)  # should be .../Summarizer
print("CONFIG_FILE_PATH:", CONFIG_FILE_PATH)
print("CONFIG exists:", CONFIG_FILE_PATH.exists())
print("PARAMS exists:", PARAMS_FILE_PATH.exists())


PROJECT_ROOT: .
CONFIG_FILE_PATH: config\config.yaml
CONFIG exists: False
PARAMS exists: False


In [None]:
from pathlib import Path
from src.summarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH

print("Notebook working directory:", Path().resolve())
print("CONFIG_FILE_PATH exists:", CONFIG_FILE_PATH.exists())
print("PARAMS_FILE_PATH exists:", PARAMS_FILE_PATH.exists())


Notebook working directory: C:\projects
CONFIG_FILE_PATH exists: False
PARAMS_FILE_PATH exists: False


In [None]:
from pathlib import Path

NOTEBOOK_DIR = Path().resolve()
PROJECT_ROOT = NOTEBOOK_DIR

# climb up until config/config.yaml exists
while not (PROJECT_ROOT / "config" / "config.yaml").exists():
    PROJECT_ROOT = PROJECT_ROOT.parent
    if PROJECT_ROOT.parent == PROJECT_ROOT:
        raise FileNotFoundError("Cannot find config/config.yaml in any parent folders")

CONFIG_FILE_PATH = PROJECT_ROOT / "config" / "config.yaml"
PARAMS_FILE_PATH = PROJECT_ROOT / "params.yaml"

print("Using PROJECT_ROOT:", PROJECT_ROOT)
print("CONFIG exists:", CONFIG_FILE_PATH.exists())
print("PARAMS exists:", PARAMS_FILE_PATH.exists())


FileNotFoundError: Cannot find config/config.yaml in any parent folders