In [None]:
import os
from box import ConfigBox
from box.exceptions import BoxValueError
from pathlib import Path

In [None]:
%pwd
os.chdir("D:\\End-to-End-Wine-Quality-predidection\\research")
%pwd
os.chdir("../")
%pwd

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_url: str
    local_data_file: Path
    unzip_dir: Path
    artifact_dir: Path

In [None]:
from ML_Project.constants import *
from ML_Project.utils.common import read_yaml,create_directories

In [None]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath= SCHEMA_FILE_PATH
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([Path(self.config.artifact_root)])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
    
        config = self.config.data_ingestion
        
        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_url=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir,
            artifact_dir=self.config.artifact_root
        )
       
        return data_ingestion_config

In [None]:
config=ConfigurationManager()
print(type(config.config.artifact_root))

In [None]:
import os
import urllib.request as request
import zipfile 
from  ML_Project.logging import logger
from ML_Project.utils.common import get_size

In [21]:
import os
from pathlib import Path
from urllib import request
import zipfile
import pandas as pd
import logging

logger = logging.getLogger(__name__)

class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    # Download file
    def download_file(self):
        local_file = Path(self.config.local_data_file)
        local_file.parent.mkdir(parents=True, exist_ok=True)
        if not local_file.exists():
            filename, headers = request.urlretrieve(
                self.config.source_url,
                local_file
            )
            logger.info(f"File downloaded successfully! Size: {get_size(Path(filename))}")
        else:
            logger.info(f"File already exists of size: {get_size(local_file)}")
        return local_file

    # Extracting zip file
    def extract_zip_file(self):
        unzip_dir = Path(self.config.unzip_dir)
        unzip_dir.mkdir(parents=True, exist_ok=True)
        
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_dir)
        logger.info(f"File extracted successfully in dir: {unzip_dir}")

        # Parcourir tous les fichiers extraits
        for file_path in unzip_dir.iterdir():
            if file_path.is_file():  # seulement les fichiers
                try:
                    # Essayer de lire avec pandas
                    df = pd.read_csv(file_path)
                    logger.info(f"Displaying first 5 rows of {file_path.name}:\n{df.head()}")
                    print(f"First 5 rows of {file_path.name}:\n", df.info)
                except Exception as e:
                    logger.warning(f"Cannot read {file_path.name} with pandas: {e}")
        
        return unzip_dir


In [20]:
config=ConfigurationManager()
print(type(config.config.artifact_root))

[2025-10-03 23:25:26,419] :INFO:common: Created directory at: artifacts :
<class 'str'>


In [22]:
try:
    config=ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion=DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file() 
except Exception as e:
    raise e

[2025-10-03 23:25:36,686] :INFO:common: Created directory at: artifacts :
[2025-10-03 23:25:36,687] :INFO:200978528: File already exists of size: ~ 21 KB :
[2025-10-03 23:25:36,689] :INFO:200978528: File extracted successfully in dir: artifacts\data_ingestion :
[2025-10-03 23:25:36,696] :INFO:200978528: Displaying first 5 rows of data.zip:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  6