In [210]:
import os

In [211]:
# Project root used as the working directory for the rest of the notebook.
# Set CNN_CLASSIFIER_PROJECT_ROOT to run on another machine without editing
# this cell; when it is unset or empty, the original local checkout path is used.
filepath = os.environ.get("CNN_CLASSIFIER_PROJECT_ROOT") or r'D:/Data Science/GIT Projects/End-to-End-Image-Classification'

In [212]:
# Switch the kernel's working directory to the project root held in `filepath`.
os.chdir(filepath)

In [213]:
# Display the kernel's current working directory.
%pwd


'D:\\Data Science\\GIT Projects\\End-to-End-Image-Classification'

In [214]:
from dataclasses import dataclass
from pathlib import Path

In [215]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion step."""
    # Folder for this step; Config_Manager passes the same config value to
    # create_directories before building this object.
    root_dir: Path
    # Interpolated into the Google Drive URL as the `id` query value by
    # DataIngestion.download_data (stored as a Path, used as a string).
    source: Path
    # Location where the downloaded archive is saved and later opened as a zip.
    local_data_file: Path
    # Folder the archive contents are extracted into.
    unzip_dir: Path

In [216]:
from cnn_image_classifier.constants import *
from cnn_image_classifier.utils.common import read_yaml_file, create_directories

class Config_Manager:
    """Load the YAML settings and hand out typed per-step configuration objects."""

    def __init__(self, config_file_path: Path = config_path, param_file_path: Path = param_path):
        """Read both YAML files and create the folder named by ``artifact_root``.

        Args:
            config_file_path: Location of the main configuration YAML.
            param_file_path: Location of the parameters YAML.
        """
        self.config = read_yaml_file(config_file_path)
        self.params = read_yaml_file(param_file_path)
        create_directories([self.config.artifact_root])

    def get_data_ingestion_config(self)-> DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        The section's ``root_dir`` is passed to ``create_directories`` before
        the config object is assembled.

        Returns:
            A ``DataIngestionConfig`` with every field wrapped in ``Path``.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        ingestion_root = Path(section.root_dir)
        data_source = Path(section.source)
        archive_file = Path(section.local_data_file)
        extraction_dir = Path(section.unzip_dir)

        return DataIngestionConfig(
            root_dir=ingestion_root,
            source=data_source,
            local_data_file=archive_file,
            unzip_dir=extraction_dir,
        )
        



In [217]:
import os
import zipfile
import gdown
from cnn_image_classifier import logger
from cnn_image_classifier.utils.common import getsize


In [218]:
class DataIngestion:
    """Download the dataset archive from Google Drive and unpack it locally.

    Attributes:
        config: Settings providing the Drive file id (``source``), the archive
            location (``local_data_file``) and the extraction folder
            (``unzip_dir``).
    """

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_data(self):
        """Fetch the archive with gdown unless it is already present on disk.

        Raises:
            RuntimeError: If gdown signals a failed download by returning
                ``None`` instead of the saved file path.
        """
        save_path = self.config.local_data_file
        if os.path.exists(save_path):
            logger.info(f"Data already exists at {self.config.local_data_file}")
            return

        # Make sure the destination folder exists before anything is written.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        url = f"https://drive.google.com/uc?id={self.config.source}"
        downloaded = gdown.download(url, str(save_path), quiet=False)
        if downloaded is None:
            # Without this check a failed download would be logged as a success
            # and only surface later as a missing file during extraction.
            logger.error(f"Download failed for {url}")
            raise RuntimeError(f"Failed to download data from {url} to {save_path}")

        logger.info(f"Data downloaded at {self.config.local_data_file}")

    def extract_data(self):
        """Unpack the downloaded zip archive into ``unzip_dir``.

        The target folder is created when missing.

        Raises:
            FileNotFoundError: If the archive file does not exist.
            zipfile.BadZipFile: If the file is not a valid zip archive.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
        logger.info(f"Data extracted to {unzip_path}")


In [None]:
# Run the ingestion stage end to end. The order matters: the archive must be
# downloaded before it can be extracted.
configuration_manager = Config_Manager()  # uses the default config/param paths
data_ingestion_config = configuration_manager.get_data_ingestion_config()
data_ingestion = DataIngestion(data_ingestion_config)
data_ingestion.download_data()  # logs and skips the download if the file already exists
data_ingestion.extract_data() 

[2025-02-21 00:20:45,550 : INFO : common] - yaml file:config.yaml has been read successfully.
[2025-02-21 00:20:45,552 : INFO : common] - yaml file:params.yaml has been read successfully.
[2025-02-21 00:20:45,553 : INFO : common] - directory:artifacts has been created successfully.
[2025-02-21 00:20:45,553 : INFO : common] - directory:artifacts/data_ingestion has been created successfully.


Downloading...
From (original): https://drive.google.com/uc?id=1HclwA9zvctv5pUZWrwHn70P51OOuYNqy
From (redirected): https://drive.google.com/uc?id=1HclwA9zvctv5pUZWrwHn70P51OOuYNqy&confirm=t&uuid=fdd7c3a5-90ae-4547-9e2b-c414886f778e
To: D:\Data Science\GIT Projects\End-to-End-Image-Classification\artifacts\data_ingestion\dog-breed-identification.zip
100%|██████████| 725M/725M [00:57<00:00, 12.6MB/s] 

[2025-02-21 00:21:47,962 : INFO : 2533330436] - Data downloaded at artifacts\data_ingestion\dog-breed-identification.zip



