# **DATA DOWNLOAD**

In [13]:
import kagglehub

# Fetch the dataset through kagglehub; the recorded run resolved to version 1.
# `path` holds the local directory kagglehub reports and is used by the next cell.
dataset_handle = "mohamedhanyyy/chest-ctscan-images"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/mohamedhanyyy/chest-ctscan-images?dataset_version_number=1...


100%|██████████| 119M/119M [00:19<00:00, 6.25MB/s] 

Extracting model files...





Path to dataset files: C:\Users\arpit\.cache\kagglehub\datasets\mohamedhanyyy\chest-ctscan-images\versions\1


In [14]:
import shutil

# Move the kagglehub download directory into the project folder.
# The destination is a raw string: in a normal literal "\P" and "\C" are
# invalid escape sequences, which newer Python versions warn about.
# The string value itself is unchanged.
# Because the destination directory already exists, the source directory is
# placed inside it (the recorded result ends in "\1").
# NOTE(review): absolute, machine-specific path — consider a configurable root.
shutil.move(path, r"C:\Projects\Chest-Cancer-Classification-App")

'C:\\Projects\\Chest-Cancer-Classification-App\\1'

In [18]:
import gdown 

# Google Drive id of the file to fetch; kept on its own so the link below
# is easy to re-point at another file.
file_id = "1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3"

# NOTE(review): the "/" right after "?" looks like a typo for
# "uc?export=download"; the recorded download still succeeds with this link.
url = "https://drive.google.com/uc?/export=download&id=" + file_id

In [23]:
# Display the assembled download link.
print(url)

https://drive.google.com/uc?/export=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3


In [19]:
# Download the Drive file to "Chest-Data.zip"; the relative name is resolved
# against the kernel's current working directory.
gdown.download(url, "Chest-Data.zip")

Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3&confirm=t&uuid=1e2b5a97-2cbe-4fa6-9d2c-33d48180ff0b
To: c:\Projects\Chest-Cancer-Classification-App\Research\Chest-Data.zip
100%|██████████| 124M/124M [00:18<00:00, 6.57MB/s] 


'Chest-Data.zip'

# **DATA INGESTION STEP**

In [20]:
# Show the kernel's current working directory.
%pwd

'c:\\Projects\\Chest-Cancer-Classification-App\\Research'

In [21]:
import os
# Step up one directory level so the relative paths used below resolve from
# the parent folder. The move is relative: every execution of this cell goes
# up one more level, so run it only once per kernel session.
os.chdir("../")

In [22]:
# Confirm the working directory after the change.
%pwd

'c:\\Projects\\Chest-Cancer-Classification-App'

### Constants

In [25]:
from pathlib import Path

# Locations of the two YAML files. Both are relative paths, so they resolve
# against the current working directory at the time they are read.
CONFIG_FILE_PATH = Path("config/config.yaml")
PARAMS_FILE_PATH = Path("params.yaml")

### Config.yaml

- artifacts_root: artifacts

- data_ingestion:
  - root_dir: artifacts/data_ingestion
  - source_url: https://drive.google.com/uc?/export=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3
  - data_dir: artifacts/data_ingestion/data.zip
  - unzip_dir: artifacts/data_ingestion

### CONFIG ENTITY

In [26]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Frozen bundle of the settings used by the data-ingestion step.

    The field names match the keys listed under `data_ingestion` in
    config.yaml.
    NOTE(review): the values are stored as supplied and are not converted,
    so a field annotated `Path` may actually hold a plain string.
    """
    # Working directory of the ingestion step; created before downloading.
    root_dir: Path
    # Link handed to gdown.download.
    source_url: str
    # Despite the name, this is the file path the downloaded zip is written to.
    data_dir: Path
    # Directory the downloaded zip archive is extracted into.
    unzip_dir: Path

### CONFIGURATION

In [None]:
from src.logger import logger
from src.exception import CustomException
from src.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.utils import create_directories, read_yaml
## from src.entity.config_entity import DataIngestionConfig

class AppConfig:
    """Reads the project's YAML files and builds per-step config objects."""

    def __init__(self):
        """Load config.yaml and params.yaml, then create the artifacts root."""
        self.config_filepath = CONFIG_FILE_PATH
        self.params_filepath = PARAMS_FILE_PATH

        self.config = read_yaml(self.config_filepath)
        self.params = read_yaml(self.params_filepath)

        # create_directories is always given a list, even for a single path.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion root directory and return the ingestion settings.

        Returns:
            A DataIngestionConfig filled from the `data_ingestion` section of
            the loaded configuration; values are passed through unchanged.
        """
        ingestion_section = self.config.data_ingestion

        # create_directories is always given a list, even for a single path.
        create_directories([ingestion_section.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion_section.root_dir,
            source_url=ingestion_section.source_url,
            data_dir=ingestion_section.data_dir,
            unzip_dir=ingestion_section.unzip_dir,
        )

### COMPONENTS

In [None]:
from src.logger import logger
from src.exception import CustomException
import gdown
import os, sys
import zipfile
## from src.configuration.configuration import AppConfig

class DataIngestion:
    """Downloads the dataset archive and extracts it, driven by a DataIngestionConfig."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings for later use by the download/unzip steps."""
        self.config = config

    def download_data(self) -> str:
        """Download the archive at `config.source_url` to `config.data_dir`.

        Returns:
            The path the archive was written to, as a string.

        Raises:
            CustomException: wraps any error raised while creating the
                directory or downloading, including the case where gdown
                reports no downloaded file.
        """
        try:
            root_dir = self.config.root_dir
            data_url = self.config.source_url
            # Pass gdown a plain string so the call does not depend on it
            # accepting Path objects (the config annotates this field as Path).
            zip_download_path = str(self.config.data_dir)

            create_directories([root_dir])            # expects a list of paths

            downloaded = gdown.download(data_url, zip_download_path)
            # NOTE(review): gdown may signal a failed download by returning
            # None instead of raising (version dependent — confirm). Fail here
            # rather than later with a confusing error in unzip_data.
            if downloaded is None:
                raise RuntimeError(f"gdown could not download {data_url}")

            return zip_download_path

        except Exception as e:
            raise CustomException(e, sys) from e

    def unzip_data(self):
        """Extract the downloaded archive (`config.data_dir`) into `config.unzip_dir`.

        Raises:
            CustomException: wraps any error raised while creating the
                directory, opening the archive, or extracting it.
        """
        try:
            unzip_dir = self.config.unzip_dir
            zip_download_dir = self.config.data_dir

            create_directories([unzip_dir])             # expects a list of paths

            with zipfile.ZipFile(zip_download_dir, 'r') as zip_ref:
                zip_ref.extractall(unzip_dir)

        except Exception as e:
            raise CustomException(e, sys) from e

### PIPELINE

In [38]:
## from src.configuration.configuration import AppConfig
## from src.components.data_ingestion import DataIngestion
import sys
from src.logger import logger
from src.exception import CustomException

# Run the ingestion stage end to end: load the configuration, build the
# ingestion component, download the archive, then extract it.
try:
    config = AppConfig()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion_obj = DataIngestion(config=data_ingestion_config)
    data_ingestion_obj.download_data()
    data_ingestion_obj.unzip_data()
except Exception as e:
    # Any failure in the steps above is re-raised as the project's CustomException.
    raise CustomException(e, sys)

[32m[2025-06-25 04:01:12]   19 | INFO     | yaml file: config\config.yaml loaded successfully[0m
[32m[2025-06-25 04:01:12]   19 | INFO     | yaml file: params.yaml loaded successfully[0m
Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1Rfn_h7aGCgpSAuZJVm2oGM31Rl6f2Xs3&confirm=t&uuid=5c07c6eb-c2b1-4613-b485-e79f90b567a0
To: c:\Projects\Chest-Cancer-Classification-App\artifacts\data_ingestion\data.zip
100%|██████████| 124M/124M [00:18<00:00, 6.58MB/s] 
