## Download the model (google/vit-base-patch16-224)


In [5]:
"""
Hugging Face Model Downloader for Fine-tuning
Downloads google/vit-base-patch16-224 model with all required files
"""

import os
import sys
import logging
from pathlib import Path
from typing import Optional, Dict, Any
import json
from datetime import datetime

# Third-party dependencies are imported inside a guard so that a missing
# package yields a readable install hint rather than a bare traceback.
try:
    from huggingface_hub import snapshot_download, hf_hub_download, login
    from transformers import ViTImageProcessor, ViTForImageClassification, ViTConfig
    import torch
except ImportError as e:
    print(f"Missing required packages: {e}")
    print("Please install: pip install transformers huggingface-hub torch")
    # Stop with exit status 1: the downloader class below uses all of these names.
    sys.exit(1)

# Configure logging: INFO level, records formatted as "timestamp - level - message".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by the downloader class defined below.
logger = logging.getLogger(__name__)

class HuggingFaceModelDownloader:
    """
    Download a Hugging Face ViT checkpoint into a local directory and validate it.

    The workflow (see ``run_complete_download``) is: optional authentication,
    existence check on the Hub, snapshot download, on-disk file verification,
    a load + forward-pass smoke test, and finally writing ``download_info.json``.
    Apart from directory creation in ``__init__``, every step logs its outcome
    and reports failure through its return value instead of raising.
    """

    # Files that must exist before the checkpoint can be loaded for fine-tuning.
    REQUIRED_FILES = ('config.json', 'preprocessor_config.json')
    # Weight files: any one of these is sufficient.
    MODEL_FILES = ('pytorch_model.bin', 'model.safetensors')
    # Reported for information only; their absence never fails verification.
    OPTIONAL_FILES = ('tokenizer.json', 'tokenizer_config.json', 'vocab.txt', 'README.md')

    def __init__(self, model_name: str, save_path: str, use_auth_token: bool = False,
                 ignore_patterns: Optional[list] = None):
        """
        Initialize the model downloader and create the target directory.

        Args:
            model_name: HuggingFace model identifier
            save_path: Local path to save the model
            use_auth_token: Whether to use authentication token from environment
            ignore_patterns: Optional glob patterns of repository files to skip
                during the snapshot download (for example ``["*.h5", "*.msgpack"]``
                to skip TensorFlow/Flax weights). ``None`` downloads every file.

        Raises:
            PermissionError: If the target directory cannot be created due to
                insufficient permissions.
            Exception: Any other error raised while creating the directory.
        """
        self.model_name = model_name
        self.save_path = Path(save_path)
        self.use_auth_token = use_auth_token
        self.ignore_patterns = ignore_patterns

        # Ensure save directory exists
        try:
            self.save_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Initialized downloader for model: {model_name}")
            logger.info(f"Target directory: {self.save_path}")
        except PermissionError:
            logger.error(f"Permission denied creating directory: {self.save_path}")
            raise
        except Exception as e:
            logger.error(f"Failed to create directory {self.save_path}: {e}")
            raise

    def _resolve_token(self) -> Optional[str]:
        """Return ``HF_TOKEN`` from the environment when authentication is enabled, else None."""
        return os.getenv('HF_TOKEN') if self.use_auth_token else None

    @classmethod
    def _required_files_present(cls, verification_results: Dict[str, bool]) -> bool:
        """Return True when every required file and at least one weight file were found."""
        if not verification_results.get('model_file_found', False):
            return False
        return all(verification_results.get(name, False) for name in cls.REQUIRED_FILES)

    def authenticate(self) -> bool:
        """
        Authenticate with Hugging Face using environment token.

        Returns:
            bool: True if authentication successful, False otherwise
                (no ``HF_TOKEN`` set, or the login call raised).
        """
        try:
            # Check for HF_TOKEN in environment
            hf_token = os.getenv('HF_TOKEN')
            if not hf_token:
                logger.warning("No HF_TOKEN found in environment")
                logger.info("Proceeding without authentication (public models only)")
                return False

            login(token=hf_token)
            logger.info("Successfully authenticated using HF_TOKEN from environment")
            return True
        except Exception as e:
            logger.error(f"Authentication failed: {e}")
            return False

    def validate_model_exists(self) -> bool:
        """
        Validate that the model exists on Hugging Face Hub.

        The check fetches ``config.json`` for the repository; any exception
        raised by that fetch is logged and reported as a failed validation.

        Returns:
            bool: True if model exists, False otherwise
        """
        try:
            # Try to get model config to validate existence
            hf_hub_download(
                repo_id=self.model_name,
                filename="config.json",
                cache_dir=None,
                local_files_only=False,
                token=self._resolve_token()
            )
            logger.info(f"Model {self.model_name} validated successfully")
            return True
        except Exception as e:
            logger.error(f"Model validation failed: {e}")
            return False

    def download_complete_model(self) -> bool:
        """
        Download the model repository snapshot into ``save_path``.

        Files matching ``ignore_patterns`` (when set) are skipped; with the
        default of ``None`` every file in the repository is downloaded.

        Returns:
            bool: True if download successful, False otherwise
        """
        try:
            logger.info("Starting complete model download...")

            # NOTE(review): recent huggingface_hub releases appear to deprecate
            # `local_dir_use_symlinks` and `resume_download`; they are kept so
            # older releases still write real files into `local_dir`. Confirm
            # against the pinned huggingface_hub version before removing them.
            downloaded_path = snapshot_download(
                repo_id=self.model_name,
                cache_dir=None,  # Use default cache
                local_dir=str(self.save_path),
                local_dir_use_symlinks=False,
                revision="main",
                resume_download=True,
                ignore_patterns=self.ignore_patterns,
                token=self._resolve_token()
            )

            logger.info(f"Model downloaded successfully to: {downloaded_path}")
            return True

        except Exception as e:
            logger.error(f"Model download failed: {e}")
            return False

    def verify_download(self) -> Dict[str, bool]:
        """
        Verify that all essential files have been downloaded.

        Returns:
            Dict[str, bool]: Presence flag for every required, weight and
            optional file name, plus ``'model_file_found'`` which is True when
            at least one weight file exists. The key set is always the same,
            regardless of which files are present.
        """
        report: Dict[str, bool] = {}

        # Check required files
        for filename in self.REQUIRED_FILES:
            present = (self.save_path / filename).is_file()
            report[filename] = present
            if present:
                logger.info(f"Required file found: {filename}")
            else:
                logger.warning(f"Required file missing: {filename}")

        # Check every weight format (no early exit) so the report reflects
        # exactly what is on disk.
        model_file_found = False
        for filename in self.MODEL_FILES:
            present = (self.save_path / filename).is_file()
            report[filename] = present
            if present:
                model_file_found = True
                logger.info(f"Model file found: {filename}")

        if not model_file_found:
            logger.error("No model file found (pytorch_model.bin or model.safetensors)")

        # Check optional files
        for filename in self.OPTIONAL_FILES:
            present = (self.save_path / filename).is_file()
            report[filename] = present
            if present:
                logger.info(f"Optional file found: {filename}")

        report['model_file_found'] = model_file_found
        return report

    def test_model_loading(self) -> bool:
        """
        Test loading the downloaded model to ensure it's functional.

        Loads the config, image processor and classification model from
        ``save_path`` and runs one forward pass on random input whose shape is
        taken from the loaded config.

        Returns:
            bool: True if model loads successfully, False otherwise
        """
        try:
            logger.info("Testing model loading...")

            # Load configuration
            config = ViTConfig.from_pretrained(str(self.save_path))
            logger.info(f"Model config loaded: {config.num_labels} classes")

            # Load image processor (loaded only to prove it deserializes)
            ViTImageProcessor.from_pretrained(str(self.save_path))
            logger.info("Image processor loaded successfully")

            # Load model
            model = ViTForImageClassification.from_pretrained(str(self.save_path))
            logger.info(f"Model loaded successfully: {model.__class__.__name__}")

            # Size the dummy batch from the config so checkpoints with a
            # different resolution or channel count are exercised correctly.
            image_size = config.image_size
            if isinstance(image_size, (tuple, list)):
                height, width = image_size
            else:
                height = width = image_size

            # Test model forward pass with dummy input
            dummy_input = torch.randn(1, config.num_channels, height, width)
            with torch.no_grad():
                outputs = model(dummy_input)
                logger.info(f"Model forward pass successful: output shape {outputs.logits.shape}")

            return True

        except Exception as e:
            logger.error(f"Model loading test failed: {e}")
            return False

    def save_download_info(self) -> None:
        """
        Save download information and metadata.

        Writes ``download_info.json`` into ``save_path``. This is best-effort:
        a failure to write the file is logged and otherwise ignored.
        """
        info = {
            'model_name': self.model_name,
            'download_path': str(self.save_path),
            'download_timestamp': datetime.now().isoformat(),
            'model_architecture': 'Vision Transformer (ViT)',
            'intended_use': 'Image Classification - Fine-tuning Ready',
            'framework': 'PyTorch/Transformers'
        }

        info_path = self.save_path / 'download_info.json'
        try:
            # Explicit encoding keeps the file identical across platforms.
            with open(info_path, 'w', encoding='utf-8') as f:
                json.dump(info, f, indent=2)
            logger.info(f"Download information saved to: {info_path}")
        except OSError as e:
            logger.error(f"Failed to save download info: {e}")

    def run_complete_download(self) -> bool:
        """
        Execute the complete download process with all validation steps.

        Returns:
            bool: True if entire process successful, False otherwise
        """
        logger.info("Starting complete model download process...")

        # Step 1: Authentication (result is informational; a missing token
        # still allows public models to be downloaded)
        if self.use_auth_token:
            self.authenticate()

        # Step 2: Validate model exists
        if not self.validate_model_exists():
            logger.error("Model validation failed. Aborting download.")
            return False

        # Step 3: Download model
        if not self.download_complete_model():
            logger.error("Model download failed. Aborting process.")
            return False

        # Step 4: Verify download (every required file plus one weight file)
        verification_results = self.verify_download()
        if not self._required_files_present(verification_results):
            logger.error("Required files missing after download")
            return False

        # Step 5: Test model loading
        if not self.test_model_loading():
            logger.error("Model loading test failed")
            return False

        # Step 6: Save download info
        self.save_download_info()

        logger.info("Complete model download process finished successfully")
        logger.info(f"Model ready for fine-tuning at: {self.save_path}")

        return True


def main():
    """
    Download and validate the ViT checkpoint, reporting the outcome on stdout.

    Exits the process with status 1 when the parent of the target directory is
    missing, when the download workflow reports failure, or when an unexpected
    exception is raised.
    """
    # Configuration
    model_id = "google/vit-base-patch16-224"
    target_dir = "/Volumes/KODAK/folder 02/Brest_cancer_prediction/model/raw_model"

    # Guard: the parent directory must already exist (e.g. external drive mounted)
    parent_dir = Path(target_dir).parent
    if not parent_dir.exists():
        print(f"ERROR: Parent directory does not exist: {parent_dir}")
        print("Please create the parent directory first or check if the external drive is mounted")
        sys.exit(1)

    try:
        # Public model, so no authentication token is requested
        downloader = HuggingFaceModelDownloader(
            model_name=model_id,
            save_path=target_dir,
            use_auth_token=False,
        )

        # Run the workflow and bail out early on failure
        if not downloader.run_complete_download():
            print("FAILED: Model download process encountered errors")
            print("Please check the logs above for details")
            sys.exit(1)

        print(f"SUCCESS: Model {model_id} downloaded and validated successfully")
        print(f"Location: {target_dir}")
        print("Model is ready for fine-tuning process")

    except Exception as e:
        print(f"CRITICAL ERROR: {e}")
        sys.exit(1)


# Run the download workflow only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()

2025-06-22 14:24:24,957 - INFO - Initialized downloader for model: google/vit-base-patch16-224
2025-06-22 14:24:24,964 - INFO - Target directory: /Volumes/KODAK/folder 02/Brest_cancer_prediction/model/raw_model
2025-06-22 14:24:24,966 - INFO - Starting complete model download process...


config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

2025-06-22 14:24:26,000 - INFO - Model google/vit-base-patch16-224 validated successfully
2025-06-22 14:24:26,001 - INFO - Starting complete model download...


Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


README.md:   0%|          | 0.00/5.69k [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/347M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/744 [00:00<?, ?B/s]

flax_model.msgpack:   0%|          | 0.00/346M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

2025-06-22 14:40:27,893 - INFO - Model downloaded successfully to: /Volumes/KODAK/folder 02/Brest_cancer_prediction/model/raw_model
2025-06-22 14:40:27,903 - INFO - Required file found: config.json
2025-06-22 14:40:27,911 - INFO - Required file found: preprocessor_config.json
2025-06-22 14:40:27,916 - INFO - Model file found: pytorch_model.bin
2025-06-22 14:40:27,960 - INFO - Optional file found: README.md
2025-06-22 14:40:27,962 - INFO - Testing model loading...
2025-06-22 14:40:27,988 - INFO - Model config loaded: 1000 classes
2025-06-22 14:40:28,003 - INFO - Image processor loaded successfully
2025-06-22 14:40:28,929 - INFO - Model loaded successfully: ViTForImageClassification
2025-06-22 14:40:29,106 - INFO - Model forward pass successful: output shape torch.Size([1, 1000])
2025-06-22 14:40:29,111 - INFO - Download information saved to: /Volumes/KODAK/folder 02/Brest_cancer_prediction/model/raw_model/download_info.json
2025-06-22 14:40:29,111 - INFO - Complete model download process finished successfully

SUCCESS: Model google/vit-base-patch16-224 downloaded and validated successfully
Location: /Volumes/KODAK/folder 02/Brest_cancer_prediction/model/raw_model
Model is ready for fine-tuning process


: 