In [1]:
#!/usr/bin/env python3
"""
Updated HuggingFace Setup with Error Handling and Model Selection
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_huggingface_model(model_name="microsoft/DialoGPT-medium"):
  """
  Setup HuggingFace model with proper error handling
  Recommended models by size:
  - "distilgpt2" (~320MB) - Fastest, good for testing
  - "microsoft/DialoGPT-medium" (~350MB) - Good balance
  - "gpt2" (~500MB) - Better quality
  - "microsoft/DialoGPT-large" (~750MB) - High quality
  - "facebook/opt-1.3b" (~2.6GB) - Best quality, requires more RAM
  """
  print(f":arrows_counterclockwise: Loading model: {model_name}")
  print("This may take a few minutes for first-time download...")
  try:
    # Check if CUDA is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f":computer: Using device: {device}")
    # Load tokenizer
    print(":memo: Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Add padding token if it doesn't exist
    if tokenizer.pad_token is None:
      tokenizer.pad_token = tokenizer.eos_token
    # Load model with appropriate settings
    print(":brain: Loading model...")
    if device == "cuda":
      # GPU setup
      model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16, # Use half precision to save memory
        device_map="auto", # Automatically handle device placement
        low_cpu_mem_usage=True # Reduce CPU memory usage
      )
    else:
      # CPU setup
      model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32, # Full precision for CPU
        low_cpu_mem_usage=True
      )
    # Create text generation pipeline
    print(":wrench: Setting up pipeline...")
    generator = pipeline(
      'text-generation',
      model=model,
      tokenizer=tokenizer,
      max_length=256, # Adjust based on your needs
      max_new_tokens=100, # Maximum tokens to generate
      temperature=0.7, # Creativity level (0.1-1.0)
      do_sample=True, # Enable sampling
      pad_token_id=tokenizer.eos_token_id,
      device=0 if device == "cuda" else -1
    )
    # Wrap in LangChain
    llm = HuggingFacePipeline(pipeline=generator)
    print(":white_check_mark: Model loaded successfully!")
    return llm, tokenizer, model
  except Exception as e:
    print(f":x: Error loading model {model_name}: {e}")
    print(":bulb: Try a smaller model like 'distilgpt2' or 'microsoft/DialoGPT-medium'")
    return None, None, None
def test_model_generation(llm):
  """Test the model with a simple prompt"""
  if llm is None:
    print(":x: No model available for testing")
    return
  print("\n:test_tube: Testing model generation...")
  # Create a simple prompt template
  prompt_template = PromptTemplate(
    input_variables=["bike_type", "discount"],
    template="""Create a short advertisement for a {bike_type} bike rental with {discount}% discount.
    Advertisement:"""
  )
  # Create LangChain chain
  chain = LLMChain(llm=llm, prompt=prompt_template)
  try:
    # Generate response
    result = chain.run(bike_type="mountain", discount=25)
    print(":memo: Generated text:")
    print("-" * 40)
    print(result)
    print("-" * 40)
    print(":white_check_mark: Test successful!")
  except Exception as e:
    print(f":x: Generation error: {e}")
def check_system_requirements():
  """Check system capabilities"""
  print(":mag: Checking system requirements...")
  # Check PyTorch installation
  print(f"PyTorch version: {torch.__version__}")
  # Check CUDA availability
  if torch.cuda.is_available():
    print(f":white_check_mark: CUDA available: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
  else:
    print(":warning: CUDA not available - using CPU")
  # Check available RAM (approximate)
  import psutil
  ram_gb = psutil.virtual_memory().total / (1024 ** 3)
  print(f"System RAM: {ram_gb:.1f} GB")
  # Recommend model based on system
  if ram_gb < 8:
    recommended = "distilgpt2"
    print(":bulb: Recommended model: distilgpt2 (lightweight)")
  elif ram_gb < 16:
    recommended = "microsoft/DialoGPT-medium"
    print(":bulb: Recommended model: microsoft/DialoGPT-medium")
  else:
    recommended = "facebook/opt-1.3b"
    print(":bulb: Recommended model: facebook/opt-1.3b (high quality)")
  return recommended
def main():
  """Main setup and testing function"""
  print(":woman-biking: BikeEase HuggingFace Model Setup")
  print("=" * 50)
  # Check system requirements
  recommended_model = check_system_requirements()
  print(f"\n:dart: Setting up with recommended model: {recommended_model}")
  # Setup model
  llm, tokenizer, model = setup_huggingface_model(recommended_model)
  if llm is not None:
    # Test the model
    test_model_generation(llm)
    print("\n:tada: Setup complete! You can now use:")
    print("- llm: LangChain HuggingFace pipeline")
    print("- tokenizer: HuggingFace tokenizer")
    print("- model: Raw HuggingFace model")
    return llm, tokenizer, model
  else:
    print("\n:x: Setup failed. Please check the error messages above.")
    return None, None, None
if __name__ == "__main__":
  llm, tokenizer, model = main()

:woman-biking: BikeEase HuggingFace Model Setup
:mag: Checking system requirements...
PyTorch version: 2.7.0+cu126
System RAM: 92.1 GB
:bulb: Recommended model: facebook/opt-1.3b (high quality)

:dart: Setting up with recommended model: facebook/opt-1.3b
:arrows_counterclockwise: Loading model: facebook/opt-1.3b
This may take a few minutes for first-time download...
:computer: Using device: cpu
:memo: Loading tokenizer...
:brain: Loading model...


Device set to use cpu
Both `max_new_tokens` (=100) and `max_length`(=257) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


:wrench: Setting up pipeline...
:white_check_mark: Model loaded successfully!

:test_tube: Testing model generation...
:memo: Generated text:
----------------------------------------
Create a short advertisement for a mountain bike rental with 25% discount.
    Advertisement:                                                                                                    
----------------------------------------
:white_check_mark: Test successful!

:tada: Setup complete! You can now use:
- llm: LangChain HuggingFace pipeline
- tokenizer: HuggingFace tokenizer
- model: Raw HuggingFace model
