In [None]:
# python3 -mm venv growthschool_genai
# source growthschool_genai/bin/activate
# pip install streamlit transformers sentence-transformers faiss-cpu PyPDF2 ipykernel
# python -m ipykernel install --user --name=growthschool_genai
# jupyter lab

In [3]:
import json
from datetime import datetime

from huggingface_hub import HfApi
from huggingface_hub import hf_hub_download

# https://ollama.com/download
# https://github.com/ollama/ollama-python

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def serialize_object(obj):
    """
    Helper function to serialize custom objects like EvalResult.
    Converts objects with __dict__ attribute to dictionaries and handles datetime objects.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()  # Convert datetime to ISO 8601 string
    elif hasattr(obj, "__dict__"):
        return {key: serialize_object(value) for key, value in obj.__dict__.items()}
    elif isinstance(obj, list):
        return [serialize_object(item) for item in obj]
    elif isinstance(obj, dict):
        return {key: serialize_object(value) for key, value in obj.items()}
    else:
        return obj  # Return the value as-is for primitive types

def model_info_to_json(model_info):
    """
    Convert a Hugging Face ModelInfo object to a generalized dictionary,
    handling non-serializable fields like EvalResult and datetime.
    """
    return serialize_object(model_info)

In [5]:


def download_required_files(repo_id, local_dir="./model_files"):
    """
    Download only the necessary files for quickly loading a Hugging Face model.
    
    Args:
        repo_id (str): The Hugging Face model repository ID (e.g., "bert-base-uncased").
        local_dir (str): The directory where the files will be saved.
    """
    import os
    
    # List of required files
    required_files = [
        "vocab.txt",          # Vocabulary file (if applicable)
        "vocab.json",          # Vocabulary file (if applicable)       
        "config.json",        # Model configuration
        "tokenizer.json",     # Tokenizer configuration (if applicable)
        "merges.txt",         # BPE merge rules file (if applicable)
        "pytorch_model.bin",  # Model weights
        "model.safetensors",  # Alternative model weights format
    ]
    
    # Ensure the output directory exists
    os.makedirs(local_dir, exist_ok=True)
    
    # Download only the required files
    for file_name in required_files:
        try:
            print(f"Attempting to download: {file_name}")
            local_path = hf_hub_download(repo_id=repo_id, filename=file_name, local_dir=local_dir)
            print(f"Saved to: {local_path}")
        except Exception as e:
            print(f"Could not download {file_name}: {e}")
    api = HfApi()
    with open(f"{repo_id.split('/')[1]}.json", "w") as json_file:
        json_file.write(json.dumps(model_info_to_json(api.model_info(model))))

In [1]:
models=[
    # "nlptown/bert-base-multilingual-uncased-sentiment", # sentiment analysis
    # "facebook/bart-large-cnn", # summarization
    # "deepset/roberta-base-squad2", # question-answering
    # "dbmdz/bert-large-cased-finetuned-conll03-english", # Named Entity Recognition
    # "distilbert/distilbert-base-uncased-finetuned-sst-2-english", # Sentiment Analysis
    # "openai-community/gpt2-large", #text generation
    # "atharvamundada99/bert-large-question-answering-finetuned-legal", #question-answering
    # "distilbert/distilbert-base-cased-distilled-squad", #question-answering
    "facebook/bart-large-mnli", #classification
    
]

In [None]:
for model in models:
    download_required_files(model, local_dir=f"models/model.split('/')[1]")


Attempting to download: vocab.txt
Could not download vocab.txt: 404 Client Error. (Request ID: Root=1-6778c2d0-6bb5018f019a96be1944e908;e388836d-6ff0-4b31-8dd2-4cba282bccb1)

Entry Not Found for url: https://huggingface.co/facebook/bart-large-mnli/resolve/main/vocab.txt.
Attempting to download: vocab.json
Saved to: models/model.split('/')[1]/vocab.json
Attempting to download: config.json
Saved to: models/model.split('/')[1]/config.json
Attempting to download: tokenizer.json
Saved to: models/model.split('/')[1]/tokenizer.json
Attempting to download: merges.txt
Saved to: models/model.split('/')[1]/merges.txt
Attempting to download: pytorch_model.bin
Saved to: models/model.split('/')[1]/pytorch_model.bin
Attempting to download: model.safetensors
