# Sharing pretrained models (PyTorch)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [None]:
# Install required libraries for Transformers, datasets, and evaluation
# Also install git-lfs for handling large files in Git repositories
!uv pip install datasets evaluate transformers[sentencepiece]
!apt install git-lfs

You will need to set up Git: adapt the email and name in the following cell to your own.

In [None]:
# Set the global Git identity (email and name) for this environment.
# Replace both placeholder values with your own before running the cell.
!git config --global user.email "you@example.com"
!git config --global user.name "Your Name"

You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# notebook_login() asks for your credentials interactively; the upload cells further down
# rely on this login unless a token is passed explicitly.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Training configuration with Hub integration enabled.
# output_dir is the local folder that receives the checkpoints,
# save_strategy="epoch" writes a checkpoint at the end of every epoch, and
# push_to_hub=True asks the Trainer to upload the saved model to the Hub.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="bert-finetuned-mrpc",
    push_to_hub=True,
    save_strategy="epoch",
)

In [None]:
# Fetch a pretrained checkpoint to use as the demo model for the upload cells below.
# CamemBERT is a French language model based on the RoBERTa architecture.
from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint = "camembert-base"

# The tokenizer and the model are loaded from the same checkpoint name so that they match.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

In [None]:
# Simplest way to upload a model to the Hub.
# A bare repository name (no "<namespace>/" prefix) targets a repository under your own username.
model.push_to_hub("dummy-model")

In [None]:
# Upload the tokenizer to the same repository as the model.
# This is a separate call on the tokenizer object; the cell above pushed only the model.
tokenizer.push_to_hub("dummy-model")

In [None]:
# Uploading to an organization repository.
# The owning organization is written as the namespace prefix of the repo id
# ("<organization>/<repo_name>"); the separate `organization` keyword is deprecated in Transformers.
tokenizer.push_to_hub("huggingface/dummy-model")

In [None]:
# Using an explicit authentication token (alternative to notebook_login).
# `token` replaces the deprecated `use_auth_token` keyword, and the organization is given as the
# namespace prefix of the repo id instead of the deprecated `organization` keyword.
# "<TOKEN>" is a placeholder: never save a real token in a notebook that you share or commit.
tokenizer.push_to_hub("huggingface/dummy-model", token="<TOKEN>")

In [None]:
# Advanced Hub operations using the huggingface_hub library.
# The names are imported here only to show what is available; this cell does not call any of them.
# NOTE(review): list_metrics and update_repo_visibility appear to have been removed from recent
# huggingface_hub releases — confirm against the installed version, otherwise this import fails.
from huggingface_hub import (
    # User management
    login,
    logout,
    whoami,

    # Repository creation and management
    create_repo,
    delete_repo,
    update_repo_visibility,

    # And some methods to retrieve/change information about the content
    list_models,
    list_datasets,
    list_metrics,
    list_repo_files,
    upload_file,
    delete_file,
)

In [None]:
# Creating a repository manually on the Hub.
# On a fresh namespace this creates an empty repository that you can then populate with files.
# exist_ok=True keeps the cell re-runnable: model.push_to_hub("dummy-model") earlier in this
# notebook already creates a repository with this name, and without the flag create_repo
# raises an error when the repository exists.
from huggingface_hub import create_repo

create_repo("dummy-model", exist_ok=True)

In [None]:
# Creating a repository under an organization.
# The organization is the namespace prefix of the repo id ("<organization>/<repo_name>");
# create_repo no longer accepts a separate `organization` keyword.
# exist_ok=True avoids an error when the repository already exists, so the cell can be re-run.
from huggingface_hub import create_repo

create_repo("huggingface/dummy-model", exist_ok=True)

In [None]:
# Uploading individual files to the Hub.
# Use this method when you want to upload specific files one by one.
# Every argument is passed by keyword: upload_file declares its parameters as keyword-only,
# so a positional file path raises a TypeError.
from huggingface_hub import upload_file

upload_file(
    path_or_fileobj="<path_to_file>/config.json",  # Local file to upload
    path_in_repo="config.json",  # Where the file will be stored in the repo
    repo_id="<namespace>/dummy-model",  # Repository identifier
)

In [None]:
# Git-based workflow: the Repository class clones the Hub repository into a local folder
# and exposes Git operations on that clone.
from huggingface_hub import Repository

repo = Repository(
    local_dir="<path_to_dummy_folder>",
    clone_from="<namespace>/dummy-model",
)

In [None]:
# Overview of the Git operations exposed through the Repository object.
# The method names mirror the standard Git commands.
# NOTE(review): this reads as an illustrative listing rather than a runnable sequence —
# git_commit() may have nothing to commit at this point, and git_tag() is called without a tag
# name. Confirm that git_tag exists in the installed huggingface_hub (recent versions appear to
# expose tagging as add_tag).
repo.git_pull()      # Pull the latest changes from the remote
repo.git_add()       # Stage all changes for commit
repo.git_commit()    # Commit the staged changes
repo.git_push()      # Push commits to the remote repository
repo.git_tag()       # Create a Git tag

In [None]:
# Bring the local clone up to date with the remote repository
# before new files are written into it in the next cells.
repo.git_pull()

In [None]:
# Write the model and tokenizer files into the local repository folder,
# ready to be committed and pushed in the next cell.
model.save_pretrained(save_directory="<path_to_dummy_folder>")
tokenizer.save_pretrained(save_directory="<path_to_dummy_folder>")

In [None]:
# Publish the saved files to the Hub in three steps:
# stage everything in the local clone, record a commit, then push that commit.
repo.git_add()
repo.git_commit(commit_message="Add model and tokenizer files")
repo.git_push()

In [None]:
# End-to-end example: load a checkpoint, modify the model, then save it locally.
# This is the usual pattern when fine-tuning a model that you intend to share.
from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint = "camembert-base"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

# Training or fine-tuning of `model` would happen at this point.

# Persist the (possibly modified) model and its tokenizer in the local repository folder.
model.save_pretrained(save_directory="<path_to_dummy_folder>")
tokenizer.save_pretrained(save_directory="<path_to_dummy_folder>")