# 使用模型并贡献模型

## 使用模型

In [None]:
# Build a fill-mask pipeline backed by the "camembert-base" checkpoint.
from transformers import pipeline

camembert_fill_mask = pipeline(task="fill-mask", model="camembert-base")

# Ask the pipeline for candidates for the <mask> token of a French sentence.
results = camembert_fill_mask("Le camembert est <mask> :)")

In [None]:
# Load the same checkpoint through the Auto* classes instead of a pipeline.
from transformers import AutoModelForMaskedLM, AutoTokenizer

model = AutoModelForMaskedLM.from_pretrained("camembert-base")
tokenizer = AutoTokenizer.from_pretrained("camembert-base")

## 贡献模型

### 创建新模型库的三种方法
- push_to_hub API接口
- huggingface_hub python库
- web界面

### 登录Hugging Face

In [None]:
from huggingface_hub import notebook_login

# Log in to Hugging Face from inside the notebook.
notebook_login()

# To log in from the command line instead, run:
# huggingface-cli login

## 第一种：push_to_hub API接口

1. 在TrainingArguments中设置push_to_hub参数为True
    - 如果不设置仓库名称则使用输出目录名作为仓库名称，可以通过hub_model_id参数设置
    - 如果要上传到特定的组织（organization），则将hub_model_id设置为“组织名/仓库名”的形式，即在组织名称与仓库名称之间加斜线

In [None]:
from transformers import TrainingArguments

# Save a checkpoint every epoch and push to the Hub repository named by
# hub_model_id ("<organization>/<repo_name>").
training_args = TrainingArguments(
    hub_model_id="my_organization/my_repo_name",
    push_to_hub=True,
    save_strategy="epoch",
    output_dir="bert-finetuned-mrpc",
)

In [None]:
# Upload the latest version of the model. This also generates a model card
# containing the relevant metadata, reporting the hyperparameters used and
# the evaluation results.
# NOTE(review): `trainer` is not created in any cell shown here — presumably a
# Trainer built with the training_args above; confirm before running.
trainer.push_to_hub()

2. push_to_hub指定文件

In [None]:
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Single checkpoint name shared by the model and its tokenizer.
checkpoint = "camembert-base"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

### 在微调训练完成之后可以通过以下的方式来上传微调模型文件

In [None]:
# Push the model first; this creates the repository if it does not exist yet.
# The organization is given as the namespace prefix of the repo id and the
# access token is passed via `token`; the older `organization=` and
# `use_auth_token=` keyword arguments are deprecated.
model.push_to_hub("huggingface/dummy-model", token="<TOKEN>")

# Push the tokenizer to the same repository as the model.
tokenizer.push_to_hub("huggingface/dummy-model", token="<TOKEN>")

## 第二种：huggingface_hub python库

In [None]:
# Log in from the command line
# huggingface-cli login

### 管理仓库的创建、删除等

In [None]:
# Helpers exposed by huggingface_hub for managing accounts and repositories.
# NOTE(review): newer huggingface_hub releases appear to have deprecated or
# dropped some of these names (e.g. list_metrics, update_repo_visibility) —
# confirm against the installed version.
from huggingface_hub import (
    # User management
    login,
    logout,
    whoami,

    # Repository creation and management
    create_repo,
    delete_repo,
    update_repo_visibility,

    # And some methods to retrieve/change information about the content
    list_models,
    list_datasets,
    list_metrics,
    list_repo_files,
    upload_file,
    delete_file,
)

### 创建新仓库

In [None]:
from huggingface_hub import create_repo

# The organization is part of repo_id ("<organization>/<repo_name>"); the
# separate `organization=` keyword is not accepted by current huggingface_hub.
create_repo(
    repo_id="huggingface/dummy-model",
    private=False,  # set to True to create a private repository
    token="<TOKEN>",
    # A model repository, matching the model files uploaded in the later
    # cells; use "dataset" or "space" to create those repository types.
    repo_type="model",
)

## 第三种：web界面
1. https://huggingface.co/new
2. 指定owner（所属用户或组织）
3. 指定Model名字
4. 指定仓库是否公开
5. 增加README.md文件，对项目及文件进行描述
6. 上传模型文件

## 上传模型文件

### 方法一

In [None]:
from huggingface_hub import upload_file

# path_or_fileobj is passed by keyword: current huggingface_hub declares the
# parameters of upload_file as keyword-only, so a positional path fails.
upload_file(
    path_or_fileobj="<path_to_file>/config.json",
    path_in_repo="config.json",
    repo_id="<namespace>/dummy-model",
)
# Optional extra arguments: token="<TOKEN>", repo_type="dataset"

### 方法二

In [None]:
# Clone the remote repository and initialize it in a local folder.
# NOTE(review): `Repository` is reportedly deprecated in recent huggingface_hub
# releases — confirm it is still available in the installed version.
from huggingface_hub import Repository

repo = Repository("<path_to_dummy_folder>",
                  clone_from="<namespace>/dummy-model")

- repo.git_pull()
- repo.git_add()
- repo.git_commit()
- repo.git_push()
- repo.git_tag()

In [None]:
# Pull the latest changes into the local clone
# repo.git_pull()

# Save the model and tokenizer into the cloned folder
model.save_pretrained("<path_to_dummy_folder>")
tokenizer.save_pretrained("<path_to_dummy_folder>")

# Stage the files, commit the change, and push it to the remote
repo.git_add()
repo.git_commit("Add model and tokenizer files")
repo.git_push()

### 方法三

In [None]:
# Initialize git-lfs
# git lfs install

# Clone the repository
# git clone https://huggingface.co/<namespace>/<your-model-id>
# git clone https://huggingface.co/lysandre/dummy

# Change into the cloned directory and list its files
# cd dummy && ls

from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint = "camembert-base"

model = AutoModelForMaskedLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Do whatever with the model, train it, fine-tune it...

model.save_pretrained("<path_to_dummy_folder>")
tokenizer.save_pretrained("<path_to_dummy_folder>")

# Stage the files and commit the change
# git add .
# git status
# git lfs status
# git commit -m "First model version"
# git push