## 安裝套件

In [None]:
%pip install transformers==4.40.2 accelerate==0.30.1

Collecting accelerate==0.30.1
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate==0.30.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate==0.30.1)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate==0.30.1)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate==0.30.1)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate==0.30.1)
  Using cached nvidia_c

In [None]:
import torch
from transformers import Starcoder2ForCausalLM as ModelCls
from transformers import GPT2TokenizerFast as TkCls

## 讀取模型

In [None]:
# Checkpoint identifier; the weights and the tokenizer are both loaded from it.
model_path = "bigcode/starcoder2-3b"

# Load the weights as float16 and leave device placement to device_map="auto".
model: ModelCls = ModelCls.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16)

# Tokenizer files come from the same checkpoint as the model.
tk: TkCls = TkCls.from_pretrained(model_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/12.1G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.88k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/777k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/442k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.06M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/958 [00:00<?, ?B/s]

## 定義 Generate 函式

將 Generate 的行為包裝成函式方便後續使用

In [None]:
from transformers import TextStreamer

# Reuse the end-of-sequence token as the padding token.
tk.pad_token = tk.eos_token

# Streamer built on the same tokenizer; generate() hands it to model.generate().
ts = TextStreamer(tokenizer=tk)

def generate(prompt, n):
    """Tokenize ``prompt`` and let the global ``model`` generate a continuation.

    The global ``ts`` is passed to ``model.generate`` as its ``streamer``.

    Args:
        prompt: Text to continue.
        n: Forwarded to ``model.generate`` as ``max_new_tokens``.

    Returns:
        The value returned by ``model.generate`` for the tokenized prompt.
    """
    # Move the inputs to the device the model actually lives on instead of
    # assuming "cuda": the model is loaded with device_map="auto", so a
    # hard-coded device breaks on machines without a CUDA GPU.
    inputs = tk(prompt, return_tensors="pt").to(model.device)
    return model.generate(**inputs, max_new_tokens=n, streamer=ts)

## 簡易生成

透過 StarCoder 的文字接龍能力完成接下來的程式碼

In [None]:
# Plain completion: continue the function signature, capped at 28 new tokens.
outputs = generate("def fib(n: int):", 28)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


def fib(n: int):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib


## 填充程式

StarCoder 透過特殊的 FIM Token 來完成填充程式碼的任務

這裡使用 `<游標在這>` 來表示使用者的游標停在此處

並根據游標位置將程式碼切成兩半，前半段是 Prefix 後半段是 Suffix

將 Prefix 的內容放在 `<fim_prefix>` 後面

將 Suffix 的內容放在 `<fim_suffix>` 後面

最後放上 `<fim_middle>` 來發動 StarCoder 填充程式碼的能力

In [None]:
def generate(prompt, n):
    """Generate up to ``n`` new tokens for ``prompt``, using ``<file_sep>`` as EOS.

    This redefinition shadows the earlier ``generate`` in this notebook; it
    additionally passes ``eos_token_id`` so generation can end at ``<file_sep>``.
    The global ``ts`` is passed to ``model.generate`` as its ``streamer``.

    Args:
        prompt: Full prompt text (here, a FIM-formatted prompt).
        n: Forwarded to ``model.generate`` as ``max_new_tokens``.

    Returns:
        The value returned by ``model.generate`` for the tokenized prompt.
    """
    # Look the stop token up directly in the vocabulary. Encoding the string
    # and taking the last id only works as long as the tokenizer appends
    # nothing after it.
    stop_id = tk.convert_tokens_to_ids("<file_sep>")

    # Follow the model's actual device rather than hard-coding "cuda"; the
    # model is loaded with device_map="auto".
    inputs = tk(prompt, return_tensors="pt").to(model.device)
    return model.generate(
        **inputs,
        max_new_tokens=n,
        streamer=ts,
        eos_token_id=stop_id,
    )

# Example source with a cursor marker at the position to be filled in.
full_code = """

def hello(name: str):
    print(<游標在這>)

def goodbye(name: str):
    print(f"### 系統：再會了，{name}!")

"""

# Cut the source at the first cursor marker: text before it is the prefix,
# text after it is the suffix.
prefix, suffix = full_code.split(sep="<游標在這>", maxsplit=1)

# FIM prompt layout: prefix tag + prefix, suffix tag + suffix, then the middle tag.
full_prompt = "".join(["<fim_prefix>", prefix, "<fim_suffix>", suffix, "<fim_middle>"])

# Ask for at most 16 new tokens for the missing middle part.
outputs = generate(full_prompt, 16)

Setting `pad_token_id` to `eos_token_id`:6 for open-end generation.


<fim_prefix>

def hello(name: str):
    print(<fim_suffix>)

def goodbye(name: str):
    print(f"### 系統：再會了，{name}!")

<fim_middle>f"### 系統：你好，{name}!"<file_sep>


## 重建程式碼

模型最後的輸出會包含原本的 Prompt

所以我們將 Prompt 去除

然後填回 `<游標在這>` 來完成程式碼重建

In [None]:
# Tokenize the prompt again only to learn how many leading tokens of the
# generated sequence belong to it.
tokens = tk.encode(full_prompt)

# Drop the prompt tokens; what remains is the newly generated part.
output = outputs[0][len(tokens):]
middle = tk.decode(output, skip_special_tokens=True)

# The prompt was built by splitting on the FIRST cursor marker only, so fill in
# only the first marker here as well (an unbounded replace would paste the
# generated text into every marker occurrence).
print(full_code.replace("<游標在這>", middle, 1))



def hello(name: str):
    print(f"### 系統：你好，{name}!")

def goodbye(name: str):
    print(f"### 系統：再會了，{name}!")


