## Install dependencies

In [None]:
!pip -q install git+https://github.com/huggingface/transformers
!pip -q install accelerate>=0.12.0

## Gradio

In [None]:
# install dependencies
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!apt-get install ffmpeg
!pip install gradio
!pip install xformers

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:4.2.7-0ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gradio
  Downloading gradio-3.35.2-py3-none-any.whl (19.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.7/19.7 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles (from gradio)
  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting aiohttp (from gradio)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
Collecting fastapi (from gradio)
  Downloading fastapi-0.97.0-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import gradio as gr
from transformers import *
import torch

# Configure the Gradio page theme: Monochrome base with custom hues, small
# corner radius, and an "Open Sans" font with generic sans-serif fallbacks.
theme = gr.themes.Monochrome(
    font=[
        gr.themes.GoogleFont("Open Sans"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
    radius_size=gr.themes.sizes.radius_sm,
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xformers
  Downloading xformers-0.0.20-cp310-cp310-manylinux2014_x86_64.whl (109.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyre-extensions==0.0.29 (from xformers)
  Downloading pyre_extensions-0.0.29-py3-none-any.whl (12 kB)
Collecting typing-inspect (from pyre-extensions==0.0.29->xformers)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect->pyre-extensions==0.0.29->xformers)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensions, typing-inspect, pyre-extensions, xformers
Successfully installed mypy-extensions-1.0.0 pyre-extensions-0.0.29 typing-inspect-0.9.0 xformers-0.0.20


## Dolly

In [None]:
import torch
# Import every transformers name this cell uses explicitly, so the cell does not
# depend on a wildcard import executed in some other cell.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the pretrained tokenizer and model
model_name = "databricks/dolly-v2-2-8b"
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=False,
    trust_remote_code=True,
)

# Id of the first token produced when encoding the "### End" marker; it is used
# below as the end-of-sequence id that stops generation.
# NOTE(review): this assumes the marker encodes to a single token -- confirm
# against the tokenizer's special-token configuration.
end_key_token_id = tokenizer.encode("### End")[0]

# Create the Dolly text-generation pipeline
instruct_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=end_key_token_id,
)


loading file vocab.json from cache at None
loading file merges.txt from cache at None
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--databricks--dolly-v2-2-8b/snapshots/877db3ed12a3086500d144b9ef74e469b107a041/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--databricks--dolly-v2-2-8b/snapshots/877db3ed12a3086500d144b9ef74e469b107a041/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--databricks--dolly-v2-2-8b/snapshots/877db3ed12a3086500d144b9ef74e469b107a041/tokenizer_config.json
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--databricks--dolly-v2-2-8b/snapshots/877db3ed12a3086500d144b9ef74e469b107a041/config.json
Model config GPTNeoXConfig {
  "_name_or_path": "databricks/dolly-v2-2-8b",
  "architectures": [
    "GPTNeoXForCausalLM"
  ],
  "bos_token_

In [None]:
# Function definitions

# Generate a response for the given instruction text
def generate(instruction):
    """Run the model on ``instruction`` and return the decoded text.

    Args:
        instruction: Prompt text entered by the user.

    Returns:
        The decoded output sequence (prompt tokens followed by the generated
        continuation) with special tokens removed, or an empty string when
        ``instruction`` is ``None``, empty, or whitespace-only.
    """
    # Guard: a blank prompt has nothing meaningful to generate from, so skip
    # tokenization and the model call entirely.
    if not instruction or not instruction.strip():
        return ""

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which is passed to generate() explicitly below.
    encoded = tokenizer(instruction, return_tensors="pt")
    input_ids = encoded["input_ids"].to(model.device)  # same device as the model
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(model.device)

    generated_output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=256,  # total length cap: prompt tokens count toward it
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=end_key_token_id,
    )

    # Drop special tokens so end/padding markers are not shown to the user.
    return tokenizer.decode(generated_output[0], skip_special_tokens=True)


# Sample prompts
examples = [
    (
        "Instead of making a peanut butter and jelly sandwich, "
        "what else could I combine peanut butter with in a sandwich? "
        "Give five ideas"
    ),
    "How do I make a campfire?",
    "Write me a tweet about the release of Dolly 2.0, a new LLM",
]

# Handler for the example prompts
def process_example(args):
    """Return the final answer produced by ``generate`` for an example prompt.

    Args:
        args: The example prompt text, forwarded unchanged to ``generate``.

    Returns:
        The full string when ``generate`` returns a string; otherwise the last
        item yielded by the iterable it returns, or ``""`` if it yields nothing.
    """
    result = generate(args)

    # A string must be returned whole: iterating it would walk its characters
    # and leave only the final character as the "answer".
    if isinstance(result, str):
        return result

    # Streaming-style result: drain it and keep the last chunk. The default
    # keeps an empty iterable from raising UnboundLocalError.
    last_chunk = ""
    for last_chunk in result:
        pass
    return last_chunk


## Implementation

In [None]:
# Build the Dolly demo UI with Gradio
css = ".generating {visibility: hidden}"

with gr.Blocks(css=css, analytics_enabled=False, theme=theme) as demo:
    # Same nesting as three stacked `with` statements: Column > Row > Column.
    with gr.Column(), gr.Row(), gr.Column(scale=3):
        # Question input
        instruction = gr.Textbox(
            label="Question",
            placeholder="Enter your question here",
            elem_id="q-input",
        )

        # Answer area: a heading plus the Markdown element that receives output
        with gr.Box():
            gr.Markdown("**Answer**")
            output = gr.Markdown(elem_id="q-output")

        submit = gr.Button("Generate", variant="primary")

        # Example prompts listed under the button
        gr.Examples(
            fn=process_example,
            examples=examples,
            inputs=[instruction],
            outputs=[output],
            cache_examples=False,
        )

    # Both the button click and the textbox submit event run `generate`
    submit.click(
        generate,
        inputs=[instruction],
        outputs=[output],
    )
    instruction.submit(
        generate,
        inputs=[instruction],
        outputs=[output],
    )

# Enable the request queue, then start the app in debug mode
demo.queue(concurrency_count=1)
demo.launch(debug=True)