# Hugging Face - pipeline

In [None]:
pip install transformers datasets xformers -q

In [None]:
pip install --upgrade transformers

In [None]:
pip install --upgrade pip

In [None]:
pip install transformers datasets xformers -c

## pipeline - 감정 분석

In [None]:
from transformers import pipeline

# Sentiment-analysis pipeline; no model is named here, so the library's default is used.
classifier = pipeline("sentiment-analysis")

# Classify a single sentence first, then a batch of two, printing each result.
for batch in ("We are very happy.", ["We are very happy.", "We hope you don't hate it."]):
    results = classifier(batch)
    print(results)

In [None]:
from transformers import pipeline

# Korean example sentences to classify in one batch.
sentences = ["괜찮은데?", "생각보단 별로네", "그저그래", "영화 재미있었다", "평범했어"]

# Same task as above, but with an explicitly chosen Korean sentiment model.
classifier = pipeline("sentiment-analysis", model="matthewburke/korean_sentiment")

# Last expression of the cell, so the notebook displays the predictions.
classifier(sentences)

## pipeline - 텍스트 생성

In [None]:
from transformers import pipeline

# Text-generation pipeline; no model is named here, so the library's default is used.
generator = pipeline("text-generation")

# Continue the prompt (max_length=1000) and print the text of the first candidate.
result = generator("I'm pregnant", max_length=1000)
first_candidate = result[0]
print(first_candidate['generated_text'])

In [None]:
from transformers import pipeline

# Load the model: Korean GPT-2 checkpoint, generation capped at max_length=1000.
generator = pipeline(
    "text-generation",
    model="skt/kogpt2-base-v2",
    max_length=1000,
)

# Use the model: continue a Korean prompt and print the first candidate's text.
result = generator("KT 에이블스쿨은 최고야.")
first_candidate = result[0]
print(first_candidate['generated_text'])

## pipeline - 이미지 분류

In [None]:
from IPython.display import Image
from transformers import pipeline

# Two sample image URLs; only img2 is used below (img is defined but never read in this cell).
img = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"
img2 = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'

# Image classification: run the default image-classification pipeline on img2.
classifier = pipeline("image-classification")
result = classifier(img2)
print(result)

# Image captioning: run the default image-to-text pipeline on the same image.
# Note that `result` is overwritten here.
caption = pipeline("image-to-text")
result = caption(img2)
print(result)

# Show the image itself (last expression of the cell, so the notebook renders it).
Image(img2)


## pipeline - 음성 인식

In [None]:
pip install soundfile
pip install librosa
pip install torchaudio

In [None]:
# Load the audio dataset (en-US configuration, train split).
from datasets import load_dataset

dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")

# Print the transcription of the first example.
first_example = dataset[0]
print(first_example['transcription'])

In [None]:
# Play the audio file of the first example.
from IPython.display import Audio

clip_path = dataset[0]["audio"]['path']
Audio(clip_path)

In [None]:
import torch
from transformers import pipeline

# Load the model: speech-recognition pipeline backed by a wav2vec2 checkpoint.
speech_recognizer = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# Use the model: transcribe the audio of the first dataset example.
audio_sample = dataset[0]["audio"]
result = speech_recognizer(audio_sample)
print(result)

In [None]:
import torch
from transformers import pipeline
from datasets import load_dataset
from IPython.display import Audio

# Load the audio dataset (ko-KR configuration, train split) and show its summary.
dataset = load_dataset("PolyAI/minds14", name="ko-KR", split="train")
print(dataset)

# Load the model: speech-recognition pipeline with a Korean wav2vec2 checkpoint.
speech_recognizer = pipeline(
    task="automatic-speech-recognition",
    model="Hyuk/wav2vec2-korean-v2",
)

# Use the model: transcribe the second example.
result = speech_recognizer(dataset[1]["audio"])
print(result)

# Play the same clip. The dataset is indexed again on purpose instead of reusing
# the dict handed to the pipeline.
# NOTE(review): the pipeline may modify the dict it receives - confirm before sharing it.
Audio(dataset[1]["audio"]['path'])


## gradio chatbot

In [None]:
# pip install gradio 
# !pip install peft -q
# !pip install sentencepiece -q

In [None]:
from transformers import pipeline
import gradio as gr

# Cache for the sentiment pipeline so it is built once, not on every request.
_sentiment_classifier = None


def greet(sentence):
    """Classify the sentiment of `sentence` and describe the result in Korean.

    The Korean sentiment pipeline is created on the first call and reused
    afterwards; the original version rebuilt it for every submitted sentence.

    Args:
        sentence: Text submitted through the Gradio text box.

    Returns:
        A string containing the input sentence, the predicted label mapped to
        긍정 (LABEL_1) or 부정 (LABEL_0), and the score as a percentage.

    Raises:
        KeyError: If the model returns a label other than LABEL_0 / LABEL_1.
    """
    global _sentiment_classifier
    if _sentiment_classifier is None:
        _sentiment_classifier = pipeline("sentiment-analysis", "matthewburke/korean_sentiment")

    result = _sentiment_classifier(sentence)
    d = {
        'LABEL_1': '긍정',
        'LABEL_0': '부정'
    }

    # Only the first prediction is reported.
    top = result[0]
    return f"{sentence}\n 이 문장이 {d[top['label']]} 일 확률은 {top['score'] * 100}%"

# Wrap greet in a text-in / text-out Gradio interface.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# Launch with share=True and debug=True.
# NOTE(review): share=True presumably exposes the demo through a public link - confirm that is intended.
demo.launch(share=True, debug=True)

In [None]:
import gradio as gr

def greet(name):
    """Print the submitted name and return the greeting 'Hello <name>!'."""
    print(name)
    greeting = ''.join(('Hello ', name, '!'))
    return greeting

# Wrap greet in a text-in / text-out Gradio interface.
demo = gr.Interface(fn=greet, inputs='text', outputs='text')

# Launch with share=True and debug=True.
# NOTE(review): share=True presumably exposes the demo through a public link - confirm that is intended.
demo.launch(share=True, debug=True)

In [None]:
import gradio as gr

def chat(msg, history):
    """Answer every message with a fixed Korean greeting.

    Appends the (message, reply) pair to `history` in place and returns an
    empty string (new textbox content) together with that same history list.
    """
    turn = (msg, "안녕. 반가워")
    history.append(turn)
    cleared_textbox = ""
    return cleared_textbox, history

# Minimal chat UI: a chat window, a text box, and a button that clears both.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton(components=[msg, chatbot])

    # Submitting the text box calls chat; its two return values update the
    # text box and the chat window respectively.
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()

In [None]:
from transformers import pipeline

# Text-generation pipeline using a KoGPT2-based chatbot checkpoint (max_length=1000).
generator = pipeline(
    "text-generation",
    model="EasthShin/Youth_Chatbot_Kogpt2-base",
    max_length=1000,
)

# Generate from a one-word greeting and print the raw pipeline output.
prompt = "안녕"
result = generator(prompt)
print(result)

In [None]:
import gradio as gr
from transformers import pipeline

# Text-generation pipeline shared by the chat handler below (max_length=1000).
generator = pipeline(
    "text-generation",
    model="EasthShin/Youth_Chatbot_Kogpt2-base",
    max_length=1000,
)

def chat(msg, history):
    """Generate a chatbot reply with the module-level `generator` pipeline.

    The prompt is the last three (user, bot) turns of `history` rendered as
    "유저: ..." / "챗봇: ..." lines, followed by the new user message and an
    open "챗봇: " line. The reply is whatever the generated text contains after
    the prompt. The new turn is appended to `history` in place.

    Returns:
        ("", history): an empty string for the text box and the updated history.
    """
    rendered_turns = []
    for turn in history[-3:]:
        rendered_turns.append(f"유저: {turn[0]}\n챗봇: {turn[1]}")
    context = '\n'.join(rendered_turns)
    prompt = f"{context}\n유저: {msg}\n챗봇: "

    candidates = generator(prompt)
    gen_text = candidates[0]['generated_text']
    # The generated text starts with the prompt; keep only what follows it.
    reply = gen_text[len(prompt):]
    history.append((msg, reply))

    return "", history

# Chat UI: a chat window, a text box, and a button that clears both.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton(components=[msg, chatbot])

    # Submitting the text box calls chat; its two return values update the
    # text box and the chat window respectively.
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch(debug=True)

# OpenAI 챗봇
https://openai.com/research/instruction-following

In [None]:
pip install openai

## gpt3 api
- openai.Completion.create
  - model
  - prompt

In [None]:
import os
from getpass import getpass

import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Plain completion request: one prompt in, up to 1000 tokens of text out.
# NOTE(review): confirm that text-davinci-003 is still served by the API.
response = openai.Completion.create(
    model="text-davinci-003",
    prompt="Hugging Face와 OpenAI를 비교해 줘.",
    temperature=0.7,
    max_tokens=1000
)
# Print the text of the first returned choice.
print(response.choices[0].text)

## chatgpt api
- openai.ChatCompletion.create
  - model
  - messages

In [None]:
# GPT chat model
import os
from getpass import getpass

import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Chat request whose only message is a system-role instruction.
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
    {
      "role": "system",
      "content": "i want the basic python code"
    }
  ],
  temperature=1,
  max_tokens=256,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)


# Print the content of the first returned choice.
print(response.choices[0].message.content)

In [None]:
# GPT in completion mode
import os
from getpass import getpass

import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Plain completion request: the model continues the bare prompt text.
# NOTE(review): confirm that text-davinci-003 is still served by the API.
response = openai.Completion.create(
  model="text-davinci-003",
  prompt="마이클 잭슨",
  temperature=1,
  max_tokens=256,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)

# Print the text of the first returned choice.
print(response.choices[0].text)

In [None]:
import os
from getpass import getpass

import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Multi-turn chat request: a system instruction followed by alternating
# user / assistant messages that seed a TRPG session.
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "TRPG 게임의 마스터 역할. 흥미로운 게임을 진행할 수 있도록 도와준다."},
        {"role": "user", "content": "나에게 먼저 질문을 해서 게임을 시작해줘."},
        {"role": "assistant", "content": "당신이 플레이할 캐릭터의 이름과 직업. 나이를 말씀해 주세요."},
        {"role": "user", "content": "블랙듀, 20살. 남성. 프로그래머"}
    ]
)

# Print the content of the first returned choice.
print(response.choices[0].message.content)

## with gradio

In [None]:
!pip install gradio

In [None]:
import gradio as gr
import openai

def 텍스트생성(prompt):
    """Ask the completion model to translate `prompt` into English.

    Wraps `prompt` in a Korean instruction, sends it to
    openai.Completion.create, and returns the first choice's text with
    surrounding whitespace stripped.
    """
    request = dict(
        model="text-davinci-003",
        prompt=f"'{prompt}' 이 문장을 영어로 번역해줘",
        temperature=0.7,
        max_tokens=1024,
    )
    response = openai.Completion.create(**request)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()

# Wrap 텍스트생성 in a text-in / text-out Gradio interface and start it.
demo = gr.Interface(fn=텍스트생성, inputs="text", outputs="text")
demo.launch()


In [None]:
import gradio as gr
import openai

def 텍스트생성(prompt):
    """Ask the completion model to write a 삼행시 for `prompt`.

    Wraps `prompt` in a Korean instruction, sends it to
    openai.Completion.create, and returns the first choice's text with
    surrounding whitespace stripped.
    """
    request = dict(
        model="text-davinci-003",
        prompt=f"'{prompt}' 삼행시를 지어줘",
        temperature=0.7,
        max_tokens=1024,
    )
    response = openai.Completion.create(**request)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()

# Wrap 텍스트생성 in a text-in / text-out Gradio interface and start it.
demo = gr.Interface(fn=텍스트생성, inputs="text", outputs="text")
demo.launch()


In [None]:
def make_message(msg, history):
    """Build the chat-completion message list for a new user message.

    Args:
        msg: The new user message.
        history: Sequence of (user_text, assistant_text) pairs; only the last
            ten pairs are included.

    Returns:
        A list of {"role", "content"} dicts: the fixed system prompt, then a
        user/assistant message for each kept pair, then `msg` as the final
        user message.
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    past_turns = [
        {"role": role, "content": pair[position]}
        for pair in history[-10:]
        for position, role in enumerate(("user", "assistant"))
    ]
    return [system_turn, *past_turns, {"role": "user", "content": msg}]

# Sample one-turn history of (user, assistant) pairs for trying out make_message.
history = [
    ("안녕", "안녕하세요. 무엇을 도와드릴까요?"),
]
# Last expression of the cell, so the notebook displays the built message list.
make_message("매수 타이밍 좀...", history)

In [None]:
import os
from getpass import getpass

import gradio as gr
import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

def chat(msg, history):
    """Send the conversation to gpt-4 and record the reply.

    The request messages come from make_message(msg, history). The
    (message, reply) pair is appended to `history` in place.

    Returns:
        ("", history): an empty string for the text box and the updated history.
    """
    request_messages = make_message(msg, history)
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=request_messages,
    )
    first_choice = response['choices'][0]
    reply = first_choice['message']['content']
    history.append((msg, reply))
    return "", history

# Chat UI: a chat window, a text box, and a button that clears both.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton(components=[msg, chatbot])

    # Submitting the text box calls chat; its two return values update the
    # text box and the chat window respectively.
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

# NOTE(review): share=True presumably exposes the demo through a public link - confirm that is intended.
demo.launch(debug=True, share=True)

In [None]:
import os
from getpass import getpass

import gradio as gr
import openai

# Never commit API keys. Read the key from the OPENAI_API_KEY environment variable,
# or prompt for it (input hidden) when the variable is not set. The key that used
# to be hard-coded here was exposed in source and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")


# Module-level conversation log, seeded with the system prompt. The chat handler
# appends every user and assistant message to this single list.
messages=[
    {"role": "system", "content": "넌 이제부터 내 오랜 친구야"},
]

def chat(msg, history):
    """Send the recent conversation to gpt-3.5-turbo and record the reply.

    The user message and the model's reply are appended to the module-level
    `messages` log, and the (message, reply) pair is appended to `history`
    in place.

    Args:
        msg: The new user message.
        history: The Gradio chat history, a list of (user, assistant) pairs.

    Returns:
        ("", history): an empty string for the text box and the updated history.
    """
    messages.append({"role": "user", "content": msg})

    # Send the system prompt plus the ten most recent non-system messages.
    # The original slice `messages[10:]` skipped the FIRST ten log entries instead,
    # so the model received only the system prompt (not even the new user message)
    # until the log had grown past ten items.
    recent = [m for m in messages if m["role"] != "system"][-10:]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": "넌 이제부터 내 오랜 친구야"}, *recent],
        temperature = 1,
        max_tokens=512
    )
    reply = response.choices[0].message['content']
    messages.append({"role":"assistant", "content":reply})
    # Debug output: dump the whole conversation log after each turn.
    print(messages)

    history.append((msg, reply))
    return "", history

# Chat UI: a chat window, a text box, a Send button, and a button that clears
# the text box and chat window.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    send = gr.Button('Send')
    clear = gr.ClearButton(components=[msg, chatbot])
    # Submitting the text box and clicking Send both call chat with the same wiring.
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])
    send.click(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

# NOTE(review): share=True presumably exposes the demo through a public link - confirm that is intended.
demo.launch(share=True)

# 토큰화

In [None]:
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast

# Tokenizer and language model are loaded from the same checkpoint name.
tokenizer, model = (
    PreTrainedTokenizerFast.from_pretrained("taeminlee/kogpt2"),  # text -> token ids
    GPT2LMHeadModel.from_pretrained("taeminlee/kogpt2"),  # scores the next token
)

In [98]:
# Encode the prompt into token ids, returned as a PyTorch tensor (return_tensors="pt").
text = "아들아 너는 계획이"
tokens = tokenizer.encode(text, return_tensors="pt")
# Last expression of the cell, so the notebook displays the tensor.
tokens

tensor([[ 2656, 47485,  1681, 47441, 10635]])

In [99]:
# Take element 0 of the model output, then batch item 0 at the last position:
# one score per vocabulary entry for the next token. The displayed values include
# negatives, so these are raw scores rather than probabilities.
outputs = model(tokens)[0][0, -1, :]
outputs

tensor([ -0.8730,   7.6543, -11.9663,  ...,  -7.1661,  -0.3533,  -2.2992],
       grad_fn=<SliceBackward0>)

In [100]:
token = outputs.argmax(-1)  # index (token id) of the highest-scoring entry
token

tensor(105)

In [101]:
# Turn the selected token id back into text.
decoded = tokenizer.decode(token)
decoded

'있'

In [104]:
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast
# Tokenizer (text -> token ids). This reloads the same checkpoint name as the first
# cell of this section; the recorded output shows a tokenizer-class mismatch warning.
tokenizer = PreTrainedTokenizerFast.from_pretrained("taeminlee/kogpt2")
# Language model used to predict the following tokens.
model = GPT2LMHeadModel.from_pretrained("taeminlee/kogpt2")
# Encode a slightly longer prompt as a PyTorch tensor and display it.
text = "아들아 너는 계획이 다"
tokens = tokenizer.encode(text, return_tensors="pt")
tokens

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'GPT2Tokenizer'. 
The class this function is called from is 'PreTrainedTokenizerFast'.


tensor([[ 2656, 47485,  1681, 47441, 10635,   148]])

In [105]:
# Generate a continuation with default settings. The recorded output shows a
# warning that no attention mask / pad token id was supplied.
outputs = model.generate(tokens)
first_sequence = outputs[0]
print(first_sequence)

# Decode the generated ids (prompt included) back into text.
generated_text = tokenizer.decode(first_sequence)
print(generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([ 2656, 47485,  1681, 47441, 10635,   148,   605,     1,     0,   155,
          872,  7892,  8274,   605,     1,     0,   155,   872,  7892,  8274])
아들아 너는 계획이 다 있어</s><s> 나 지금 집에 가고 있어</s><s> 나 지금 집에 가고
