In [1]:
!pip install flask

Collecting flask
  Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting blinker>=1.9.0 (from flask)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting click>=8.1.3 (from flask)
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting itsdangerous>=2.2.0 (from flask)
  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting werkzeug>=3.1.0 (from flask)
  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Downloading flask-3.1.2-py3-none-any.whl (103 kB)
Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Downloading click-8.2.1-py3-none-any.whl (102 kB)
Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Downloading werkzeug-3.1.3-py3-none-any.whl (224 kB)
Installing collected packages: werkzeug, itsdangerous, click, blinker, flask

   ---------------------------------------- 0/5 [werkzeug]
   ---------------------------------------- 0/5 [werkzeug]
   ---------------------------------------- 0/5 [

In [None]:
from flask import Flask, render_template_string, request
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Flask application object; the route below registers itself on it.
app = Flask(__name__)

# Choose the compute device up front: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare the model: load the tokenizer and the causal-LM weights for the
# checkpoint, then place the model on the selected device.
model_name = "skt/kogpt2-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Running conversation transcript; starts empty at import time.
chat_history = ""

@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the chat page and, on POST, generate one bot reply.

    GET renders the form with an empty reply. POST reads the ``message`` form
    field, appends it to the module-level ``chat_history`` transcript as a
    ``User:`` turn, asks the model to continue after ``Bot:``, and renders the
    generated reply. A blank message is ignored (no generation, history
    untouched). A POST without a ``message`` field still raises the same
    bad-request error as indexing ``request.form`` directly.

    Returns:
        The rendered HTML page as produced by ``render_template_string``.

    Note:
        ``chat_history`` is a single module-level string shared by every
        request; concurrent users would interleave into one transcript.
    """
    global chat_history
    # Upper bound on the number of tokens generated for one bot reply.
    max_new_tokens = 100
    bot_reply = ""
    if request.method == "POST":
        user_input = request.form["message"].strip()
        if user_input:
            # Build the prompt locally and commit it to chat_history only after
            # generation succeeds, so a failed request cannot leave a dangling
            # "Bot:" turn in the transcript.
            prompt = chat_history + f"User: {user_input}\nBot:"

            # Convert the transcript into PyTorch tensors ("pt") and move them
            # to the device on which the model runs (GPU or CPU).
            inputs = tokenizer(prompt, return_tensors="pt").to(device)

            # Keep only the most recent tokens so that prompt + reply fits in
            # the model's context window; otherwise a long conversation
            # eventually overflows the position embeddings and generate() fails.
            # The 1024 fallback is an assumption for configs without this field.
            context_window = getattr(model.config, "max_position_embeddings", 1024)
            max_prompt_tokens = max(1, context_window - max_new_tokens)
            inputs = {
                name: tensor[:, -max_prompt_tokens:]
                for name, tensor in inputs.items()
            }
            prompt_length = inputs["input_ids"].shape[1]

            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_p=0.9,
                temperature=0.8
            )

            # Decode only the newly generated tokens (everything after the
            # prompt), then cut at the first turn marker the model may have
            # invented, so the reply is the model's first answer rather than a
            # later hallucinated "Bot:" turn.
            completion = tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            )
            bot_reply = completion.split("User:")[0].split("Bot:")[0].strip()
            chat_history = f"{prompt} {bot_reply}\n"

    return render_template_string("""
        <h2>한국어 GPT2 챗봇</h2>
        <form method="post">
            <input name="message" placeholder="메시지를 입력하세요" style="width:300px">
            <input type="submit" value="Send">
        </form>
        <p><b>Bot:</b> {{bot_reply}}</p>
    """, bot_reply=bot_reply)

if __name__ == "__main__":
    # Keep debug mode, but turn off the auto-reloader: the reloader re-executes
    # the program in a child process, which would load the model a second time
    # and does not work when the server is started from a notebook kernel.
    app.run(debug=True, use_reloader=False)