In [7]:
# 필요한 라이브러리 설치
!pip install -q transformers sentencepiece onnxruntime gradio

# onnx 패키지 설치
!pip install onnx

import re
import torch
import onnxruntime as ort
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
import pandas as pd
import gradio as gr
from openai import OpenAI



In [8]:
# KoBERT 모델과 토크나이저 로드
tokenizer = AutoTokenizer.from_pretrained("monologg/kobert")
model = AutoModelForSequenceClassification.from_pretrained("monologg/kobert", num_labels=3)

# ONNX 모델 저장 경로
onnx_model_path = "kobert_sentiment.onnx"

# 모델을 ONNX 형식으로 변환
torch.onnx.export(
    model,
    (torch.tensor([[1]]), torch.tensor([[1]])),
    onnx_model_path,
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "attention_mask": {0: "batch_size", 1: "sequence_length"},
        "logits": {0: "batch_size"}
    },
    opset_version=14
)

# ONNX 모델 로드
ort_session = ort.InferenceSession(onnx_model_path)

The repository for monologg/kobert contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/monologg/kobert.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


tokenization_kobert.py:   0%|          | 0.00/10.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/monologg/kobert:
- tokenization_kobert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


tokenizer_78b3253a26.model:   0%|          | 0.00/371k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/77.8k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/426 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# 감정 분석 함수
def predict_sentiment_onnx(text):
    inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True, max_length=128)
    ort_inputs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"]
    }
    logits = ort_session.run(None, ort_inputs)[0]
    probabilities = torch.nn.functional.softmax(torch.from_numpy(logits), dim=-1).detach().numpy()
    predicted_label = np.argmax(probabilities, axis=1)[0]
    confidence = probabilities[0][predicted_label]
    return predicted_label, confidence

In [10]:
# 호감도 점수 계산 함수
def calculate_affinity_score(messages):
    score = 0
    sentiment_weights = {
        "긍정": 1.0,
        "중립": 0.0,
        "부정": -1.0
    }
    for msg in messages:
        weight = sentiment_weights.get(msg["sentiment"], 0)
        score += weight * msg["confidence"]
    total_score = (score / len(messages)) * 50 + 50
    return max(0, min(total_score, 100))

In [11]:
# Gradio에서 사용할 요약 및 조언 함수
def analyze_conversation(file_path, api_key):
    # 메시지 전처리
    df = pd.read_csv(file_path, delimiter=",", encoding='utf-8')
    messages = [{"time": row['Date'], "sender": row['User'], "message": row['Message']} for _, row in df.iterrows()]

    # 감정 분석
    sentiment_labels = {0: "부정", 1: "중립", 2: "긍정"}
    for msg in messages:
        label, confidence = predict_sentiment_onnx(msg["message"])
        msg["sentiment"] = sentiment_labels.get(label, "알 수 없음")
        msg["confidence"] = confidence

    # 호감도 점수 계산
    affinity_score = calculate_affinity_score(messages)

    # 호감도 평가
    if 0 <= affinity_score <= 20:
        evaluation = "강한 부정"
    elif 21 <= affinity_score <= 40:
        evaluation = "약한 부정"
    elif 41 <= affinity_score <= 60:
        evaluation = "중립"
    elif 61 <= affinity_score <= 80:
        evaluation = "약한 긍정"
    elif 81 <= affinity_score <= 100:
        evaluation = "강한 긍정"

    # Upstage 솔라 API 설정
    client = OpenAI(api_key=api_key, base_url="https://api.upstage.ai/v1/solar")

    # 대화 요약
    conversation_text = "\n".join([f"{msg['sender']}: {msg['message']}" for msg in messages])
    summary_prompt = (
        "아래는 두 사람 간의 대화 내용입니다. 이 대화를 요약하여 주요 주제, 상대방의 관심사, 감정 등을 파악해주세요:\n\n"
        f"{conversation_text}\n\n요약:"
    )
    summary_result = client.chat.completions.create(
        model="solar-1-mini-chat",
        messages=[{"role": "system", "content": "당신은 전문 요약가입니다."},
                  {"role": "user", "content": summary_prompt}]
    )
    conversation_summary = summary_result.choices[0].message.content.strip()

    # 조언 생성
    prompt = (
        f"아래는 두 사람 간의 대화 요약입니다:\n\n{conversation_summary}\n\n"
        f"대화 상대의 호감도 점수가 {affinity_score:.2f}점이며, 평가 결과는 '{evaluation}'입니다. "
        f"이 요약된 대화를 바탕으로 상대방의 성향을 분석하고, 그에 따른 맞춤형 조언을 제공해주세요."
    )
    advice_result = client.chat.completions.create(
        model="solar-1-mini-chat",
        messages=[{"role": "system", "content": "당신은 연애 상담 전문가입니다."},
                  {"role": "user", "content": prompt}]
    )
    advice = advice_result.choices[0].message.content.strip()

    return conversation_summary, f"{affinity_score:.2f}", evaluation, advice

In [12]:
# Gradio 인터페이스 정의
gr.Interface(
    fn=analyze_conversation,
    inputs=[
        gr.File(label="카카오톡 채팅 파일 (CSV)"),
        gr.Textbox(label="Upstage API Key", type="password")
    ],
    outputs=[
        gr.Textbox(label="대화 요약"),
        gr.Textbox(label="호감도 점수"),
        gr.Textbox(label="호감도 평가"),
        gr.Textbox(label="조언")
    ],
    title="카카오톡 대화 분석 및 호감도 평가"
).launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8b62909cabf96a1c2a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


