In [20]:
import os
import time
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one can be found.
# NOTE(review): the recorded output of this cell is False — presumably no .env
# values were loaded, so any API keys the chat models need would have to be set
# in the process environment already; confirm before a fresh run.
load_dotenv()

False

In [21]:
import fasttext


# Loaded fastText models keyed by file path, so each model file is read from
# disk at most once per kernel instead of once per detect_language() call.
_LANG_ID_MODELS = {}


def detect_language(text, model_path='./data/lid.176.ftz'):
    """Return the top-1 language label fastText predicts for ``text``.

    Args:
        text: The string to identify. Newlines are replaced with spaces first,
            because fastText's ``predict`` handles one line at a time and
            raises ``ValueError`` when the input contains ``'\\n'``.
        model_path: Path to the fastText language-identification model file.
            Defaults to the path this notebook has always used.

    Returns:
        The predicted label with its ``__label__`` prefix removed, e.g. ``'ko'``.
    """
    model = _LANG_ID_MODELS.get(model_path)
    if model is None:
        # First use of this path: load once and remember the model.
        model = fasttext.load_model(model_path)
        _LANG_ID_MODELS[model_path] = model

    single_line = text.replace("\n", " ")
    predictions = model.predict(single_line, k=1)
    # predictions is (labels, scores); labels look like '__label__ko'.
    lang = predictions[0][0].split('__')[-1]
    return lang


# Smoke test with a Korean greeting; the recorded cell output is 'ko'.
detect_language("안녕하세요")

'ko'

In [22]:
from langchain_openai import ChatOpenAI
from langchain_upstage import ChatUpstage
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import pandas as pd

In [37]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Chat model shared by the chains defined below.
chat_openai = ChatOpenAI(model_name="gpt-4o-mini")

# Any language -> English translation chain (ChatOpenAI).
# The prompt asks the model to return the text unchanged when it is already English.
translate_to_en_prompt = ChatPromptTemplate.from_template(
    "Translate the following text to English. If it's already in English, return it as is: {text}"
)
# prompt -> chat model -> plain string output
translate_to_en_chain = translate_to_en_prompt | chat_openai | StrOutputParser()

# English -> Korean translation prompt.
# The prompt instructs the model to answer in Korean only and to return text
# that is already Korean unchanged.
translate_to_ko_prompt = ChatPromptTemplate.from_template(
    """You are a professional translator who is fluent in English and Korean.
    Your task is to translate the following text to Korean accurately and naturally.
    Please pay close attention to grammar and idiomatic expressions.
    You must only respond in Korean and should never add any additional English sentences to your responses.
    If the text is Korean, you must return text without any changes.
    The text to translate is: {text}"""
)

# Korean translation chain (prompt -> chat model -> plain string); used by translate_to_ko().
translate_to_ko_chain = translate_to_ko_prompt | chat_openai | StrOutputParser()

# English to Korean
def translate_to_ko(text):
    """Run ``text`` through ``translate_to_ko_chain`` and return the chain's output."""
    payload = {"text": text}
    korean_text = translate_to_ko_chain.invoke(payload)
    return korean_text

In [39]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List
import json


# Pydantic Model
# Structured result expected from the LLM for one review.
#
# The overall_sentiment description now lists the same five Korean labels that
# the sentiment prompt in this notebook requires (instruction 2). It previously
# listed English labels, so the schema and the prompt disagreed.
#
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a class docstring into the JSON schema, which would change the format
# instructions sent to the model.
class SentimentAnalysis(BaseModel):
    # One of the five Korean sentiment labels named in the description below.
    overall_sentiment: str = Field(
        description='The overall sentiment of the review, written in Korean: exactly one of "매우 긍정적", "긍정적", "중립적", "부정적", or "매우 부정적"')
    # Key points taken from the review text.
    key_points: List[str] = Field(description="List of key points extracted from the review")


# Pydantic output parser: parses the model's reply into a SentimentAnalysis object.
parser = PydanticOutputParser(pydantic_object=SentimentAnalysis)

# Prompt for sentiment classification plus key-point extraction.
# Template variables: {text} (the review) and {format_instructions}; both must
# be supplied when the chain is invoked.
# NOTE(review): instruction 4 ("Only return the option selected") reads as if the
# reply should be the sentiment label alone, while the format instructions and
# the closing sentence require a JSON object with both fields — confirm intent.
extract_points_sentiment_prompt = ChatPromptTemplate.from_template(
    """You are analyzing a review for the 'Book Creator Guide' GPT model. Your task is to extract key points from the given review text and determine the overall sentiment.

    Review: {text}

    Instructions:
    1. Determine the overall sentiment of the review.
    2. You must select one of the following options and reply in Korean ONLY: "매우 긍정적", "긍정적", "중립적", "부정적", or "매우 부정적".
    3. Never include any additional sentences, explanations, or examples in English.
    4. Only return the option selected.
    5. Extract up to 3 key points from the review that align with this overall sentiment.
    6. Each point must be directly derived from the review text and should reflect the tone and sentiment of the original review.
    7. If the review is very short or lacks detail, it's okay to extract fewer than 3 points.
    8. If you can't find any clear points, provide a single point stating "No specific points could be extracted from this short review."

    {format_instructions}

    Ensure that your response is a valid JSON object with 'overall_sentiment' and 'key_points' fields.

    Analysis:"""
)

# Chain: prompt -> chat model -> SentimentAnalysis (via the pydantic parser).
extract_points_sentiment_chain = extract_points_sentiment_prompt | chat_openai | parser

# suffix remover
def remove_korean_suffix(text):
    """Strip one trailing Korean sentence ending from ``text``.

    Recognised endings are "입니다", "예요" and "요", each with or without a
    final period. At most one ending is removed; text that ends in none of
    them (including the empty string) is returned unchanged.

    Args:
        text: The string to trim, e.g. a sentiment label such as "긍정적입니다.".

    Returns:
        ``text`` without the matched ending.
    """
    # Longer endings come first: "예요." also ends with "요.", so testing the
    # shorter ending first would leave a stray "예" behind.
    suffixes = ("입니다.", "예요.", "입니다", "예요", "요.", "요")
    for suffix in suffixes:
        if text.endswith(suffix):
            return text[:-len(suffix)]
    return text

In [40]:
# Review list: sample review strings in several languages.
# Each string becomes one {"text": review} input to the workflow.
reviews = [
    "This is FANTASTICO! I've wanted to write books my entire life, but lack the executive functioning skills to ever know where to begin. This AI book creator does all the things my ADHD brain can't and all I have to do is punch in the ideas.",
    "fluixet en la representación d'imatges",
    "Muadili diğer uygulamalar ile kıyaslanamayacak kadar güzel. Lütfen Microsoft un bu uygulamanın içine sıçmasına izin vermeyin, teşekkürler",
    "buono il risultato ma la storia dovrebbe essere maggiormente dettagliata",
    "j'adore",
    "感觉还是不行",
    "świetne",
    "no logic. no consistency. confused very easily.",
    "가톨릭에서는 마리아와 성인을 숭배하는 것이 아니라 신앙의 모범으로 공경하고 있습니다. 한국어로 숭배하다라고 해석하는 것은 신으로 숭배하는 것으로 오해를 불러일으킬 수 있는 번역입니다. 따라서 공경하다로 수정하여야 합니다.",
]

In [41]:
# --- Workflow ---
# Each input is a dict of the form {"text": review}. The comment above every
# step describes the dict that step emits.
#
# NOTE: the chain used to begin with `{"text": RunnablePassthrough()} | {...}`.
# Both operands were plain dicts, so Python 3.9+ evaluated that `|` as a dict
# merge before LangChain ever saw it, silently discarding the first step (and
# on Python < 3.9 `dict | dict` raises TypeError). Had the first step really
# been composed, it would have wrapped the input a second time as
# {"text": {"text": review}}. Starting from a real Runnable makes every `|`
# below an actual LCEL composition while producing the same output.
workflow = (
    # 1. Language detection: keep the input keys and add "lang".
    #    -> {"text": original_text, "lang": detected_lang}
    RunnablePassthrough.assign(lang=lambda x: detect_language(x["text"]))
    # 2. Translate to English unless the review is already English.
    #    -> previous keys + "en_text": translated_en_text or original_text
    | RunnablePassthrough.assign(
        en_text=lambda x: translate_to_en_chain.invoke({"text": x["text"]}) if x["lang"] != "en" else x["text"]
    )
    # 3. Sentiment analysis and key-point extraction on the English text.
    #    -> previous keys + "analysis": SentimentAnalysis object
    | RunnablePassthrough.assign(
        analysis=lambda x: extract_points_sentiment_chain.invoke(
            {"text": x["en_text"], "format_instructions": parser.get_format_instructions()}
        )
    )
    # 4. Build the Korean-facing fields; only the keys listed here are passed on.
    | {
        # Sentiment label passed through the Korean translation chain.
        "ko_sentiment": lambda x: translate_to_ko_chain.invoke({"text": x["analysis"].overall_sentiment}),
        # Each key point translated to Korean, in the original order.
        "ko_points": lambda x: [translate_to_ko_chain.invoke({"text": point}) for point in x["analysis"].key_points],
        # Korean rendering of the review; Korean originals are kept verbatim.
        "ko_review": lambda x: translate_to_ko_chain.invoke({"text": x["en_text"]}) if x["lang"] != "ko" else x["text"],
        # Original review text, carried through unchanged.
        "original_text": lambda x: x["text"],
        # Detected language code, carried through unchanged.
        "detected_language": lambda x: x["lang"],
    }
    # 5. Final formatting: rename to the Korean column headers used in the
    #    DataFrame and strip a trailing sentence ending from the sentiment.
    | {
        "원문": lambda x: x["original_text"],
        "감지된 언어": lambda x: x["detected_language"],
        "한국어 리뷰": lambda x: x["ko_review"],
        "전체 감성": lambda x: remove_korean_suffix(x["ko_sentiment"]),
        "주요 포인트": lambda x: x["ko_points"],
    }
)

print("============ 워크플로우 종료 ============")

# --- Main Execution ---
# Run every review through the workflow; results come back in input order.
results = workflow.batch([{"text": review} for review in reviews])

# One row per review, one column per key of the final formatting step.
df = pd.DataFrame(results)




In [42]:
df

Unnamed: 0,원문,감지된 언어,한국어 리뷰,전체 감성,주요 포인트
0,This is FANTASTICO! I've wanted to write books...,en,"이것은 환상적이에요! 저는 평생 책을 쓰고 싶었지만, 어디서 시작해야 할지 알 수 ...",매우 긍정적,"[책을 쓰고 싶었지만, 시작할 방법을 몰랐다., AI 책 생성기가 ADHD 브레인으..."
1,fluixet en la representación d'imatges,ca,이미지 표현의 희미함,부정적,[이미지 표현이 희미하다.]
2,Muadili diğer uygulamalar ile kıyaslanamayacak...,tr,동등한 애플리케이션은 다른 애플리케이션들과 비교할 수 없을 만큼 아름답습니다. 이 ...,매우 긍정적,"[이 애플리케이션은 다른 애플리케이션과 비교할 수 없을 만큼 아름답다., Micro..."
3,buono il risultato ma la storia dovrebbe esser...,it,"결과는 좋지만, 이야기가 좀 더 자세했으면 좋겠다.",긍정적,"[결과가 좋다., 이야기가 더 자세해야 한다.]"
4,j'adore,fr,나는 사랑해.,매우 긍정적,[나는 사랑해.]
5,感觉还是不行,ja,나는 여전히 괜찮지 않다고 느껴.,부정적,[전반적으로 좋지 않다고 느낀다.]
6,świetne,pl,멋진,매우 긍정적,"[작품의 질이 뛰어나다., 사용자가 만족하는 경험을 제공한다., 강력하게 추천할 만..."
7,no logic. no consistency. confused very easily.,en,논리가 없다. 일관성이 없다. 매우 쉽게 혼란스러워한다.,매우 부정적,"[논리가 없다., 일관성이 없다., 아주 쉽게 혼란스러워진다.]"
8,가톨릭에서는 마리아와 성인을 숭배하는 것이 아니라 신앙의 모범으로 공경하고 있습니다...,ko,가톨릭에서는 마리아와 성인을 숭배하는 것이 아니라 신앙의 모범으로 공경하고 있습니다...,중립적,[가톨릭에서 마리아와 성인들은 신으로서 숭배되지 않고 믿음의 본보기로서 존경받는다....


In [46]:
import fasttext
import pandas as pd

# Load the fastText language-identification model from the local data directory.
model = fasttext.load_model('./data/lid.176.ftz')

# One-row frame holding a sentence that mixes English and Korean.
# NOTE(review): this rebinds `df`, which an earlier cell uses for the workflow
# results — re-running that earlier display cell afterwards shows this frame.
df = pd.DataFrame(data={'text': ["Hello 안녕하세요 그렇지요?this is a test"]})

# Predict the language of every row; each result is a (labels, scores) pair.
predictions = df['text'].apply(model.predict)
predictions

0    ((__label__ko,), [0.8072137236595154])
Name: text, dtype: object