In [1]:
import os
from dotenv import load_dotenv

# Populate the process environment from the .env file (python-dotenv).
load_dotenv()

# Look up the OpenAI API key; the value is None when the variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [23]:
# Import the required libraries

import openai
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Configure the OpenAI-backed LangChain chat model (gpt-4o-mini, temperature 0.0).
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0.0,
    api_key=openai_api_key,
)

### 유저의 입력 혹은 대화기록에서 선호도 데이터를 추출

#### 유저의 입력에 대해서

In [48]:
# Instruction prompt for the model: evaluate the user's stated movie preference and
# answer with a JSON object holding movie_title, preference_score (1-10) and comments.
# `{query}` is the template placeholder that receives the user's utterance.
prompt = """You are a movie preference evaluator. When a user provides a preference for a specific movie, you will assess the given movie and return a JSON object that details the user's preference evaluation using English. The JSON object should include the following fields:
movie_title: The title of the movie based on user input.
preference_score: A numerical score indicating how much the user prefers the movie on a scale of 1 to 10.
comments: A brief explanation or comments about why the user has this preference score for the movie.

user input: {query}
JSON object: 
"""

# Wrap the text in a LangChain PromptTemplate that declares `query` as its only input variable.
prompt_template = PromptTemplate(template=prompt, input_variables=["query"])

In [49]:
# Pipe the prompt template into the chat model. The resulting chain is invoked with a
# {"query": ...} dict and the reply text is read from the result's `.content`.
preference_eliction_chain = prompt_template | llm

In [50]:
# Run the chain once on a sample Korean utterance and keep the raw text of the reply.
result_json = preference_eliction_chain.invoke(
    {"query": "나는 어바웃타임을 진짜 너무 좋아해"}
).content

In [42]:
import re
import json

# Pattern for the Markdown code fence the model may wrap around its JSON answer:
# an opening ``` or ```json (any letter case) at the start, and a closing ``` at the end.
# Both fences are removed; only the JSON payload is kept.
pattern = r'^```(?:json)?\s*|\s*```$'

# Trim surrounding whitespace first so the ^ / $ anchors sit on the fence markers,
# then remove the fences; text without fences passes through unchanged.
cleaned_text = re.sub(pattern, '', result_json.strip(), flags=re.IGNORECASE).strip()

# Parse the JSON string into a Python dictionary
parsed_data = json.loads(cleaned_text)

# Pull out the three fields requested by the prompt
movie_title = parsed_data['movie_title']
preference_score = parsed_data['preference_score']
comments = parsed_data['comments']

# Print the values
print(f"Movie Title: {movie_title}")
print(f"Preference Score: {preference_score}")
print(f"Comments: {comments}")

Movie Title: 어바웃 타임
Preference Score: 9
Comments: 사용자는 어바웃 타임을 진짜 너무 좋아한다고 표현했으며, 이는 영화의 감정적 깊이와 시간 여행의 독특한 접근 방식에 대한 강한 애정을 나타냅니다.


In [46]:
# 함수로 정리해서 테스트
from typing import Dict

def get_preferece(query: str) -> Dict[str, object]:
    """Ask the preference chain about ``query`` and return the parsed JSON reply.

    The (misspelled) function name is kept as-is so existing callers keep working.

    Args:
        query: Free-text user utterance about a movie; it is passed to
            ``preference_eliction_chain`` as the ``query`` prompt variable.

    Returns:
        The object decoded from the model's JSON reply. The notebook's prompt asks
        for the keys ``movie_title``, ``preference_score`` and ``comments``, but the
        reply is not validated here.

    Raises:
        json.JSONDecodeError: If the reply, after removing any Markdown code
            fence, is not valid JSON.
    """

    def post_process(result: str) -> Dict[str, object]:
        """Strip an optional Markdown code fence from ``result`` and parse it as JSON."""
        # Trim first so the ^ / $ anchors line up with the fence markers even when
        # the reply carries leading or trailing whitespace.
        stripped = result.strip()

        # Drop the opening fence (``` or ```json, any letter case) and the closing
        # fence; a reply without fences passes through unchanged.
        unfenced = re.sub(r'^```(?:json)?\s*|\s*```$', '', stripped, flags=re.IGNORECASE)

        return json.loads(unfenced.strip())

    llm_return = preference_eliction_chain.invoke({"query": query}).content
    return post_process(llm_return)

In [47]:
# Example: Korean utterance saying the user watched 'Alien' yesterday and found it exciting.
get_preferece("어제 에일리언을 봤는데 꽤 재밌더라구. 엄청 흥미진진해서 시간가는 줄 몰랐어")

{'movie_title': 'Alien',
 'preference_score': 8,
 'comments': "The user found 'Alien' to be quite entertaining and thrilling, indicating a strong engagement with the film's suspenseful elements."}

In [51]:
# Example: Korean utterance saying the user really loves 'About Time'.
get_preferece("나는 어바웃타임을 진짜 너무 좋아해")

{'movie_title': 'About Time',
 'preference_score': 9,
 'comments': "The user expresses a strong affection for 'About Time', indicating it resonates deeply with them, likely due to its heartfelt story and themes of love and time."}

In [52]:
# Example: negative Korean utterance (dislikes films like '신과함께' for being too melodramatic).
get_preferece("신과함께 같은 영화는 너무 싫어.... 신파가 너무 많아")

{'movie_title': '신과함께',
 'preference_score': 2,
 'comments': 'The user dislikes this movie due to its excessive melodrama, which they find overwhelming.'}

In [54]:
# Example: Korean utterance naming 'The Notebook' as the user's favourite film (loves its love story).
get_preferece("내가 가장 좋아하는 영화는 노트북이야. 절절한 사랑 이야기가 너무 좋아")

{'movie_title': 'The Notebook',
 'preference_score': 9,
 'comments': "The user loves heartfelt love stories, and 'The Notebook' is a quintessential romantic film that beautifully captures deep emotional connections and enduring love."}