## 라이브러리 & API Key 정의

In [3]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import re

# Load the .env file so its entries become available as environment variables.
load_dotenv()

# API key read from the environment; None when API_KEY is not set.
my_api_key = os.environ.get("API_KEY")

# Module-level OpenAI client used for every API request in this notebook.
client = OpenAI(api_key=my_api_key)

## Instruction 정의

In [5]:
# System prompt variant for the negative setting.
# NOTE(review): not referenced by any other code visible in this file.
instruction_1 = """
You are a recommendation bot responsible for selecting the news article that the target user is most likely to prefer from a list of five candidate articles. The only information available for each candidate article is its title, which is written in Norwegian.

Your goal is to predict the index number of the news article that best fits in the position labeled [MASK].
"""

# System prompt used by inference() when purpose == 'with_negative'.
# The prompt text refers to a [News of Interest to the user] section and a
# [Questions] section in the user message.
instruction_negative = """
You are a bot that identifies users' news interests from [News of Interest to the user], then based on this, predicts the index number of news in [Questions] that best fits in the position labeled [MASK].

News is provided by title only.
News is Norwegian news in Norwegian.

There can be multiple lists in [News of Interest to the user], each with five news items.
Among the five news in each list, there is one news that the user is most interested in.

[Questions] can have multiple questions, each of which must be answered.
The answer should return only one news that the user is most likely to read.
"""

# System prompt used by inference() when purpose == 'only_positive'.
# The prompt text refers to a [Click History] section and a [Questions]
# section in the user message.
instruction_positive = """
You are a bot that identifies users' news interests from [Click History], then based on this, predicts the index number of news in [Questions] that best fits in the position labeled [MASK].

News is provided by title only.
News is Norwegian news in Norwegian.

[Questions] can have multiple questions, each of which must be answered.
The answer should return only one news that the user is most likely to read.
"""

## inference 함수 정의

In [2]:
def inference(purpose, target_folder, result_file_name, gpt_model, user_list, max_attempts):
    """Send each user's prompt file to the chat model and save the raw answers.

    For every user ID ``i`` in ``user_list`` the file
    ``../../prompts/<target_folder>/<purpose>/U<i>.txt`` is read and sent as
    the user message, together with the system instruction selected by
    ``purpose``. Each answer is written to
    ``../../results/gpt_result/<result_file_name>`` under a ``[U<i>]`` header.
    Users without a prompt file, or without an entry in
    ``<purpose>/metadata/output_metadata.txt``, are reported and skipped.

    Args:
        purpose: ``'with_negative'`` or ``'only_positive'``; selects the system
            instruction and the prompt sub-folder.
        target_folder: Folder name below ``../../prompts``.
        result_file_name: Output file name below ``../../results/gpt_result``.
        gpt_model: Model name passed to the chat completions API.
        user_list: Sized sequence of integer user IDs to process.
        max_attempts: Maximum number of API calls per user. A further attempt
            is made only when a response carries no text content; an API
            exception skips the user immediately.

    Raises:
        ValueError: If ``purpose`` is not one of the supported values.
    """
    # Fail fast on an unknown purpose; otherwise `instruction` would stay
    # unbound and the error would only surface deep inside the user loop.
    if purpose == 'with_negative':
        instruction = instruction_negative
    elif purpose == 'only_positive':
        instruction = instruction_positive
    else:
        raise ValueError(
            f"unknown purpose {purpose!r}; expected 'with_negative' or 'only_positive'"
        )

    # Locations of the user prompts and of the metadata file.
    target_folder = f'../../prompts/{target_folder}'
    directory = f'{target_folder}/{purpose}'
    meta_file_path = f'{directory}/metadata/output_metadata.txt'

    # Map user ID -> number of questions, parsed from the metadata file.
    meta_pattern = re.compile(r'User ID:\s*U(\d+).*Question 수:\s*(\d+)')
    user_question_counts = {}
    with open(meta_file_path, 'r', encoding='utf-8') as meta_file:
        for line in meta_file:
            match = meta_pattern.match(line)
            if match:
                user_question_counts[int(match.group(1))] = int(match.group(2))

    with open(f'../../results/gpt_result/{result_file_name}', 'w', encoding='utf-8') as result_file:
        for cnt, i in enumerate(user_list):
            filepath = os.path.join(directory, f'U{i}.txt')

            if not os.path.isfile(filepath):
                print(f'파일 {filepath} 이 존재하지 않습니다.')
                continue

            with open(filepath, 'r', encoding='utf-8') as f:
                contents = f.read()

            # Only users listed in the metadata file are processed.
            if user_question_counts.get(i) is None:
                print(f"사용자 U{i}의 질문 수를 찾을 수 없습니다.")
                continue

            messages = [
                {"role": "system", "content": instruction},
                {"role": "user", "content": contents},
            ]

            attempt = 0
            while attempt < max_attempts:
                attempt += 1
                try:
                    response = client.chat.completions.create(
                        model=gpt_model,
                        messages=messages,
                    )
                except Exception as e:
                    # Deliberate best-effort: report the failure and move on
                    # to the next user rather than aborting the whole run.
                    print(f"API 호출 중 오류 발생 (사용자 {i}): {e}")
                    break

                # `content` may be None (e.g. a refusal); calling .strip() on
                # it would crash the run, so spend another attempt instead.
                content = response.choices[0].message.content
                if content is None:
                    print(f"응답 내용이 없습니다 (사용자 {i}, 시도 {attempt}/{max_attempts})")
                    continue

                result_file.write(f'[U{i}]\n')
                result_file.write(content.strip() + '\n\n')
                if (cnt + 1) % 20 == 0:
                    print(f'☆ {purpose} U{i} 까지 완료 [{cnt+1}/{len(user_list)}] ☆')
                break  # answer stored; no further attempts needed

        print(f'{purpose} 완료 : {result_file_name}\n')


In [7]:
# Full range of user IDs: 1 .. user_range (inclusive).
user_range = 1000
users = list(range(1, user_range + 1))

# Override the full range with an explicit subset of user IDs for this run.
users = [123, 191, 198, 322, 432, 556, 608, 950]

# 'only_positive' run, currently disabled:
# inference(
#     purpose='only_positive',
#     target_folder='[241227-3] 1~1000',
#     result_file_name='[241227-3] positive2.txt',
#     gpt_model='gpt-4o-mini',
#     user_list=users,
#     max_attempts=1,
# )

# 'with_negative' run over the subset above.
inference(
    purpose='with_negative',
    target_folder='[241230-5] 1~1000',
    result_file_name='[241230-5] negative_2_2.txt',
    gpt_model='gpt-4o-mini',
    user_list=users,
    max_attempts=1,
)

with_negative 완료 : [241230-5] negative_2_2.txt

