In [5]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file that find_dotenv() locates into the process
# environment. The result is bound to `_` because nothing below uses it.
_ = load_dotenv(find_dotenv()) # read local .env file

# Hand the API key from the environment to the openai module.
# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [6]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send a single user prompt to the chat model and return the reply text.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        model: Name of the chat model passed to the API.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # always 0 here: the degree of randomness of the output
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    """Send a prepared chat history to the model and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, passed to
            the API unchanged.
        model: Name of the chat model passed to the API.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = openai.ChatCompletion.create(**request)
    return completion.choices[0].message["content"]

In [7]:
def read_file_in_chunks(filename, chunk_size=3500):
    """Lazily yield the UTF-8 text of ``filename`` in consecutive pieces.

    Args:
        filename: Path of the text file to read.
        chunk_size: Maximum number of characters per yielded piece.

    Yields:
        Successive strings read from the file; the last one may be shorter
        than ``chunk_size``. Nothing is yielded for an empty file.
    """
    with open(filename, 'r', encoding='utf-8') as source:
        # iter(callable, sentinel) keeps calling read() until it returns '',
        # which is how a text-mode file signals end of file.
        yield from iter(lambda: source.read(chunk_size), '')

def process_with_chatgpt_api(chunk, chatgpt_api_func):
    """Apply ``chatgpt_api_func`` to one chunk and return its result unchanged.

    Args:
        chunk: The input handed to the API function.
        chatgpt_api_func: Callable taking the chunk as its only argument.

    Returns:
        Whatever ``chatgpt_api_func(chunk)`` returns.
    """
    return chatgpt_api_func(chunk)

In [8]:
from datetime import datetime

def write_to_markdown(text, title="Stratechery"):
    """Write ``text`` to a dated Markdown file in the current working directory.

    The file is named ``<yymmdd>-<title>.md`` using today's local date. The
    file is opened in ``'w'`` mode, so an existing file with the same name is
    overwritten.

    Args:
        text: The full Markdown content to write.
        title: Label placed after the date in the filename. Defaults to
            ``"Stratechery"``, which keeps the filename this function has
            always produced when called with ``text`` only.
    """
    # Two-digit year, month and day, e.g. "230502".
    date_string = datetime.now().strftime('%y%m%d')
    filename = f"{date_string}-{title}.md"

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(text)

In [15]:
def translate_article(file_path):
    """Translate an article file chunk by chunk via the chat model.

    The file is split with ``read_file_in_chunks``. Each chunk is sent to
    ``get_completion_from_messages`` (temperature 0) in its own
    system/assistant/user conversation, and the user prompt states which part
    of the article the chunk is (``n / total``).

    Args:
        file_path: Path of the HTML file to translate.

    Returns:
        A list with one model response per chunk, in file order.
    """
    parts = list(read_file_in_chunks(file_path))
    length = len(parts)

    def build_messages(part_no, chunk):
        # Each chunk gets a fresh conversation; no history is carried over
        # from one chunk to the next.
        user_prompt = f"""
        You are being provided a part of html code of an article, it is most likely a part of technology column, \
        but some times it will be something other than that, the content of the html is delimited in three backtrips below.
        
        The text you translate will be concat to other translated passage, \
        so make sure output the raw markdown text "only", "without" any other \
        content or original html code.
        
        You have to:
        1. Extract the passage from html code, remember, the passage might seems being cut in half, which is totally normal.
        2. Translate the passage into zh-hant-TW.
        3. Format the translated passage into markdown format with proper syntax highlighting.
       
        part of the passage: {part_no} / {length}
        content: ```{chunk}```
        """
        return [
            {'role':'system', 'content':'You are an technology article professional translater at translating article from English to zh-hant-tw.'},
            {'role':'assistant', 'content':'Ok, I am a professional translator from English to zh-hant-tw.'},
            {'role':'user', 'content':user_prompt},
        ]

    # Part numbers are 1-based so the prompt reads "1 / N" ... "N / N".
    return [
        get_completion_from_messages(build_messages(part_no, chunk), temperature=0)
        for part_no, chunk in enumerate(parts, start=1)
    ]

In [None]:
# Translate the article found in the current working directory and save the
# result as a dated Markdown file. A plain relative filename replaces the
# Windows-only '.\\0502.html': on macOS/Linux a backslash is an ordinary
# filename character, so that path would not resolve to 0502.html.
translated = translate_article('0502.html')
# Chunks can be cut mid-passage, so the responses are concatenated with no
# separator between them.
text = ''.join(translated)
write_to_markdown(text)