In [1]:
# ====================================
# openai tokens
# ====================================

import os
import glob
import codecs
import pickle
import re
import textwrap
from collections import namedtuple

import faiss
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, MarkdownTextSplitter
from langchain.vectorstores import FAISS
from pymongo import MongoClient
from langchain.llms import OpenAI

from sys import path

# Extend the module search path before importing `prd`; the config module
# is presumably located in this directory (verify on the target machine).
path.append("/opt/configs/ramjet")
import prd

# Expose the OpenAI key held by the config module through the environment
# variable, instead of hard-coding the key in the notebook.
os.environ["OPENAI_API_KEY"] = prd.OPENAI_TOKEN_ME


def pretty_print(text: str) -> str:
    """Return ``text`` stripped and re-wrapped for compact display.

    Leading and trailing whitespace is removed, then the text is wrapped
    to 60 columns; every line after the first is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper(width=60, subsequent_indent="    ")
    stripped = text.strip()
    return wrapper.fill(stripped)

In [51]:
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
)
from langchain.memory import ConversationBufferWindowMemory


# Prompt used for every translation request; `{input}` is replaced with the
# text to translate. The text is placed under the `<<raw>>` marker that the
# instructions refer to.
template = """
You are a translation bot. Please translate the content under <<raw>> into English, preserving all formatting and symbols. Only provide the translated text, and do not provide irrelevant instructions, notes, or explanations. If you encounter code wrapped in markdown or HTML tags, only translate the Chinese characters and maintain the original formatting. If you encounter any content that cannot be translated, please return it unchanged.

<<raw>>
{input}
"""

prompt = PromptTemplate(
    input_variables=["input"],
    template=template,
)

# temperature=0 asks the model for its least random output; max_tokens caps
# the length of each reply.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=2000,
)

# No memory is configured on the chain, so each predict() call is built only
# from the prompt template and that call's own `input`.
conversation = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=False,
)

In [None]:
# =====================================
# Chunking
# =====================================

from datetime import datetime
from pymongo import MongoClient
from langchain.text_splitter import MarkdownTextSplitter, CharacterTextSplitter


# Host of the MongoDB server. Re-assign DB_HOST (e.g. with the commented
# address below) to point the notebook at a different instance.
DB_HOST = prd.MONGO_HOST
# DB_HOST = "100.97.108.34"  # ubuntu

# The URI is built from DB_HOST, not prd.MONGO_HOST directly, so that the
# override above actually takes effect.
# NOTE(review): PyMongo expects the user name and password embedded in a URI
# to be percent-escaped; confirm that prd stores them in a URI-safe form.
mongohost = f"mongodb://{prd.MONGO_ADMIN_USER}:{prd.MONGO_ADMIN_PASSWD}@{DB_HOST}:{prd.MONGO_PORT}"
dbconn = MongoClient(host=mongohost)
posts_col = dbconn["blog"]['posts']


# Splitter applied to lines that are too long to translate in one request.
# splitter = MarkdownTextSplitter(chunk_size=200,chunk_overlap=0)
splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)

def scan_posts_in_mongo():
    """Translate every post's markdown to English and save it back to MongoDB.

    Walks all documents in ``posts_col``. A post's ``post_markdown`` is split
    on newlines: blank lines are copied through unchanged, every other line
    is translated with ``conversation.predict``. Lines of 400 characters or
    more are first cut into pieces with ``splitter`` and the translated
    pieces are concatenated.

    Every line of the result, translated or blank, is terminated with a
    newline, so the line structure of the original markdown is preserved.

    The translation is written with ``$set`` on the document's ``i18n``
    field, which replaces that field as a whole with the new ``updated_at``
    timestamp and the ``en-us`` entry. One progress line is printed per
    saved post. Posts without ``post_markdown`` are skipped.
    """
    # NOTE(review): max_time_ms(0) presumably means "no server-side time
    # limit" -- confirm against the PyMongo / MongoDB documentation.
    cursor = posts_col.find().max_time_ms(0).batch_size(50)
    for docu in cursor:
        content = docu.get("post_markdown")
        if not content:
            continue

        translated_lines = []
        for line in content.split("\n"):
            if not line.strip():
                # Keep blank / whitespace-only lines as they are; there is
                # nothing to translate and they carry the layout.
                translated_lines.append(line)
                continue

            if len(line) < 400:
                translated = conversation.predict(input=line)
            else:
                # Long line: translate it piece by piece.
                # NOTE(review): `line` contains no newline, so a splitter
                # that separates on blank lines may return it in one piece
                # -- confirm the splitter's separator setting.
                translated = "".join(
                    conversation.predict(input=piece)
                    for piece in splitter.split_text(line)
                )
            translated_lines.append(translated)

        # Terminate every line, including long ones handled piece by piece,
        # so that a long line is not merged with the line that follows it.
        translated_content = "".join(item + "\n" for item in translated_lines)

        # save to mongodb
        # NOTE(review): datetime.now() is a naive local time -- confirm this
        # matches how other timestamps in the collection are stored.
        posts_col.update_one(
            filter={"_id": docu['_id']},
            update={"$set": {"i18n": {
                "updated_at": datetime.now(),
                "en-us": {
                    "post_markdown": translated_content,
                }
            }}}
        )

        print(f"{docu.get('post_name')}[{docu.get('_id')}] is ok")

# Run the translation pass over all posts. Every non-blank line of every
# post triggers at least one request through `conversation`.
scan_posts_in_mongo()

roc[51186bb90000000000000000] is ok
chinesehistory[51b5cacb0000000000000000] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
).


modern-history[52233e5f0000000000000000] is ok
roc-corner[523d9ad10000000000000000] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 38daad4ed5ef584429a5648ecbef8c39 in your message.).


unittest-mock[53f049fb0000000000000000] is ok
clustering[53f9fe330000000000000000] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 477369f40ab926256a0026ae9458e2c7 in your message.).


django-rest[55ad0b6f825d9ea2793ef5c7] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID dade456ff4e46a20706823c53148b619 in your message.).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID abe8b194c42ebd12453ea76807dacd2e in your message.).


literature-1[55afb5d52d8d75943fcabe50] is ok
osx-tools-normal[55b0ec7b2d8d75943fcabe56] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 4e2656e413f593feebd08504dc2fcd37 in your message.).


jasmine[55b1e9f12d8d75943fcabe60] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 4e06e526f361674691afc6376bb3f9fb in your message.).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 3e9337218ab973b91f1f86c671abaefd in your message.).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can r

angular-learn[55b8338b2d8d75943fcabe75] is ok
flask-restful[55c180d42d8d75943fcabea2] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID a7f95aab24a5ca7f5b246e20fb7fdfd9 in your message.).


ldap[55d3fa07a874218549123c79] is ok
handlebars[55d68cbcc2cdd34bfec4b029] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID a94a306d1ba430c4a5f4cc32912e4ab2 in your message.).


gou-jian-zhi-fa[55d9c043c2cdd34bfec4b02f] is ok


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=60).


snmp[55de613069e9690ddf878d7c] is ok


In [11]:
# Quick manual check of the translation chain with a short Chinese greeting.
conversation.predict(input="你好")

'Hello.'