In [1]:
import openai
import os
import json

from typing import List, Dict, Any
from pathlib import Path
from dataclasses import dataclass

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

openai.api_key = os.getenv('OPENAI_API_KEY')

def print_engine_list():
    print(openai.Engine.list())

global_path: Path = Path("../../source/staticDataStorage/data/Global.json").resolve()

def load_global_static_data() -> Dict[str, Any]:
    return json.loads(global_path.read_text())

def get_terms(globalJson: Dict[str, Any]) -> List[str]:
    ret: List[str] = []

    for item in globalJson['terms']:
        termDef = item['termDef']
        term = termDef.split(" - ")[0]
        ret.append(term)

    return ret

def get_terms_string() -> str:
    globalJson = load_global_static_data()
    terms = get_terms(globalJson)
    return ", ".join(terms)

def print_lines(text: str, max_line_length: int = 120):
    lines: List[str] = []
    line = ""
    for word in text.split():
        if len(line + word) > max_line_length:
            lines.append(line)
            line = ""
        line += word + " "
    lines.append(line)

    for line in lines:
        print(line)


@dataclass
class TermData:
    area: str | None
    termDef: str


class StaticStorage:
    def __init__(self, path: Path):
        self.path = path
        json_data = json.loads(self.path.read_text())
        self.name = json_data['name']
        self.terms: List[TermData] = []
        for term in json_data['terms']:
            area = term['area'] if 'area' in term else None
            term_data = TermData(area, term['termDef'])
            self.terms.append(term_data)

    def save(self):
        dict_data = {
            "name": self.name,
            "terms": []
        }

        for term in self.terms:
            term_dict = {}
            if term.area:
                term_dict["area"] = term.area
            term_dict["termDef"] = term.termDef
            dict_data["terms"].append(term_dict)

        self.path.write_text(json.dumps(dict_data, indent=4, ensure_ascii=False) + "\n")


class ChatEngine:
    model = ""
    temp = 0

    def __init__(self, model: str = "gpt-3.5-turbo", temp: float = 0):
        self.model = model
        self.temp = temp

@dataclass
class Message:
    role: str
    content: str

    def to_dict(self) -> Dict[str, str]:
        return {"role": self.role, "content": self.content}

class TermChat:
    system_message: Message
    messages: List[Message] = []
    engine: ChatEngine

    def __init__(self, engine: ChatEngine, system_message: str):
        self.engine = engine
        self.system_message = Message("system", system_message)

    def _get_messages(self) -> List[Dict[str, str]]:
        ret: List[Dict[str, str]] = []
        ret.append(self.system_message.to_dict())

        for message in self.messages:
            ret.append(message.to_dict())

        return ret

    def get_answer(self, user_message: str = "") -> str | None:
        if user_message != "":
            self.messages.append(Message("user", user_message))

        response = openai.chat.completions.create(
            model=self.engine.model,
            messages=self._get_messages(),
            temperature=self.engine.temp,
        )
        answer = response.choices[0].message.content
        answer = answer if answer else ""
        # self.messages.append({"role": "assistant", "content": answer})
        self.messages.append(Message("assistant", answer))
        return answer

    def clear_messages(self):
        self.messages = []


In [7]:
# Prompts

continue_term_prompt = """
You are a terms helper robot
your task is to guess, based on the list of terms,
which one term, not in the list,
is the most relevant to already given terms. Answer with one term please.
"""

give_definition_prompt = """
You are a terms definition robot
your task is to give a best possible definition to the user requested term.

You should give a definition step by step, according to the this rules:

1. Definition should contain only information about what the term intrinsically is,
without any information about what the term is not, and what the term is related to.
Every entity that can be removed from definition without changing the meaning, should be removed.
2. Definition should based on the existing list of terms (list would be provided to you).
3. Check if there exist terms that also fit this definition and are not synonymous to the term.
4. Definition should be as short as possible.

List of existing terms: '''{0}'''

User requested definition for term: '''{1}'''

Format of answer should be step by step thoughts:
Raw term definition: <try to define user term for the first time. format 'term - definition'> <new line>
Thoughts about definition: <your thoughts on how this term should be redefined according to rules> <new line>

First retry: <try to define user term for the second time. format 'term - definition'> <new line>
Thoughts about definition: <your thoughts on how this term should be redefined according to rules> <new line>

Final try: <try to define user term for the third time. format 'term - definition'> <new line>
"""

translate_term_prompt = """
You are a GPT that is specified in term translation.
Your task is to translate terms from russian to english, saving their format.

Format description:
- TermDef format: each term should start with a capital letter, and the definition should start with a lowercase letter.
- Preserve brackets: If in definition some terms are in brackets, you should save them in brackets in the translation.
- Keyword Usage: Each definition is carefully crafted with specific details
- Scientific answers: each definition should be as precise, scientific and dry as possible. Expect that your user knows difficult conceptions and you should use them. your task is just with definition show how this conceptions are related.
- If there is no provided definition on russian, you should provide a definition in english based on the term itself
Answer with one term please.
"""

In [None]:
def predict_next_term():
    engine = ChatEngine(temp=0.5)
    chat = TermChat(engine, continue_term_prompt)
    terms_str = get_terms_string()
    answer = chat.get_answer(terms_str)
    print_lines(answer)
    chat.clear_messages()


predict_next_term()

In [None]:
def give_definition(new_term):
    terms_str = get_terms_string()

    engine = ChatEngine(temp=0.2)
    chat = TermChat(engine, give_definition_prompt.format(terms_str, new_term))

    answer = chat.get_answer()
    print(answer)
    chat.clear_messages()

give_definition("Up quark")

In [None]:
def add_term(term, definition, area=""):
    globalJson = load_global_static_data()

    termDef = '{0} - {1}'.format(term, definition)
    if area != "":
        globalJson['terms'].append({"area": area, "termDef": termDef})
    else:
        globalJson['terms'].append({"termDef": termDef})

    with open(global_path, "w") as f:
        json.dump(globalJson, f, indent=4)

add_term("Symmetry", f"{{system}} {{property}} that remains unchanged after a certain {{transformation}}", "phys")

In [2]:
# Transfer all terms from Biochemistry to Global with translation

biochemistry_path: Path = Path("../../source/staticDataStorage/data/Biochemistry.json").resolve()
biochemistry_storage = StaticStorage(biochemistry_path)
global_storage = StaticStorage(global_path)

bio_term_count = len(biochemistry_storage.terms)
for i in range(bio_term_count):
    term_data = biochemistry_storage.terms.pop(0)

    engine = ChatEngine(temp=0.5)
    translator = TermChat(engine, translate_term_prompt)
    print(f"Translating term {i}: {term_data.termDef}")
    answer = translator.get_answer(term_data.termDef)
    term_data.termDef = answer if answer else term_data.termDef
    print(f"Translated term {i}: {term_data.termDef}")

    if term_data.area is None:
        term_data.area = "chem"

    global_storage.terms.append(term_data)
    biochemistry_storage.save()
    global_storage.save()
    if i % 10 == 0:
        print(f"Transferred {i} terms")

    translator.clear_messages()

    if i > 50:
        break

Translating term 0: Макроспорогенез - 
Translated term 0: Macrosporogenesis - 
The process of formation and development of the female gametophyte in plants, leading to the production of megaspores through meiosis.
Transferred 0 terms
Translating term 1: Макрогематогенез - 
Translated term 1: Macrohematogenesis - 
The process of blood cell formation, specifically referring to the production of red blood cells, white blood cells, and platelets in the bone marrow.
Translating term 2: Односпорический зародышевый мешок - 
Translated term 2: Monosporic embryo sac - 
A structure in plants that contains the female gametophyte and develops into the embryo after fertilization. It is formed from a single spore and plays a crucial role in sexual reproduction in angiosperms.
Translating term 3: Биспорический зародышевый мешок - 
Translated term 3: Bisporic embryo sac - 
A structure in plants that contains the female gametophyte and develops into the embryo after fertilization. It is formed from two