# core

> Generate structured synthetic data from Claude in parallel, with client-side rate limiting

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import concurrent.futures

from claudette import *
from fastcore.utils import *
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm

In [None]:
#| export
class FastData:
    """Generate structured outputs from Claude for many inputs in parallel.

    Wraps a claudette `Client` and routes every request through a
    rate-limited call so that concurrent workers share one request budget.
    """

    def __init__(self,
                 model: str = "claude-3-haiku-20240307",
                 calls: int = 100,
                 period: int = 60):
        """Create the client for `model` and install the initial rate limit.

        Args:
            model: Model identifier passed to `Client`.
            calls: Maximum number of requests allowed per `period`.
            period: Length of the rate-limit window, in seconds.
        """
        self.cli = Client(model)
        self.set_rate_limit(calls, period)

    def set_rate_limit(self, calls: int, period: int):
        """Set a new rate limit.

        Rebuilds `self._rate_limited_call` so that at most `calls` requests
        are issued per `period` seconds; calls over the limit sleep and retry
        (via `sleep_and_retry`) instead of raising.
        """
        @sleep_and_retry
        @limits(calls=calls, period=period)
        def rate_limited_call(prompt: str, schema, sp: str):
            # `globals()` is this module's namespace, which does not contain
            # schema classes defined by the caller once this class is
            # exported. Add the schema under its own name so the tool call
            # can be resolved back to it.
            ns = dict(globals())
            schema_name = getattr(schema, "__name__", None)
            if schema_name is not None:
                ns[schema_name] = schema
            return self.cli.structured(
                prompt,
                ns=ns,
                temp=1,
                sp=sp,  # forward the system prompt; it was previously dropped
                tools=schema,
                tool_choice=schema
            )[0]

        self._rate_limited_call = rate_limited_call

    def generate(self,
                 prompt_template: str,
                 inputs: list[dict],
                 schema,
                 sp: str = "You are a helpful assistant.",
                 max_workers: int = 64) -> list[dict]:
        """Render one prompt per input and collect the structured responses.

        Args:
            prompt_template: `str.format` template; each dict in `inputs`
                supplies its fields.
            inputs: One dict of template fields per request.
            schema: Class/callable used as the tool (and forced tool choice)
                for the structured response.
            sp: System prompt sent with every request.
            max_workers: Size of the thread pool issuing requests.

        Returns:
            A list the same length as `inputs`, in the same order. Each item
            is the value produced for the corresponding input, or `None` if
            that input failed (the error is printed, not raised).
        """
        def process_input(input_data):
            # Best effort: one bad input must not abort the whole batch.
            try:
                prompt = prompt_template.format(**input_data)
                return self._rate_limited_call(
                    prompt=prompt,
                    schema=schema,
                    sp=sp
                )
            except Exception as e:
                print(f"Error processing input: {e}")
                return None

        # Pre-size the output and remember each future's position so results
        # line up with `inputs` even though futures complete out of order.
        results = [None] * len(inputs)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_index = {
                executor.submit(process_input, input_data): index
                for index, input_data in enumerate(inputs)
            }
            for future in tqdm(concurrent.futures.as_completed(future_to_index), total=len(inputs)):
                results[future_to_index[future]] = future.result()

        return results

In [None]:
# Schema class for one English/German pair; it is passed as `schema=` to
# `FastData.generate` in the cells below.
# `store_attr()` (fastcore) saves the `__init__` arguments as same-named
# attributes, and `basic_repr` builds the repr shown in the cell output.
# NOTE(review): the class docstring and the annotated `__init__` parameters
# are presumably what gets sent to the model as the tool description, so
# treat them as prompt text - confirm before rewording them.
class Translation():
    """Translation from an English phrase to a German phrase"""
    def __init__(self, english: str, german: str): store_attr()
    
    __repr__ = basic_repr(["english", "german"])

# Quick sanity check of the constructor and repr.
Translation("Hello, how are you today?", "Hallo, wie geht es Ihnen heute?")

__main__.Translation(english='Hello, how are you today?', german='Hallo, wie geht es Ihnen heute?')

In [None]:
from datasets import load_dataset

# Few-shot examples shown to the model in every prompt.
examples = [
    Translation(
        english="Hello, my name is Nathan. I am a research scientist at an AI startup.",
        german="Hallo mein Name ist Nathan. Ich bin wissenschaftlicher Mitarbeiter bei einem KI-Startup."),
    Translation(
        english="How much wood could a woodchuck chuck if a woodchuck could chuck wood?",
        german="Wie viel Holz könnte ein Waldmurmeltier einspannen, wenn ein Waldmurmeltier Holz einspannen könnte?"),
    Translation(
        english="Thomas Cranmer (2 July 1489 - 21 March 1556) was a leader of the English Reformation and Archbishop of Canterbury during the reigns of Henry VIII, Edward VI and, for a short time, Mary I. He helped build the case for the annulment of Henry's marriage to Catherine of Aragon, which was one of the causes of the separation of the English Church from union with the Holy See.",
        german="Thomas Cranmer (2. Juli 1489 - 21. März 1556) war ein Anführer der englischen Reformation und Erzbischof von Canterbury während der Herrschaft von Heinrich VIII., Eduard VI. und für kurze Zeit auch Maria I. Er half bei der Ausarbeitung der Klage für die Aufhebung von Heinrichs Heirat mit Katharina von Aragon, die eine der Ursachen für die Trennung der englischen Kirche von der Union mit dem Heiligen Stuhl war."
    ),
]
# Render one "- english -> german" bullet per example. Prefixing every item
# (instead of joining on "\n- ") gives the first example a bullet as well.
examples = "\n".join(f"- {e.english} -> {e.german}" for e in examples)

# Load personas (only the first three, to keep the demo small)
personas = load_dataset("proj-persona/PersonaHub", "persona", split='train').select(range(3))['persona']

sp = "You will help generate synthetic data of English and German phrases."
prompt_template = """\
Here are some examples:
{examples}

Create an english and german translation pair that is similar to the examples and would be appropriate for the following persona: {persona}
"""

# Generate translations: one request per persona, all sharing the same examples
fast_data = FastData(model="claude-3-haiku-20240307")
translations = fast_data.generate(
    prompt_template=prompt_template,
    inputs=[{"persona": persona, "examples": examples} for persona in personas],
    schema=Translation,
    sp=sp
)

print("Translations:")
print(translations)

100%|██████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.48it/s]

Translations:
[__main__.Translation(english='Postpartum complications can be life-threatening, but many are preventable with proper education and care. As a maternal health advocate, I work to raise awareness and support new mothers.', german='Geburtskomplikationen können lebensbedrohlich sein, aber viele sind durch richtige Aufklärung und Versorgung vermeidbar. Als Verfechterin der Gesundheit von Müttern arbeite ich daran, das Bewusstsein zu schärfen und junge Mütter zu unterstützen.'), __main__.Translation(english="As a legal advisor, it's crucial to ensure that project documentation is complete and accurate to avoid potential legal issues down the line. Incomplete or inaccurate documentation can lead to misunderstandings, disputes, and even legal liabilities.", german='Als Rechtsberater ist es von entscheidender Bedeutung, dass die Projektdokumentation vollständig und korrekt ist, um mögliche rechtliche Probleme in der Zukunft zu vermeiden. Unvollständige oder ungenaue Dokumentation




In [None]:
# Schema class for one critique; it is passed as `schema=` to
# `FastData.generate` in the critique cell below.
# `store_attr()` (fastcore) saves the `__init__` arguments as same-named
# attributes, and `basic_repr` builds the repr shown in the cell output.
# NOTE(review): the docstring and the inline parameter comments are
# presumably forwarded to the model as the tool/field descriptions, so treat
# them as prompt text - confirm before rewording them.
class TranslationCritique():
    """
    A critique of the translation.
    """
    def __init__(
        self,
        critique: str, # A critique of the translation.
        score: int # A score of the translation from 1 to 5. 
    ): store_attr()
        
    __repr__ = basic_repr(['critique', 'score'])

sp = "You will help critique synthetic data of English and German phrases."
critique_template = """\
Below is an extract of a translation. Evaluate its quality as a senior translator would, considering its suitability for professional use. Use the additive 5-point scoring system described below. Points are accumulated based on the satisfaction of each criterion:

- Add 1 point if the translation conveys the basic meaning of the source text, even if it includes some minor errors or awkward phrasing.
- Add another point if the translation is generally accurate but lacks refinement in style or fails to capture some nuances of the original. It might use inconsistent terminology or have occasional lapses in register.
- Award a third point if the translation is appropriate for professional use and accurately conveys key concepts of the source text. It demonstrates good understanding of both languages, though it may not be flawless or could include some slight inconsistencies. It resembles the work of a competent translator but may have room for improvement in fluency or precision.
- Grant a fourth point if the translation is highly accurate and reads naturally in the target language, exhibiting a consistent and appropriate style. It could be similar to the work of an experienced translator, offering faithful rendering of content and tone, with minimal errors, and effectively handling complex concepts or cultural references. The result is coherent, well-expressed, and valuable for its intended purpose.
- Bestow a fifth point if the translation is outstanding, demonstrating mastery of both source and target languages. It captures subtle nuances, maintains the author's voice and intent, and reads as if it were originally written in the target language. The translator has made excellent choices in dealing with challenging elements like wordplay, idiomatic expressions, or culture-specific content.

The translation extract:
{translation}

After examining the translation:

- Briefly justify your total score, up to 100 words.
- Conclude with the score of the translation.
"""

fast_data = FastData(model="claude-3-5-sonnet-20240620")
# `generate` returns None for any input that failed, so drop those entries
# before reading `.english` / `.german`; otherwise one failed translation
# would raise AttributeError here.
valid_translations = [t for t in translations if t is not None]
critiques = fast_data.generate(
    prompt_template=critique_template,
    inputs=[{"translation": f"{t.english} -> {t.german}"} for t in valid_translations],
    schema=TranslationCritique,
    sp=sp
)

print("Critiques:")
print(critiques)

100%|██████████████████████████████████████████████████████████████████████| 3/3 [00:03<00:00,  1.24s/it]

Critiques:
[__main__.TranslationCritique(critique='The translation accurately conveys the meaning of the source text, maintaining the legal context and key terminology. It demonstrates a strong grasp of both languages, effectively translating complex concepts like "legal liabilities" (rechtlichen Haftungen). The German version reads naturally and maintains the formal tone appropriate for legal contexts. It captures nuances such as "down the line" (in der Zukunft) idiomatically. The translation is highly accurate, professionally suitable, and could be the work of an experienced translator. While it\'s excellent, it doesn\'t quite reach the level of absolute mastery that would warrant a perfect score.', score=4), __main__.TranslationCritique(critique='This translation is of high quality, demonstrating professional-level work. It accurately conveys the meaning of the source text, maintaining the appropriate register and tone. The translator has made excellent word choices, such as "Verfec




In [None]:
#| hide
import nbdev; nbdev.nbdev_export()