# core

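> Generate structured synthetic data with Anthropic models: prompts are filled from a template, sent concurrently under a rate limit, and parsed into Pydantic models.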

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [1]:
#| export
import concurrent.futures
from pydantic import BaseModel
import instructor
import anthropic
from tqdm import tqdm
from ratelimit import limits, sleep_and_retry

In [2]:
#| export
class FastData:
    """Fan out templated prompts to an Anthropic model and collect structured results.

    Requests go through an `instructor`-patched Anthropic client, are throttled by a
    shared rate limiter, and are executed concurrently on a thread pool.
    """

    def __init__(self, api_key: str | None = None, calls: int = 100, period: int = 60):
        """Create the client and install the initial rate limit.

        Args:
            api_key: Key forwarded to `anthropic.Anthropic`; `None` is passed through as-is.
            calls: Maximum number of calls allowed per rate-limit window.
            period: Length of the rate-limit window, forwarded to `ratelimit.limits`.
        """
        self.client = instructor.from_anthropic(anthropic.Anthropic(api_key=api_key))
        self.set_rate_limit(calls, period)

    def set_rate_limit(self, calls: int, period: int):
        """Set a new rate limit.

        Rebuilds the internal request function wrapped with `limits(calls, period)` and
        `sleep_and_retry`, replacing any previously installed limiter.
        """
        @sleep_and_retry
        @limits(calls=calls, period=period)
        def rate_limited_call(model: str, messages: list[dict], response_model: type[BaseModel]):
            # max_tokens and max_retries are fixed for every request made through this class.
            return self.client.chat.completions.create(
                model=model,
                max_tokens=4096,
                max_retries=1,
                messages=messages,
                response_model=response_model,
            )

        self._rate_limited_call = rate_limited_call

    def generate(self,
                 prompt_template: str,
                 inputs: list[dict],
                 response_model: type[BaseModel],
                 model: str = "claude-3-haiku-20240307",
                 max_workers: int = 64) -> list[dict]:
        """Format one prompt per input, query the model concurrently, and return parsed results.

        Args:
            prompt_template: `str.format` template; each input dict supplies its fields.
            inputs: One dict of template fields per request.
            response_model: Pydantic model class the response is parsed into.
            model: Anthropic model name.
            max_workers: Size of the thread pool.

        Returns:
            One dict per successful request, in the same relative order as `inputs`.
            Inputs whose request failed are reported via `print` and omitted, so the
            result may be shorter than `inputs`.
        """
        def process_input(input_data: dict) -> dict | None:
            try:
                prompt = prompt_template.format(**input_data)
                response = self._rate_limited_call(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    response_model=response_model,
                )
                # `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`.
                return response.model_dump()
            except Exception as e:
                # Deliberate best-effort boundary: one bad input must not abort the batch.
                print(f"Error processing input: {e}")
                return None

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(process_input, input_data) for input_data in inputs]
            # Iterate in completion order only to advance the progress bar.
            for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
                pass

        # Collect in submission order so results are deterministic, and test against None
        # explicitly so a valid-but-falsy result (e.g. an empty dict) is not discarded.
        outputs = (future.result() for future in futures)
        return [output for output in outputs if output is not None]

In [7]:
from datasets import load_dataset
from pydantic import BaseModel, Field

import pprint

# Shared pretty printer used by this notebook to display generated results
# (4-space indent, 100-column width, one item per line).
pp = pprint.PrettyPrinter(indent=4, width=100, compact=False)

# Structured-output schema for one English/German phrase pair; it is passed as
# `response_model` when generating translations below.
class Translation(BaseModel):
    english: str = Field(description="An english phrase")
    german: str = Field(description="An equivalent german phrase that is a translation of the english phrase")

# Few-shot translation pairs that are rendered into the prompt.
example_pairs = [
    Translation(
        english="Hello, my name is Nathan. I am a research scientist at an AI startup.",
        german="Hallo mein Name ist Nathan. Ich bin wissenschaftlicher Mitarbeiter bei einem KI-Startup."),
    Translation(
        english="How much wood could a woodchuck chuck if a woodchuck could chuck wood?",
        german="Wie viel Holz könnte ein Waldmurmeltier einspannen, wenn ein Waldmurmeltier Holz einspannen könnte?"),
    Translation(
        english="Thomas Cranmer (2 July 1489 - 21 March 1556) was a leader of the English Reformation and Archbishop of Canterbury during the reigns of Henry VIII, Edward VI and, for a short time, Mary I. He helped build the case for the annulment of Henry's marriage to Catherine of Aragon, which was one of the causes of the separation of the English Church from union with the Holy See.",
        german="Thomas Cranmer (2. Juli 1489 - 21. März 1556) war ein Anführer der englischen Reformation und Erzbischof von Canterbury während der Herrschaft von Heinrich VIII., Eduard VI. und für kurze Zeit auch Maria I. Er half bei der Ausarbeitung der Klage für die Aufhebung von Heinrichs Heirat mit Katharina von Aragon, die eine der Ursachen für die Trennung der englischen Kirche von der Union mit dem Heiligen Stuhl war."
    ),
]
# Render one "- english -> german" bullet per pair. Prefixing every item (instead of
# joining with "\n- ") gives the first example a bullet as well, so the list is uniform.
examples = "\n".join(f"- {pair.english} -> {pair.german}" for pair in example_pairs)

# Load the first three persona descriptions from the PersonaHub dataset
personas = load_dataset("proj-persona/PersonaHub", "persona", split='train').select(range(3))['persona']

# Prompt with two placeholders, {examples} and {persona}; each input dict below supplies both.
prompt_template = """\
Here are some examples:
{examples}

Create an english and german translation pair that is similar to the examples and would be appropriate for the following persona: {persona}
"""

# Generate one translation pair per persona
# NOTE(review): no api_key is passed here; presumably the Anthropic client reads it from the environment — confirm.
fast_data = FastData()
translations = fast_data.generate(
    prompt_template=prompt_template,
    inputs=[{"persona": persona, "examples": examples} for persona in personas],
    response_model=Translation,
    model="claude-3-haiku-20240307"
)

# Pretty print the translations
print("Translations:")
pp.pprint(translations)

Translations:
[   {   'english': 'Proper documentation of projects is critical to avoid legal risks and '
                   'complications.',
        'german': 'Eine korrekte Dokumentation von Projekten ist entscheidend, um rechtliche '
                  'Risiken und Komplikationen zu vermeiden.'},
    {   'english': 'Hello, my name is Samantha. I am the new general counsel at TurpCo Industries.',
        'german': 'Hallo, mein Name ist Samantha. Ich bin der neue Chefjurist bei TurpCo '
                  'Industries.'},
    {   'english': "I'm going to work hard and become the best player on the team. The other "
                   "players won't be able to stop me.",
        'german': 'Ich werde hart arbeiten und der beste Spieler im Team werden. Die anderen '
                  'Spieler werden mich nicht aufhalten können.'}]


In [9]:
from typing import Literal

# Structured-output schema for grading a translation: a short written critique plus a
# 1-5 score. The field descriptions are part of the schema, so they must refer to the
# translation being graded (they previously said "code").
class TranslationCritique(BaseModel):
    critique: str = Field(description="A critique of the translation.")
    score: Literal[1, 2, 3, 4, 5] = Field(description="A score of the translation from 1 to 5.")

# Grading prompt with a single placeholder, {translation}, which is filled with an
# "english -> german" string for each pair being critiqued.
critique_template = """\
Below is an extract of a translation. Evaluate its quality as a senior translator would, considering its suitability for professional use. Use the additive 5-point scoring system described below. Points are accumulated based on the satisfaction of each criterion:

- Add 1 point if the translation conveys the basic meaning of the source text, even if it includes some minor errors or awkward phrasing.
- Add another point if the translation is generally accurate but lacks refinement in style or fails to capture some nuances of the original. It might use inconsistent terminology or have occasional lapses in register.
- Award a third point if the translation is appropriate for professional use and accurately conveys key concepts of the source text. It demonstrates good understanding of both languages, though it may not be flawless or could include some slight inconsistencies. It resembles the work of a competent translator but may have room for improvement in fluency or precision.
- Grant a fourth point if the translation is highly accurate and reads naturally in the target language, exhibiting a consistent and appropriate style. It could be similar to the work of an experienced translator, offering faithful rendering of content and tone, with minimal errors, and effectively handling complex concepts or cultural references. The result is coherent, well-expressed, and valuable for its intended purpose.
- Bestow a fifth point if the translation is outstanding, demonstrating mastery of both source and target languages. It captures subtle nuances, maintains the author's voice and intent, and reads as if it were originally written in the target language. The translator has made excellent choices in dealing with challenging elements like wordplay, idiomatic expressions, or culture-specific content.

The translation extract:
{translation}

After examining the translation:

- Briefly justify your total score, up to 100 words.
- Conclude with the score of the translation.
"""

# Critique every generated pair, rendering each one as "english -> german" for the prompt.
# This step uses the claude-3-5-sonnet model and parses replies into TranslationCritique.
critiques = fast_data.generate(
    prompt_template=critique_template,
    inputs=[{"translation": f"{t['english']} -> {t['german']}"} for t in translations],
    response_model=TranslationCritique,
    model="claude-3-5-sonnet-20240620"
)

# Pretty print the critiques
print("Critiques:")
pp.pprint(critiques)

  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 3/3 [00:03<00:00,  1.26s/it]

Critiques:
[   {   'critique': 'The translation accurately conveys the meaning of the original text, '
                    'maintaining its professional tone and key concepts. It demonstrates a good '
                    'understanding of both languages and appropriately uses German sentence '
                    'structure. The term "Proper documentation" is well-rendered as "Eine korrekte '
                    'Dokumentation," preserving the emphasis on correctness. The translation of '
                    '"legal risks and complications" as "rechtliche Risiken und Komplikationen" is '
                    'precise and maintains the professional register. The sentence flows naturally '
                    "in German and would be suitable for professional use. However, it doesn't "
                    'showcase exceptional mastery or handling of particularly challenging '
                    'linguistic elements, as the source text is relatively straightforward.',
        'score': 4},





In [10]:
#| hide
# Run nbdev's export step; this cell is marked `#| hide`.
import nbdev; nbdev.nbdev_export()