In [1]:
import anthropic

# Shared API client used by every later cell. No API key is passed explicitly here.
client = anthropic.Anthropic()

# Print one page (at most 20 entries) of the models reported by the API.
print(client.models.list(limit=20))

SyncPage[ModelInfo](data=[ModelInfo(id='claude-3-5-sonnet-20241022', created_at=datetime.datetime(2024, 10, 22, 0, 0, tzinfo=datetime.timezone.utc), display_name='Claude 3.5 Sonnet (New)', type='model'), ModelInfo(id='claude-3-5-haiku-20241022', created_at=datetime.datetime(2024, 10, 22, 0, 0, tzinfo=datetime.timezone.utc), display_name='Claude 3.5 Haiku', type='model'), ModelInfo(id='claude-3-5-sonnet-20240620', created_at=datetime.datetime(2024, 6, 20, 0, 0, tzinfo=datetime.timezone.utc), display_name='Claude 3.5 Sonnet (Old)', type='model'), ModelInfo(id='claude-3-haiku-20240307', created_at=datetime.datetime(2024, 3, 7, 0, 0, tzinfo=datetime.timezone.utc), display_name='Claude 3 Haiku', type='model'), ModelInfo(id='claude-3-opus-20240229', created_at=datetime.datetime(2024, 2, 29, 0, 0, tzinfo=datetime.timezone.utc), display_name='Claude 3 Opus', type='model'), ModelInfo(id='claude-3-sonnet-20240229', created_at=datetime.datetime(2024, 2, 29, 0, 0, tzinfo=datetime.timezone.utc), di

# Manual testing

In [34]:
# Manual smoke test: send one user turn with a single text block asking for a
# French translation, then print the raw list of content blocks in the reply.
message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=100,
    temperature=0.2,
    top_p=1,
    messages=[
        {
            "role": "user",
            "content": [{
                "type": "text",
                "text": "Can you translate the following sentence into French: I am Afghan.",
            }],
        },
    ],
)
print(message.content)

[TextBlock(text='In French, "I am Afghan" translates to:\n\n"Je suis Afghan" (if you are male)\nor\n"Je suis Afghane" (if you are female)', type='text')]


In [36]:
# Stringify the list of content blocks (i.e. take its repr) so it can be searched as text.
result = str(message.content)

In [39]:
import re
# Read the reply text straight from the first content block instead of
# regex-parsing str(message.content). The repr of a Python string switches to
# double quotes when the text contains an apostrophe (common in French or
# Italian output), so a pattern anchored on text='...' would not match and
# match.group(1) would raise AttributeError on None. The attribute already
# holds real newlines, so no "\\n" un-escaping is needed either.
res = message.content[0].text
print(res)

In French, "I am Afghan" translates to:

"Je suis Afghan" (if you are male)
or
"Je suis Afghane" (if you are female)


In [40]:
# Bare expression: Jupyter shows the repr of the extracted text as the cell output.
res

'In French, "I am Afghan" translates to:\n\n"Je suis Afghan" (if you are male)\nor\n"Je suis Afghane" (if you are female)'

# Data Preparation

In [2]:
import pandas as pd
import ast

# Load the dataset. The 'sentence' column stores dict literals as text, so each
# cell is parsed back into a Python dict with ast.literal_eval.
df = pd.read_csv('output/dataset.csv')
df['sentence'] = df['sentence'].apply(ast.literal_eval)

# Spot-check one parsed entry (positional row 130).
df['sentence'].iloc[130]

{'eng': {'m': None, 'f': None, 'n': 'I am Nepalese.'},
 'fra': {'m': 'Je suis Népalais.', 'f': 'Je suis Népalaise.', 'n': None},
 'spa': {'m': 'Soy nepalés.', 'f': 'Soy nepalésa.', 'n': None},
 'deu': {'m': 'Ich bin Nepalese.', 'f': 'Ich bin  Nepalesin.', 'n': None},
 'it': {'m': None, 'f': None, 'n': 'Sono nepalese.'}}

# Experiment Function

In [7]:
# Target languages for the experiment: dataset language code -> language name
# inserted into the prompt. Only Italian is enabled; the other entries are
# commented out.
languages = {
    # 'fra': 'French',
    # 'spa': 'Spanish',
    # 'deu': 'German',
    'it': 'Italian'
}

# Model ids to evaluate. NOTE: a later cell appends to this list in place.
models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]

In [11]:
import re
def experiment(model_name, lang_name, sentence):
    '''
    Asks the given model to translate a sentence into the target language.

    Sends a single user message through the notebook-level `client` and
    returns the text of the reply.

    Parameters:
    - model_name (str): The name of the model to use.
    - lang_name (str): The target language name inserted into the prompt (e.g. 'Italian').
    - sentence (str): The sentence to be translated.

    Returns:
    - str: The text generated by the model, i.e. the concatenated text of
      every text block in the reply (with real newlines, not escaped ones).
    '''

    message = client.messages.create(
        model=model_name,
        max_tokens=100,
        temperature=0.2,
        top_p=1,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Can you translate the following sentence into {lang_name}: {sentence}"
                    }
                ]
            }
        ]
    )

    # Take the text from the content blocks themselves. The previous version
    # regex-parsed str(message.content), discarded the parsed value and
    # returned the raw repr of the whole list, which contradicts the
    # documented return value; the regex could also fail to match and raise
    # AttributeError on None.
    return "".join(block.text for block in message.content if block.type == "text")

In [12]:
import pandas as pd
from tqdm import tqdm

# Add progress bar functionality: registers tqdm with pandas so that
# `progress_apply` (used in process_sentences below) is available.
tqdm.pandas()

def process_sentences(df, languages, models, output_folder):
    """
    Runs the translation experiment for every (model, language) pair and
    writes one CSV file per pair.

    For each pair, the English sentence and the reference translations
    ('m', 'f' and 'n' variants) are pulled out of the 'sentence' dicts, the
    English sentence is passed to `experiment`, and the result is saved as
    `<output_folder>/<lang_code>_<model suffix>_exp.csv`.

    Requires `tqdm.pandas()` to have been called so that `progress_apply`
    is available on pandas objects.

    Args:
        df (pd.DataFrame): The input DataFrame; its 'sentence' column holds dicts
                           keyed by language code, then by 'm' / 'f' / 'n'.
        languages (dict): A dictionary where keys are language codes (e.g., 'fra', 'spa')
                          and values are full language names (e.g., 'French', 'Spanish').
        models (list): A list of model names to use for processing.
        output_folder (str): The folder to save output CSV files. It is created
                             if it does not exist yet.

    Returns:
        None
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Create the destination before any API call is made: otherwise a missing
    # folder only surfaces in to_csv, after all the model calls have been made.
    os.makedirs(output_folder, exist_ok=True)

    for model_name in models:
        # File-name suffix: the part after 'claude-'. Computed up front, and
        # falling back to the full name, so an unexpected model id cannot
        # raise IndexError after the expensive calls have already run.
        name_parts = model_name.split('claude-')
        model_suffix = name_parts[1] if len(name_parts) > 1 else model_name

        for lang_code, lang_name in languages.items():
            output_path = f"{output_folder}/{lang_code}_{model_suffix}_exp.csv"

            # Create a new DataFrame for the current language
            df_exp = pd.DataFrame(columns=['eng', f'{lang_code}_m', f'{lang_code}_f', f'{lang_code}_n', 'output'])
            df_exp['eng'] = df['sentence'].apply(lambda x: x.get('eng', {}).get('n'))
            df_exp[f'{lang_code}_m'] = df['sentence'].apply(lambda x: x.get(lang_code, {}).get('m'))
            df_exp[f'{lang_code}_f'] = df['sentence'].apply(lambda x: x.get(lang_code, {}).get('f'))
            df_exp[f'{lang_code}_n'] = df['sentence'].apply(lambda x: x.get(lang_code, {}).get('n'))

            # Apply the experiment function (one model call per row, with a progress bar)
            df_exp['output'] = df_exp['eng'].progress_apply(lambda sentence: experiment(model_name, lang_name, sentence))

            # Save the results to a CSV file
            df_exp.to_csv(output_path, index=False)
            print(f"Saved results to {output_path}")

In [14]:
# Run the experiment for Claude 3 Opus only. The append is guarded so that
# re-running this cell does not add a duplicate entry to `models`, and the
# model is passed by name rather than by its position in the list.
opus_model = 'claude-3-opus-20240229'
if opus_model not in models:
    models.append(opus_model)
print(models)
process_sentences(df, languages, [opus_model], output_folder='output/anthropic')

['claude-3-5-sonnet-20241022', 'claude-3-5-haiku-20241022', 'claude-3-opus-20240229']


100%|█████████████████████████████████████████| 193/193 [10:57<00:00,  3.40s/it]

Saved results to output/anthropic/it_3-opus-20240229_exp.csv



