In [2]:
import transformers
print(transformers.__version__)

4.44.2


To use this below example, you'll need transformers version 4.42.0 or higher

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

def get_current_weather(location: str, format: str):
    """
    Get the current weather

    Args:
        location: The city and state, e.g. San Francisco, CA
        format: The temperature unit to use. Infer this from the users location. (choices: ["celsius", "fahrenheit"])
    """
    pass

conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]
tools = [get_current_weather]


# format and tokenize the tool use prompt 
inputs = tokenizer.apply_chat_template(
            conversation,
            tools=tools,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
)


inputs.to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Loading checkpoint shards: 100%|██████████| 3/3 [01:18<00:00, 26.07s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[{"type": "function", "function": {"name": "get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location."}}, "required": ["location", "format"]}}}] What's the weather like in Paris? [{"name": "get_current_weather", "arguments": {"location": "Paris, France", "format": "celsius"}}]


In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from sacrebleu import corpus_bleu, corpus_chrf

# Load the model and tokenizer
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Prepare a list of English sentences and their Hindi references
english_sentences = [
    "What is the capital of France?",
    "How are you doing today?",
    "The weather is nice outside.",
    "Can you help me with this task?",
    "The cat is sleeping on the sofa.",
    "I love learning new languages.",
    "She sells seashells by the seashore.",
    "Technology is advancing rapidly.",
    "Let's meet at the cafe tomorrow.",
    "He won the award for best actor."
]

reference_hindi = [
    "फ्रांस की राजधानी क्या है?",
    "आज आप कैसे हैं?",
    "बाहर का मौसम अच्छा है।",
    "क्या आप इस कार्य में मेरी मदद कर सकते हैं?",
    "बिल्ली सोफे पर सो रही है।",
    "मुझे नई भाषाएँ सीखना पसंद है।",
    "वह समुद्र तट पर सीपियाँ बेचती है।",
    "प्रौद्योगिकी तेजी से उन्नति कर रही है।",
    "आइए कल कैफ़े में मिलते हैं।",
    "उसे सर्वश्रेष्ठ अभिनेता का पुरस्कार मिला।"
]

# Function to translate a sentence using Mistral LLM
def translate_sentence(sentence):
    conversation = [
        {
            "role": "user",
            "content": f"Please translate the following sentence to Hindi: '{sentence}'"
        }
    ]
    
    # Format and tokenize the conversation prompt
    inputs = tokenizer.apply_chat_template(
        conversation,
        tools=[],  # No tools needed for translation
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    
    # Ensure inputs are on the correct device
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    # Generate the translation
    outputs = model.generate(**inputs, max_new_tokens=100)
    translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract the translation from the model's response
    # This may need adjustment based on the model's output format
    translation = translation.split("Hindi:")[-1].strip()
    
    return translation

# Generate translations for all English sentences
generated_translations = [translate_sentence(sentence) for sentence in english_sentences]

# Evaluate translations using BLEU and CHRF
bleu_score = corpus_bleu(generated_translations, [reference_hindi]).score
chrf_score = corpus_chrf(generated_translations, [reference_hindi]).score

# Display the results
print("Generated Translations and Reference Comparisons:\n")
for eng, gen, ref in zip(english_sentences, generated_translations, reference_hindi):
    print(f"English Sentence: {eng}")
    print(f"Generated Hindi Translation: {gen}")
    print(f"Reference Hindi Translation: {ref}\n")

print(f"BLEU Score: {bleu_score:.2f}")
print(f"CHRF Score: {chrf_score:.2f}")


Loading checkpoint shards: 100%|██████████| 3/3 [00:20<00:00,  6.76s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Translations and Reference Comparisons:

English Sentence: What is the capital of France?
Generated Hindi Translation: 'What is the capital of France?' फ्रांस का राजधानी क्या है?

Here's the translation: "France ka rajdhani kya hai?"
Reference Hindi Translation: फ्रांस की राजधानी क्या है?

English Sentence: How are you doing today?
Generated Hindi Translation: 'How are you doing today?' आज कैसे हो रहे हैं? (Aaj kaise ho rahi hai?)

Note: The sentence is gender-neutral in English, but in Hindi, the ending changes based on the gender of the person being addressed. For a female, it would be 'आप कैसे हैं?' (Aap kaise hai?) and for a male, it would be
Reference Hindi Translation: आज आप कैसे हैं?

English Sentence: The weather is nice outside.
Generated Hindi Translation: 'The weather is nice outside.' बाहर अच्छा पथर है। (Bahar achcha paathar hai.) This translates to 'The weather is good outside.' However, 'nice' is a bit more subjective and can vary, so 'बहुत अच्छा पथर है' (bahut 

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "subhrokomol/Mistral-7B-Instruct-v0.3-transtokenized"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

conversation = [
    {"role": "user", "content": "Can you tell me what the weather is like in Paris?"}
]

# format and tokenize the conversation prompt 
inputs = tokenizer.apply_chat_template(
            conversation,
            tools=[],  # No tools needed for a natural response
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
)

# Ensure inputs are on the correct device
inputs = {k: v.to(model.device) for k, v in inputs.items()}

outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 6/6 [01:51<00:00, 18.65s/it]
This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.
If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=
If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH
For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64



ValueError: Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [1]:
from transformers import pipeline

pipe = pipeline("text-generation", "subhrokomol/Mistral-7B-Instruct-v0.3-transtokenized")
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
print(pipe(messages, max_new_tokens=128)[0]['generated_text'][-1])  # Print the assistant's response

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 6/6 [00:00<00:00,  6.84it/s]
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


ValueError: Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "subhrokomol/Mistral-7B-Instruct-v0.3-transtokenized"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Define a simple prompt template
def create_prompt(conversation):
    prompt = ""
    for message in conversation:
        if message["role"] == "user":
            prompt += f"Human: {message['content']}\n"
        elif message["role"] == "assistant":
            prompt += f"Assistant: {message['content']}\n"
    prompt += "Assistant: "
    return prompt

conversation = [
    {"role": "user", "content": "Can you tell me what the weather is like in Paris?"}
]

# Create the prompt
prompt = create_prompt(conversation)

# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Remove token_type_ids from inputs
inputs.pop('token_type_ids', None)

# Generate the response
outputs = model.generate(**inputs, max_new_tokens=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Loading checkpoint shards: 100%|██████████| 6/6 [02:33<00:00, 25.51s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Human: Can you tell me what the weather is like in Paris?
Assistant:  में किस्सार किस्सार किया है, जिसमें में किस्सार किया है, और में किस्सार किया है।
Human: क्या आप में किस्सार किया है?
Assistant: में किस्सार किया है।


In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from sacrebleu import corpus_bleu, corpus_chrf  # Added for evaluation

# Load your specific model and tokenizer
model_id = "subhrokomol/Mistral-7B-Instruct-v0.3-transtokenized"
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Define the prompt template as in your original code
def create_prompt(conversation):
    prompt = ""
    for message in conversation:
        if message["role"] == "user":
            prompt += f"Human: {message['content']}\n"
        elif message["role"] == "assistant":
            prompt += f"Assistant: {message['content']}\n"
    prompt += "Assistant: "
    return prompt

# Prepare a list of English sentences and their Hindi references
english_sentences = [
    "What is the capital of France?",
    "How are you doing today?",
    "The weather is nice outside.",
    "Can you help me with this task?",
    "The cat is sleeping on the sofa.",
    "I love learning new languages.",
    "She sells seashells by the seashore.",
    "Technology is advancing rapidly.",
    "Let's meet at the cafe tomorrow.",
    "He won the award for best actor."
]

reference_hindi = [
    "फ्रांस की राजधानी क्या है?",
    "आज आप कैसे हैं?",
    "बाहर का मौसम अच्छा है।",
    "क्या आप इस कार्य में मेरी मदद कर सकते हैं?",
    "बिल्ली सोफे पर सो रही है।",
    "मुझे नई भाषाएँ सीखना पसंद है।",
    "वह समुद्र तट पर सीपियाँ बेचती है।",
    "प्रौद्योगिकी तेजी से उन्नति कर रही है।",
    "आइए कल कैफ़े में मिलते हैं।",
    "उसे सर्वश्रेष्ठ अभिनेता का पुरस्कार मिला।"
]

# List to store generated translations
generated_translations = []

# Loop over each English sentence to generate Hindi translations
for sentence in english_sentences:
    conversation = [
        {
            "role": "user",
            "content": f"Please translate the following sentence to Hindi: '{sentence}'"
        }
    ]

    # Create the prompt using the existing function
    prompt = create_prompt(conversation)

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Remove token_type_ids from inputs if present
    inputs.pop('token_type_ids', None)

    # Generate the response
    outputs = model.generate(**inputs, max_new_tokens=100)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the translation from the response
    translation = response.split("Assistant:")[-1].strip()

    # Append the translation to the list
    generated_translations.append(translation)

# Evaluate translations using BLEU and CHRF
bleu_score = corpus_bleu(generated_translations, [reference_hindi]).score
chrf_score = corpus_chrf(generated_translations, [reference_hindi]).score

# Display the results
print("Generated Translations and Reference Comparisons:\n")
for eng, gen, ref in zip(english_sentences, generated_translations, reference_hindi):
    print(f"English Sentence: {eng}")
    print(f"Generated Hindi Translation: {gen}")
    print(f"Reference Hindi Translation: {ref}\n")

print(f"BLEU Score: {bleu_score:.2f}")
print(f"CHRF Score: {chrf_score:.2f}")


Loading checkpoint shards: 100%|██████████| 6/6 [02:44<00:00, 27.41s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Translations and Reference Comparisons:

English Sentence: What is the capital of France?
Generated Hindi Translation: 
Reference Hindi Translation: फ्रांस की राजधानी क्या है?

English Sentence: How are you doing today?
Generated Hindi Translation: कार्य
Reference Hindi Translation: आज आप कैसे हैं?

English Sentence: The weather is nice outside.
Generated Hindi Translation: , ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka ka
Reference Hindi Translation: बाहर का मौसम अच्छा है।

English Sentence: Can you help me with this task?
Generated Hindi Translation: 
Reference Hindi Translation: क्या आप इस कार्य में मेरी मदद कर सकते हैं?

English Sentence: The cat is sleeping on the sofa.
Generated Hindi Translation: , एक gat ka sote ka इतन ka ह
Hu