In [None]:
import torch
import transformers

# Minimal chat demo: load Llama-3-8B-Instruct through a text-generation
# pipeline and print the model's reply to a single user turn.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Weights are requested in bfloat16; device placement is delegated to
# device_map="auto".
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {
        "role": "system",
        "content": "You are a pirate chatbot who always responds in pirate speak!",
    },
    {"role": "user", "content": "Who are you?"},
]

# Render the conversation into one prompt string (not token ids) that ends
# with the generation prompt, so the model continues with its own turn.
prompt = pipeline.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Generation stops at whichever comes first: the tokenizer's EOS id or the
# id of the "<|eot_id|>" token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    # The templated prompt already contains every special token it needs.
    # Tokenizing it again with special tokens enabled can prepend a second
    # BOS token (depends on the installed transformers version), so disable
    # it explicitly.
    add_special_tokens=False,
    # Return only the newly generated text instead of prompt + completion,
    # which removes the need to slice the prompt off by character count.
    return_full_text=False,
)
print(outputs[0]["generated_text"])

In [None]:
import pandas as pd
import torch
import transformers

# Trial run: classify the first few paper excerpts with Llama-3-8B-Instruct
# and print the raw YAML answers for inspection.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Weights are requested in bfloat16; device placement is delegated to
# device_map="auto".
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

df = pd.read_parquet("db_jlt_classified_fireworks_step2.parquet")
# NOTE(review): this column is initialised here but nothing in this cell
# writes to it; the loop below only prints. Confirm whether results are
# meant to be stored.
df["Classified_ollama_70b"] = None

# Only rows below the token threshold are considered.
# Presumably "Tokens_llama" is a per-document token count and 7500 keeps the
# input inside the model's context window - TODO confirm.
myidx = df[df["Tokens_llama"] < 7500].index

# NOTE(review): the EXP line below reads '"true" is this is' (probably meant
# 'if this is'). Left untouched because editing the prompt changes the model
# input - confirm before fixing.
sys_prompt_classify2 = """This is an excerpt of an academic paper.
Based on the input text generate a YAML containing: PHOTONIC, SiPHOTONIC, EXP, and CAT.
"PHOTONIC" should be "true" if the text is about a photonic device, system, circuit, or chip; otherwise "false".
"SiPHOTONIC" should be "true" if the text is about a silicon photonic device, system, circuit, or chip; otherwise "false".
"EXP" should be "true" is this is an experimental paper; and "false" if it is only a theoretical or numerical study.
"CAT" labels the application field of the work which could be TELECOM, DATACOM, SENSING, BIOLOGY, DISPLAY, or another single term to describe it best.

Your answer should be only four lines and strictly YAML formatted, as shown in the example below:
<example_output>
PHOTONIC: true
SiPHOTONIC: true
EXP: false
CAT: TELECOM
</example_output>
Don't preamble your output with phrases like "Here is the YAML data:" or "Here is the YAML output:".
"""

# The stop-token ids do not depend on the document, so build them once
# instead of on every iteration.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

for idx in myidx[:10]:
    # Single label-based lookup; avoids materialising the whole row first.
    text = df.loc[idx, "Text"]

    messages = [
        {"role": "system", "content": sys_prompt_classify2},
        {"role": "user", "content": text},
    ]

    # Render the conversation into one prompt string ending with the
    # generation prompt.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    outputs = pipeline(
        prompt,
        max_new_tokens=50,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.1,
        top_p=0.9,
        # The templated prompt already contains its special tokens; adding
        # them again at tokenization time can duplicate the BOS token
        # (depends on the installed transformers version).
        add_special_tokens=False,
        # Return only the completion, so no prompt slicing is needed.
        return_full_text=False,
    )

    print("==========")
    print(outputs[0]["generated_text"])

In [None]:
import torch
import transformers

# Same chat demo as for the 8B model, but with Llama-3-70B-Instruct.
model_id = "meta-llama/Meta-Llama-3-70B-Instruct"

# Weights are requested in bfloat16; device placement is delegated to
# device_map="auto".
# NOTE(review): if an earlier cell's `pipeline` is still alive in the kernel,
# its model stays in memory until this assignment completes - consider
# restarting the kernel before loading the 70B model.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {
        "role": "system",
        "content": "You are a pirate chatbot who always responds in pirate speak!",
    },
    {"role": "user", "content": "Who are you?"},
]

# Render the conversation into one prompt string (not token ids) that ends
# with the generation prompt, so the model continues with its own turn.
prompt = pipeline.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Generation stops at whichever comes first: the tokenizer's EOS id or the
# id of the "<|eot_id|>" token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    # The templated prompt already contains every special token it needs.
    # Tokenizing it again with special tokens enabled can prepend a second
    # BOS token (depends on the installed transformers version), so disable
    # it explicitly.
    add_special_tokens=False,
    # Return only the newly generated text instead of prompt + completion,
    # which removes the need to slice the prompt off by character count.
    return_full_text=False,
)
print(outputs[0]["generated_text"])

In [None]:
from transformers import AutoModel

# Load a GGUF checkpoint from the Hub through transformers.
# NOTE(review): AutoModel presumably resolves to the bare backbone without a
# language-modelling head; if text generation is the goal, the
# AutoModelForCausalLM variant sketched below is likely what is wanted -
# confirm.
gguf_repo_id = "MaziyarPanahi/Llama-3-8B-Instruct-64k-GGUF"
gguf_filename = "Llama-3-8B-Instruct-64k.Q2_K.gguf"

model = AutoModel.from_pretrained(gguf_repo_id, gguf_file=gguf_filename)


# Alternative kept for reference (different repo / quantisation, and it also
# loads the tokenizer):
# model_id = "MaziyarPanahi/Llama-3-8B-Instruct-v0.9-GGUF"
# filename = "Llama-3-8B-Instruct-v0.9.Q4_K_M.gguf"

# tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
# model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

In [None]:
import transformers

# Build a text-generation pipeline on top of a GGUF checkpoint.
#
# `gguf_file` is an argument of `from_pretrained`. Extra keyword arguments
# given directly to `transformers.pipeline(...)` are handed to the pipeline
# object rather than to the model/tokenizer loaders, so the GGUF file would
# not be used for loading. Load both pieces explicitly and pass the ready
# objects to the pipeline instead.
gguf_repo_id = "MaziyarPanahi/Llama-3-8B-Instruct-64k-GGUF"
gguf_filename = "Llama-3-8B-Instruct-64k.Q2_K.gguf"

gguf_tokenizer = transformers.AutoTokenizer.from_pretrained(
    gguf_repo_id, gguf_file=gguf_filename
)
gguf_model = transformers.AutoModelForCausalLM.from_pretrained(
    gguf_repo_id, gguf_file=gguf_filename
)

pipeline = transformers.pipeline(
    "text-generation",
    model=gguf_model,
    tokenizer=gguf_tokenizer,
)