In [5]:
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import torch

In [6]:
#model_path = "/mnt/models/llm_storage/Phi-4-mini-instruct"
model_path = "/home/joshua/llms/llama3.1_PC_Build/Llama-3.1-8B-Instruct"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(model_path)

model = AutoModelForCausalLM.from_pretrained(
    model_path, 
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    device_map="cuda:0"
)

Loading checkpoint shards: 100%|██████████| 7/7 [00:06<00:00,  1.08it/s]


In [None]:
system_context = {
    "role": "system",
        "content": (
            "You are SMARTConnect V2, a helpful and strategic AI assistant created to help small businesses discover ideal partnerships. "
            "You dont know there business or anbything about them until they tell you. "
            "Your goal is to help them find the best partnerships for their business. "
            "Ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information."
        )
}

bot_context = [
    {
        "role": "assistant",
        "content": (
            "Wait until the user says 'Please click here to get started' to begin the conversation. "
            "Inform the user of how you will ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information."
        )
    },
    {
        "role": "assistant",
        "content": (
            "When the user initiates the conversation, begin by asking them to describe their business. "
            "Ask for what the bussiness is name and there primary purpose. "
            "Prompt them to explain what their company does and what kinds of products or services they offer. "
            "This will help lay the foundation for a personalized recommendation."
        )
    },
    {
        "role": "assistant",
        "content": (
            "Once the user has shared their business overview, follow up with more specific questions to fully understand their operation. "
            "Ask about their typical customer base (e.g. individuals, schools, small businesses), any accessories or services they provide "
            "(e.g. support, warranties, repairs), and whether they operate online, in-store, or both. "
            "These details will help you tailor your recommendations more precisely."
        )
    }
]


final_context = {
    "role": "assistant",
    "content": (
        "Once the user has provided complete details about their business, prepare to generate your recommendations. "
        "Clearly indicate to the user that you're now creating the output. "
        "List the top 3 most complementary business categories for partnerships based on the users information. "
        "For each complementary business list a reason, 2-3 strategic benefits, and a professional outreach email template. "
    )
}

# ISSUE!
def format_messages(messages):
    prompt = ""
    for msg in messages:
        role = msg["role"]
        if role == "system":
            prompt += f"<|system|>\n{msg['content']}\n"
        elif role == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif role == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    prompt += "<|assistant|>\n"
    return prompt

stop_tokens = ["<|user|>", "<|system|>", "<|endoftext|>"]
def trim_output(output_text):
    for stop in stop_tokens:
        if stop in output_text:
            return output_text.split(stop)[0].strip()
    return output_text.strip()

# outputs = pipeline(
#     prompt, 
#     max_new_tokens=2000,
#     length_penalty=1,
#     num_beams=6,
#     temperature=0.1,
#     top_p=0.8,
#     repetition_penalty=1,
#     )

def generate_response(messages, max_new_tokens):
    prompt = format_messages(messages)
    print(prompt)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        #top_p=0.95,
        #repetition_penalty=1.1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return trim_output(response.split("<|assistant|>")[-1])
    #return response.split("<|assistant|>")[-1]

In [None]:
load_context = [system_context]

for i in range(len(bot_context)):
    load_context.append(bot_context[i])
    chat_response = generate_response(load_context, 400)
    print(f"\nDebug - Loading Context:\n{i}\n")
    print(f"\nSMARTConnect V2:\n{chat_response}\n")
    
    user_input = input("You: ")
    load_context.append({"role": "user", "content": user_input})

print("\nSMARTConnect V2 is generating your tailored recommendations...\n")
load_context.append(final_context)
final_response = generate_response(load_context, 2000)
print(f"\nSMARTConnect V2:\n{final_response}\n")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


<|system|>
You are SMARTConnect V2, a helpful and strategic AI assistant created to help small businesses discover ideal partnerships. You dont know there business or anbything about them until they tell you. Your goal is to help them find the best partnerships for their business. Ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|assistant|>
Wait until the user says 'Please click here to get started' to begin the conversation. Inform the user of how you will ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|assistant|>


Debug - Loading Context:
0


SMARTConnect V2:
Welcome to SMARTConnect V2, your strategic AI assistant for discovering ideal partnerships. I'm excited to help your business grow.

To get started, I need to understand your business better. Please click here to get started. I'll ask you some quest

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


<|system|>
You are SMARTConnect V2, a helpful and strategic AI assistant created to help small businesses discover ideal partnerships. You dont know there business or anbything about them until they tell you. Your goal is to help them find the best partnerships for their business. Ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|assistant|>
Wait until the user says 'Please click here to get started' to begin the conversation. Inform the user of how you will ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|user|>
kdjflskdjfklsdjfklsdjfklsjdfklsjdfkljsdlkfsdf
<|assistant|>
When the user initiates the conversation, begin by asking them to describe their business. Ask for what the bussiness is name and there primary purpose. Prompt them to explain what their company does and what kinds of products or services the

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


<|system|>
You are SMARTConnect V2, a helpful and strategic AI assistant created to help small businesses discover ideal partnerships. You dont know there business or anbything about them until they tell you. Your goal is to help them find the best partnerships for their business. Ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|assistant|>
Wait until the user says 'Please click here to get started' to begin the conversation. Inform the user of how you will ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|user|>
kdjflskdjfklsdjfklsdjfklsjdfklsjdfkljsdlkfsdf
<|assistant|>
When the user initiates the conversation, begin by asking them to describe their business. Ask for what the bussiness is name and there primary purpose. Prompt them to explain what their company does and what kinds of products or services the

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.



SMARTConnect V2 is generating your tailored recommendations...

<|system|>
You are SMARTConnect V2, a helpful and strategic AI assistant created to help small businesses discover ideal partnerships. You dont know there business or anbything about them until they tell you. Your goal is to help them find the best partnerships for their business. Ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|assistant|>
Wait until the user says 'Please click here to get started' to begin the conversation. Inform the user of how you will ask relevant business questions first, and only generate partner recommendations and outreach emails once you have complete information.
<|user|>
kdjflskdjfklsdjfklsdjfklsjdfklsjdfkljsdlkfsdf
<|assistant|>
When the user initiates the conversation, begin by asking them to describe their business. Ask for what the bussiness is name and there primary purpose. Prompt them to explain w

: 