In [None]:
!pip install transformers accelerate bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.

In [None]:

import os
from getpass import getpass

import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_mistral(token: str, model_id: str = "mistralai/Mistral-7B-Instruct-v0.1"):
    """Log in to the Hugging Face Hub and load a causal LM with its tokenizer.

    Args:
        token: Hugging Face access token. It is passed to ``login`` and to
            both ``from_pretrained`` calls.
        model_id: Hub repository to load. Defaults to the Mistral 7B Instruct
            v0.1 checkpoint used throughout this notebook.

    Returns:
        A ``(model, tokenizer)`` tuple. ``model.eval()`` has already been
        called on the returned model.
    """
    login(token=token)

    # `use_auth_token` is deprecated in transformers in favour of `token`.
    # Passing the token explicitly also avoids depending on a cached login.
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",  # lets Accelerate offload automatically
        token=token,
    )
    model.eval()
    return model, tokenizer


In [None]:

import torch

# 🔐 Hugging Face Token
# Never paste the token into the notebook: read it from the HF_TOKEN
# environment variable and fall back to a hidden interactive prompt when the
# variable is unset or empty.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")

# ✅ Load model
print("Loading model...")
model, tokenizer = load_mistral(HF_TOKEN)
print("Model loaded!")

# 🔄 Build prompt
def build_prompt(topic, background):
    """Return the instruction prompt that seeds an Alice/Bob dialogue.

    The prompt quotes ``topic``, states ``background``, asks for a casual
    tone, and ends with Alice's opening line followed by an open ``Bob:``
    turn for the model to continue.
    """
    prompt_lines = [
        "<s>[INST] Write a friendly and casual conversation between Alice and Bob.",
        f'They are talking about "{topic}".',
        f"Background: {background}",
        "",
        "Make it sound like how real people talk. Keep it natural and relaxed. "
        "Use short, simple sentences. Avoid robotic or stiff replies.",
        "",
        "Alice: Hey Bob, guess what?",
        "Bob:",
    ]
    return "\n".join(prompt_lines)

# 🔁 Conversation generation
while True:
    topic = input("\nEnter a topic (or type 'quit'): ")
    if topic.strip().lower() == 'quit':
        break

    background = input("Enter background information: ")
    prompt = build_prompt(topic, background)

    # The prompt text already begins with a literal "<s>", so stop the
    # tokenizer from prepending a second BOS token on top of it.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    # The decoded text contains the prompt followed by the model's continuation.
    result = tokenizer.decode(output[0], skip_special_tokens=True)

    # 🧼 Clean and extract just the conversation
    # Anchor on the seed lines that end the prompt (from its last "Alice: "
    # onwards) rather than on the first "Alice: " in the text: the user's
    # topic/background may itself contain "Alice: ", which would otherwise pull
    # instruction text into the printed conversation.
    seed_at = prompt.rfind("Alice: ")
    start = result.find(prompt[seed_at:]) if seed_at != -1 else -1
    if start == -1:
        # Seed not found verbatim in the decoded text: fall back to the first
        # "Alice: " occurrence.
        start = result.find("Alice: ")
    conversation = result[start:].strip() if start != -1 else result.strip()

    print("\n🔹 Generated Conversation:\n")
    print(conversation)


Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



Model loaded!

Enter a topic (or type 'quit'): food
Enter background information: alice likes indian punjabi food, bob likes chinese food

🔹 Generated Conversation:

Alice: Hey Bob, guess what?
Bob: What’s up Alice?
Alice: I’ve been craving some good Punjabi food lately. You know the kind that’s spicy and flavorful.
Bob: Oh yeah! I love Punjabi food too. But I’m actually in the mood for some Chinese food right now.
Alice: Chinese food? Really? That's interesting.
Bob: Yeah, I was thinking about trying out some dim sum or maybe some Kung Pao chicken. What do you think?
Alice: That sounds delicious! But if you ever want to try some Punjabi food, I can definitely recommend some places around here.
Bob: Will do, Alice. Thanks for the offer.
Alice: No problem, Bob. Food is always a great way to bond and explore new cultures.
Bob: Absolutely, Alice. And you know what? Maybe we can even cook up a Punjabi meal together sometime.
Alice: That would be so much fun! I’d love to teach you some of t

In [None]:
# 🔄 Build prompt
def build_prompt(topic, background, mode="dyadic"):
    """Return the instruction prompt that seeds a two- or three-person dialogue.

    Args:
        topic: Subject of the conversation; inserted in double quotes.
        background: Free-text context inserted after ``Background:``.
        mode: ``"dyadic"`` selects the Alice/Bob wording and opening line. Any
            other value selects the Alice/Bob/Charlie wording. The value is
            also inserted verbatim into the instruction sentence. Defaults to
            ``"dyadic"`` so two-argument calls written for the earlier
            ``build_prompt(topic, background)`` keep working.

    Returns:
        The prompt string, ending with Alice's opening line and an open
        ``Bob:`` turn for the model to continue.
    """
    if mode == "dyadic":
        participants = "between Alice and Bob"
        opening = "Alice: Hey Bob, guess what?\nBob:"
    else:
        participants = "among Alice, Bob, and Charlie"
        opening = "Alice: Hey Bob, Charlie! Guess what?\nBob:"

    return (
        f"<s>[INST] Write a friendly and casual {mode} conversation {participants}.\n"
        f'They are talking about "{topic}".\n'
        f"Background: {background}\n"
        "\n"
        "Make it sound like how real people talk. Keep it natural and relaxed. "
        "Use short, simple sentences. Avoid robotic or stiff replies.\n"
        "\n"
        f"{opening}"
    )

# 🔁 Conversation generation loop
while True:
    topic = input("\nEnter a topic (or type 'quit'): ")
    if topic.strip().lower() == 'quit':
        break

    # Keep asking until one of the two supported conversation types is entered.
    mode = ""
    while mode not in ["dyadic", "triadic"]:
        mode = input("Choose conversation type ('dyadic' or 'triadic'): ").strip().lower()

    background = input("Enter background information: ")

    prompt = build_prompt(topic, background, mode)

    # The prompt text already begins with a literal "<s>", so stop the
    # tokenizer from prepending a second BOS token on top of it.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=800,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    # The decoded text contains the prompt followed by the model's continuation.
    result = tokenizer.decode(output[0], skip_special_tokens=True)

    # 🧼 Extract just the conversation
    # Anchor on the seed lines that end the prompt (from its last "Alice: "
    # onwards) rather than on the first "Alice: " in the text: the user's
    # topic/background may itself contain "Alice: ", which would otherwise pull
    # instruction text into the printed conversation.
    seed_at = prompt.rfind("Alice: ")
    start = result.find(prompt[seed_at:]) if seed_at != -1 else -1
    if start == -1:
        # Seed not found verbatim in the decoded text: fall back to the first
        # "Alice: " occurrence.
        start = result.find("Alice: ")
    conversation = result[start:].strip() if start != -1 else result.strip()

    print("\n🔹 Generated Conversation:\n")
    print(conversation)


Enter a topic (or type 'quit'): food
Choose conversation type ('dyadic' or 'triadic'): triadic
Enter background information: Alice likes indian food, Bob likes continental and Charlie likes to try every cuisine

🔹 Generated Conversation:

Alice: Hey Bob, Charlie! Guess what?
Bob: Hey Alice, Charlie! What's up?
Charlie: Hi! Alice, what's cooking tonight?
Alice: Actually, we decided to order some Indian food. What do you guys say?
Bob: Wow, that's new! I've never had Indian food before. I usually prefer Continental. But sure, let's give it a try!
Charlie: Me too! I like trying new cuisines. I've had Indian, Chinese, Italian, and Mexican. But my favorite is still Continental.
Alice: Really? What's your favorite dish from Continental cuisine?
Bob: It has to be Spaghetti Carbonara. And what about you, Alice? What's your favorite Indian dish?
Alice: Oh, I love Butter Chicken! How about you, Charlie?
Charlie: My favorite Indian dish is Chicken Tikka Masala. But I've tried some amazing Sushi 