In [None]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline

# Prompt used to ask the LLM for one customer-support training example.
# Spelled out line by line; the resulting string is identical to the
# triple-quoted form (leading newline, four content lines, trailing newline).
template = (
    "\n"
    "Generate a training example for a customer support chatbot:\n"
    "Query: {query}\n"
    "Intent: {intent}\n"
    "Response: {response}\n"
)
prompt = PromptTemplate(
    input_variables=["query", "intent", "response"],
    template=template,
)

# Wrap the Mistral instruct checkpoint as a LangChain LLM. The first run
# fetches the tokenizer and weight shards from the Hugging Face Hub.
llm = HuggingFacePipeline.from_model_id(
    "mistralai/Mistral-7B-Instruct-v0.1",
    task="text-generation",
)

# Chain = render the prompt with the three variables, then call the LLM.
chain = LLMChain(llm=llm, prompt=prompt)

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Tokenize and format your synthetic dataset
# (Done before the model load so a missing dataset fails fast instead of
# after a multi-GB checkpoint has been pulled into memory.)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
if tokenizer.pad_token is None:
    # The collator below pads batches and needs a pad token. Prefer <unk> over
    # EOS: the collator masks pad positions out of the loss, and masking EOS
    # would stop the model from learning where a response ends.
    tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token

train_dataset = ...  # Load your generated dataset here (tokenized: each item has "input_ids")
if train_dataset is ...:
    raise ValueError(
        "train_dataset is still the `...` placeholder; load the tokenized "
        "synthetic dataset before running training."
    )

# Pick one half-precision dtype and use it for BOTH the 4-bit compute dtype and
# the Trainer's mixed-precision mode, so the two settings cannot disagree.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
compute_dtype = torch.bfloat16 if use_bf16 else torch.float16

# Load model with 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)

# Prepare for PEFT
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, peft_config)

# Train
training_args = TrainingArguments(
    output_dir="mistral-7b-finetuned",
    per_device_train_batch_size=1,  # Adjust for GPU memory
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-5,
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=10,
)

# Causal-LM collator: pads each batch, builds the attention mask, and copies
# input_ids into `labels` (pad positions set to -100). Without `labels` the
# model returns no loss and Trainer cannot train.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)
trainer.train()

# Persist the final weights and tokenizer at the root of output_dir so a later
# `from_pretrained("mistral-7b-finetuned")` finds them; periodic Trainer
# checkpoints alone land in `checkpoint-*` subfolders.
trainer.save_model(training_args.output_dir)
tokenizer.save_pretrained(training_args.output_dir)

In [None]:
# Inference-time prompt layout: an instruction line, the query and intent
# fields, then an open "Response:" line. This is a plain string (not an
# f-string), so `{query}` and `{intent}` stay as literal placeholders.
prompt = (
    "\n"
    "You are a customer support chatbot. Respond to the user query below based on their intent.\n"
    "Query: {query}\n"
    "Intent: {intent}\n"
    "Response:\n"
)

In [None]:
# Import everything this cell uses so it runs on a fresh kernel instead of
# relying on names leaked from the training cell.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load fine-tuned model
# NOTE: expects the final weights (or LoRA adapter) at the root of this
# directory, e.g. written with `trainer.save_model("mistral-7b-finetuned")`.
model = AutoModelForCausalLM.from_pretrained("mistral-7b-finetuned", device_map="auto")

# Fine-tuning did not change the vocabulary, so the base checkpoint's tokenizer
# is a valid fallback when none was saved next to the fine-tuned weights.
try:
    tokenizer = AutoTokenizer.from_pretrained("mistral-7b-finetuned")
except (OSError, ValueError):
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Create pipeline
# No `device=` here: the model was already placed by `device_map="auto"`, and
# recent transformers versions raise if asked to move such a model again.
chatbot = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Generate responses
def generate_response(query, intent, max_new_tokens=150):
    """Generate the chatbot's reply for a user query and its intent.

    Args:
        query: The user's message, inserted verbatim after ``Query:``.
        intent: The intent label, inserted verbatim after ``Intent:``.
        max_new_tokens: Upper bound on the number of *generated* tokens.
            This replaces ``max_length=150``, which also counted the prompt
            tokens and could leave little or no room for an answer.

    Returns:
        str: Only the generated reply, with surrounding whitespace stripped.
        The prompt is not echoed back.
    """
    # Build the prompt without source-code indentation so every line starts at
    # column 0, matching the notebook's prompt template. An indented
    # triple-quoted f-string would prepend four spaces to each line.
    prompt_text = (
        "\n"
        "You are a customer support chatbot. Respond to the user query below based on their intent.\n"
        f"Query: {query}\n"
        f"Intent: {intent}\n"
        "Response:\n"
    )
    outputs = chatbot(
        prompt_text,
        max_new_tokens=max_new_tokens,
        return_full_text=False,  # return the completion only, not prompt + completion
    )
    return outputs[0]["generated_text"].strip()