## Setting up the environment

In [None]:
# Delete the Hugging Face cache directory in the user's home folder.
# NOTE(review): anything cached there will presumably have to be downloaded again — confirm this is intended on every run.
!rm -rf ~/.cache/huggingface

In [None]:
# Remove any existing transformers install before installing the pinned version below.
!pip uninstall -y transformers
# PyTorch stack pinned to the +cu124 builds from the PyTorch wheel index.
!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
# bitsandbytes / transformers / accelerate are pinned; requests, openai and scikit-learn are not.
# NOTE(review): gradio is imported later in this notebook but is not installed here — confirm the runtime already provides it.
!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai scikit-learn

In [None]:
# Hugging Face model id of the Llama checkpoint used in this notebook.
# NOTE(review): the model-loading cell below repeats this literal instead of reading LLAMA — keep the two in sync.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

## Importing the libraries

In [None]:
import os
import json
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

#  QUANTIZATION CONFIG

In [None]:
# bitsandbytes settings handed to from_pretrained() when the model is loaded below.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,         # enable double quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_quant_type="nf4"               # NF4 quantization type
)

# INITIALIZE LLAMA (MAIN MODEL)

In [None]:
# Load the tokenizer and the quantized causal-LM weights for the Llama checkpoint.
# NOTE(review): this literal duplicates the LLAMA constant defined near the top of the notebook.
llama_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_name)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_model_name,
    device_map="auto",                  # let the library choose device placement
    quantization_config=quant_config    # 4-bit settings from the quantization cell
)

In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline;
# generate_synthetic_data_with_llama() calls this pipeline to produce the dataset JSON.
# NOTE(review): the model was loaded above with device_map="auto" and a bfloat16 compute dtype;
# torch_dtype/device_map passed here look redundant for a pre-instantiated model — confirm and consider dropping them.
llama_pipeline = pipeline(
    "text-generation",
    model=llama_model,
    tokenizer=llama_tokenizer,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Tool creation

In [None]:
import json
import re
import pandas as pd
import os

def generate_synthetic_data_with_llama(schema: str, count: int):
    """Ask the Llama pipeline for synthetic rows and save them as a CSV file.

    The model is prompted to answer with a single JSON object holding the keys
    ``data`` (list of row objects), ``model_type`` and ``data_description``.
    The rows are written to ``/content/synthetic_data/<model_type>_<description>.csv``.

    Args:
        schema: Text describing the features/target the rows must follow; it is
            pasted verbatim into the prompt.
        count: Number of rows the model is asked to generate.

    Returns:
        On success, a dict with ``model_type``, ``data_description`` and
        ``file_path``. On any failure, a dict with ``error`` (the exception
        message) and ``model_type`` set to ``"Llama-3.1"``; no exception is
        propagated to the caller.
    """
    print("Llama 3.1 model was called")

    llama_prompt = f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert synthetic data generator. Generate {count} rows of synthetic data following the provided schema.
<|eot_id|><|start_header_id|>user<|end_header_id|>
Generate {count} rows of synthetic data following this schema:
{schema}

Return only valid JSON with keys:
- "data": list of objects with feature:value pairs
- "model_type": classification | regression | unsupervised
- "data_description": short description of dataset topic

!!! Do NOT output anything except the JSON !!!
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    try:
        # return_full_text=False keeps the prompt (whose schema may itself
        # contain braces) out of the text that is searched for JSON.
        response = llama_pipeline(
            llama_prompt,
            max_new_tokens=2000,
            do_sample=True,
            temperature=0.7,
            pad_token_id=llama_tokenizer.eos_token_id,
            return_full_text=False,
        )[0]['generated_text']

        # Fallback: if the prompt is echoed anyway, keep only the assistant's last block.
        assistant_header = "<|start_header_id|>assistant<|end_header_id|>"
        if assistant_header in response:
            response = response.split(assistant_header)[-1]
        response = response.strip()

        print("Raw model response ->", response)

        # Tolerates chatter or stray braces before/after the JSON payload.
        data_json = _extract_json_payload(response)
        print("Extracted JSON ->", json.dumps(data_json))

        data = data_json["data"]
        model_type = data_json["model_type"]
        data_description = data_json["data_description"]

        # Save to CSV (inside /content so it shows up in Colab's file sidebar)
        df = pd.DataFrame(data)
        base_dir = "/content/synthetic_data"
        os.makedirs(base_dir, exist_ok=True)

        # Both name parts are model-generated, so sanitise them before they
        # become part of a file-system path.
        file_stem = _safe_file_stem(f"{model_type}_{data_description}")
        file_path = os.path.join(base_dir, f"{file_stem}.csv")

        df.to_csv(file_path, index=False)

        return {
            "model_type": model_type,
            "data_description": data_description,
            "file_path": file_path
        }

    except Exception as e:
        # Deliberate best-effort: report the failure to the caller as data.
        print(f"Error in Llama generation: {str(e)}")
        return {
            "error": str(e),
            "model_type": "Llama-3.1"
        }


def _extract_json_payload(text: str, required_keys=("data", "model_type", "data_description")):
    """Return the first JSON object embedded in ``text`` that has all ``required_keys``.

    Every ``{`` is tried as a possible start of a JSON document, so text before
    or after the payload (including text that contains braces) is ignored.

    Raises:
        ValueError: if no JSON object with the required keys can be decoded.
    """
    decoder = json.JSONDecoder()
    saw_object = False
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            saw_object = True
            if all(key in candidate for key in required_keys):
                return candidate
        start = text.find("{", start + 1)

    if saw_object:
        raise ValueError(
            "JSON object in model output is missing required keys: "
            + ", ".join(required_keys)
        )
    raise ValueError("No JSON object detected in model output.")


def _safe_file_stem(raw_name, max_length: int = 100) -> str:
    """Turn arbitrary text into a single, length-limited file-name component."""
    # Anything that is not a word character, dot or dash (spaces, slashes, ...)
    # becomes an underscore, so the result can never introduce a path separator.
    cleaned = re.sub(r"[^\w.-]", "_", str(raw_name))
    return cleaned[:max_length] or "synthetic_data"

# LLaMA conversation loop

In [None]:
import json
import re
import torch
from threading import Thread
from transformers import TextIteratorStreamer

# -------------------------------------------------
#  MAIN STREAMING FUNCTION
# -------------------------------------------------
def llama_conversation_stream(user_prompt: str):
    """Stream the assistant's reply and run the synthetic-data tool if it is called.

    This is a generator: every yielded value is the full response text
    accumulated so far. Generation runs in a background thread and is read
    through a ``TextIteratorStreamer``. As soon as the accumulated text contains
    a JSON object with ``name`` and ``arguments`` keys, streaming stops and the
    named tool is executed; a summary (or a warning/error line) is appended and
    yielded as the final value.

    Args:
        user_prompt: The user's message.

    Yields:
        str: The response text accumulated so far. Errors are reported as a
        trailing ``❌ Error: ...`` line instead of being raised.
    """
    result = ""
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Raw string: the backslashes must reach the model so that the example
        # shows a *valid* tool call with an escaped schema string.
        example_interaction = r"""
User: Generate 10 rows for demo.
Assistant: Sure – here’s a preview of the dataset I will create …

{
  "name": "generate_synthetic_data_with_llama",
  "arguments": {
    "schema": "{\"features\": [{\"name\": \"age\", \"type\": \"int\"}], \"target\": {\"name\": \"is_churn\", \"type\": \"bool\"}}",
    "count": 10
  }
}
"""

        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant.\n\n"
                    "When the user asks for synthetic data:\n"
                    "1. First, respond conversationally to confirm that you understand and are eager to generate the data.\n"
                    "2. Then, on a NEW line, output a **single JSON object** that calls the tool.\n"
                    "   The JSON must have the following format:\n"
                    "   {\n"
                    "     \"name\": \"generate_synthetic_data_with_llama\",\n"
                    "     \"arguments\": {\n"
                    "       \"schema\": \"<escaped-schema-string>\",\n"
                    "       \"count\": <int>\n"
                    "     }\n"
                    "   }\n\n"
                    "Important rules:\n"
                    "• The 'schema' value must be a valid JSON string with quotes escaped (\\\").\n"
                    "• Do NOT wrap the JSON in markdown.\n"
                    "• Do NOT add any text after the JSON."
                )
            },
            {
                "role": "system",
                "content": "Example behaviour:\n" + example_interaction
            },
            {"role": "user", "content": user_prompt}
        ]

        # add_generation_prompt=True appends the assistant header so the model
        # starts answering instead of first having to emit the header itself.
        inputs = llama_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            max_length=2048,
            truncation=True
        ).to(llama_model.device)

        # skip_special_tokens keeps markers such as <|eot_id|> out of the UI text.
        streamer = TextIteratorStreamer(
            llama_tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        gen_kwargs = dict(
            inputs=inputs,
            streamer=streamer,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.7,
            pad_token_id=llama_tokenizer.eos_token_id
        )

        generation_errors = []

        def _run_generation():
            # An exception inside the worker would otherwise never close the
            # stream, leaving the consumer loop below blocked forever.
            try:
                llama_model.generate(**gen_kwargs)
            except Exception as gen_exc:
                generation_errors.append(gen_exc)
                streamer.end()

        generation_thread = Thread(target=_run_generation)
        generation_thread.start()

        func_name = None
        args = None

        for chunk in streamer:
            result += chunk
            yield result

            tool_call = _find_tool_call(result.replace("<|eot_id|>", ""))
            if tool_call is not None:
                func_name = tool_call["name"]
                args = tool_call["arguments"]
                break

        # The tool reuses the same model, so let the background generation
        # finish before starting another generation on it.
        generation_thread.join()
        if generation_errors:
            raise generation_errors[0]

        # ----------------  TOOL EXECUTION  ---------------------------------
        if func_name == "generate_synthetic_data_with_llama":
            if not isinstance(args, dict) or "schema" not in args or "count" not in args:
                result += "\n\n⚠️ Tool call is missing the required 'schema' and 'count' arguments."
            else:
                schema = args["schema"]
                if not isinstance(schema, str):
                    # The model sometimes sends the schema as an object instead
                    # of an escaped string; serialise it for the prompt.
                    schema = json.dumps(schema)
                # Pass only the known parameters; the arguments are model-generated.
                data = generate_synthetic_data_with_llama(
                    schema=schema, count=int(args["count"])
                )
                if "error" in data:
                    # The tool reports failures as a dict without 'file_path'.
                    result += f"\n\n❌ Tool failed: {data['error']}"
                else:
                    result += (
                        f"\n\n✅ Tool executed successfully!\n"
                        f"📂 File: {data['file_path']}\n"
                        f"📊 Model: {data['model_type']}\n"
                        f"📝 Description: {data['data_description']}"
                    )
        elif func_name:
            result += f"\n\n⚠️ Tool '{func_name}' not implemented."
        else:
            result += "\n\n⚠️ No valid tool call detected."
        yield result

    except Exception as exc:
        # Keep whatever was already streamed and append the error to it.
        yield f"{result}\n\n❌ Error: {exc}"


def _find_tool_call(text: str):
    """Return the first JSON object in ``text`` with "name" and "arguments" keys, or None.

    Each ``{`` is tried as the start of a JSON document, so braces in the
    conversational part of the reply do not prevent detection.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "name" in candidate and "arguments" in candidate:
            return candidate
        start = text.find("{", start + 1)
    return None

# Prompt example

In [None]:
# "Generate synthetic data for a customer churn classification problem with 10 rows."

In [None]:
import gradio as gr

# Minimal Gradio UI: one textbox for the user's message, one Markdown area for the reply.
view = gr.Interface(
    fn=llama_conversation_stream,  # generator defined above; it yields the accumulated response text
    inputs=[gr.Textbox(label="Your message:")],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never"
)
# Start the interface.
view.launch()

# Example of past usage:

![Sample Image](pictures/screenshot_example.png)

### Note: While the model “Meta-Llama-3.1-8B-Instruct” is not fine-tuned for working with tools and this notebook may not be very practical, it serves as an excellent demonstration of how simple the concept of tools can be. We can implement a logic block that detects specific patterns in the text (e.g., special tokens, JSON, etc.), extracts that portion from the response, and passes it as parameters to the corresponding tool. Then we can simply call the conversational model again, giving it the text that preceded the tool call together with a summary of the tool call. Or we can simply hardcode the ending of the response after a successful tool call!