In [1]:

from distilabel.models import InferenceEndpointsLLM, AsyncLLM
from huggingface_hub import AsyncInferenceClient
from dotenv import load_dotenv

import os
from distilabel.pipeline import Pipeline
from distilabel.steps import (
    LoadDataFromHub,
    GroupColumns,
    FormatTextGenerationDPO,
    PreferenceToArgilla,
)
from distilabel.steps.tasks import TextGeneration, UltraFeedback

In [2]:
# Load the environment variables from the .env file using python-dotenv

from dotenv import load_dotenv  # Import the library
# Load the variables from .env, then stop immediately when the Hugging Face
# token is absent so later cells do not fail with a less obvious error.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN not found in .env file or environment variables.")


In [3]:
# Standalone check of the loading step: request a single example from the
# prompts dataset on the Hub.
load_dataset = LoadDataFromHub(
    repo_id="argilla/10Kprompts-mini",
    num_examples=1,
    pipeline=Pipeline(name="showcase-pipeline"),
)
load_dataset.load()

# process() is consumed with next(); the first item it yields is displayed as
# the cell output (a tuple of the loaded rows and a boolean flag).
batches = load_dataset.process()
next(batches)

Using the latest cached version of the dataset since argilla/10Kprompts-mini couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at C:\Users\alix\.cache\huggingface\datasets\argilla___10_kprompts-mini\default\0.0.0\cf99b34e5949c3e2310321438fb201cf27647d01 (last modified on Fri Jun 13 18:34:11 2025).
Using the latest cached version of the dataset since argilla/10Kprompts-mini couldn't be found on the Hugging Face Hub
Using the latest cached version of the dataset since argilla/10Kprompts-mini couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at C:\Users\alix\.cache\huggingface\datasets\argilla___10_kprompts-mini\default\0.0.0\cf99b34e5949c3e2310321438fb201cf27647d01 (last modified on Fri Jun 13 18:34:11 2025).


([{'instruction': 'How can I create an efficient and robust workflow that utilizes advanced automation techniques to extract targeted data, including customer information, from diverse PDF documents and effortlessly integrate it into a designated Google Sheet? Furthermore, I am interested in establishing a comprehensive and seamless system that promptly activates an SMS notification on my mobile device whenever a new PDF document is uploaded to the Google Sheet, ensuring real-time updates and enhanced accessibility.',
   'topic': 'Software Development'}],
 True)

In [5]:
# Reload the .env file and re-validate the Hugging Face token (same check as
# the earlier setup cell).
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
# NOTE(review): hf_token is only validated here; it is not passed to the LLMs
# created below. Presumably the LLM picks HF_TOKEN up from the environment
# itself -- confirm.
if not hf_token:
    raise ValueError("HF_TOKEN not found in .env file or environment variables.")

# InferenceEndpointsLLM variant whose load() calls AsyncLLM.load directly
# (rather than the parent class's own load) and then builds the async
# Hugging Face client itself.
class PatchedInferenceEndpointsLLM(InferenceEndpointsLLM):
    def load(self) -> None:
        """Run `AsyncLLM.load` and attach an `AsyncInferenceClient` for `model_id`.

        The client token is the secret value of `api_key` when one is set;
        otherwise `None` is passed to `AsyncInferenceClient`.
        """
        AsyncLLM.load(self)

        if self.api_key:
            token = self.api_key.get_secret_value()
        else:
            token = None

        self._aclient = AsyncInferenceClient(model=self.model_id, token=token)

# Smoke-test TextGeneration with each candidate model on one fixed prompt.
with Pipeline(name="showcase-pipeline") as pipeline:
    models_to_run = [
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
    ]
    # '/' and '.' are both replaced by '_' when building the step name.
    step_name_table = str.maketrans("/.", "__")

    for model in models_to_run:
        safe_model_name = model.translate(step_name_table)

        llm = PatchedInferenceEndpointsLLM(
            model_id=model,
            generation_kwargs={"max_new_tokens": 512, "temperature": 0.7, "do_sample": True},
        )
        task = TextGeneration(name=f"generate_for_{safe_model_name}", llm=llm)
        task.load()

        print(f"--- Generating with {model} ---")
        prompt_batch = [{"instruction": "Which are the top 5 cities to visit in Spain and why?"}]
        result = next(task.process(prompt_batch))
        print(result)
        print("\n")

--- Generating with meta-llama/Meta-Llama-3-8B-Instruct ---
[{'instruction': 'Which are the top 5 cities to visit in Spain and why?', 'generation': "Spain is a country with a rich history, vibrant culture, and stunning landscapes, offering countless options for travelers. Here are the top 5 cities to visit in Spain, in no particular order, along with some reasons why:\n\n1. **Madrid**:\n\t* The capital city is a must-visit, with world-class museums like the Prado, Reina Sofia, and Thyssen-Bornemisza.\n\t* Enjoy the city's lively nightlife, with many bars, restaurants, and clubs.\n\t* Explore the historic center, including the Royal Palace, Plaza Mayor, and Puerta del Sol.\n\t* Visit the famous Retiro Park, a beautiful green oasis in the heart of the city.\n2. **Barcelona**:\n\t* Discover the works of Antoni Gaudí, including the iconic Sagrada Família, Park Güell, and Casa Batlló.\n\t* Stroll along La Rambla, a famous pedestrian street lined with street performers, cafes, and shops.\n\t

In [6]:
# Demonstrate GroupColumns on two hand-written single-row batches, one per model.
group_responses = GroupColumns(
    columns=["generation", "model_name"],
    output_columns=["generations", "model_names"],
    pipeline=Pipeline(name="showcase-pipeline"),
)

llama_rows = [
    {"generation": "Madrid", "model_name": "meta-llama/Meta-Llama-3-8B-Instruct"},
]
mixtral_rows = [
    {"generation": "Barcelona", "model_name": "mistralai/Mixtral-8x7B-Instruct-v0.1"},
]

# The displayed result combines the per-batch values of each listed column
# into the corresponding list-valued output column.
next(group_responses.process(llama_rows, mixtral_rows))

[{'generations': ['Madrid', 'Barcelona'],
  'model_names': ['meta-llama/Meta-Llama-3-8B-Instruct',
   'mistralai/Mixtral-8x7B-Instruct-v0.1']}]

In [7]:
# Try the UltraFeedback judge on one hand-written example. The step is created
# inside the Pipeline context manager, so no `pipeline=` argument is passed.
with Pipeline(name="showcase-evaluation-pipeline") as pipeline:
    judge_llm = PatchedInferenceEndpointsLLM(
        model_id="meta-llama/Meta-Llama-3-70B-Instruct",
        # Temperature 0.0 for consistent ratings.
        generation_kwargs={"max_new_tokens": 512, "temperature": 0.0},
    )
    evaluate_responses = UltraFeedback(
        name="evaluate_capital_of_spain_q",
        aspect="overall-rating",
        llm=judge_llm,
    )
    evaluate_responses.load()

    # One instruction with two candidate answers to be rated.
    sample_rows = [
        {
            "instruction": "What's the capital of Spain?",
            "generations": ["Madrid", "Barcelona"],
        }
    ]
    result = next(evaluate_responses.process(sample_rows))

    print(result)


[{'instruction': "What's the capital of Spain?", 'generations': ['Madrid', 'Barcelona'], 'ratings': [5, 1], 'rationales': ['The text provides accurate and helpful information, correctly stating the capital of Spain as Madrid. It is confident and free of hallucinations, perfectly aligning with the instruction.', 'The text provides inaccurate information, stating Barcelona as the capital of Spain, which is incorrect. It does not align with the instruction and contains a severe hallucination.'], 'distilabel_metadata': {'raw_output_evaluate_capital_of_spain_q': '#### Output for Text 1\nRating: 5 (Excellent)\nRationale: The text provides accurate and helpful information, correctly stating the capital of Spain as Madrid. It is confident and free of hallucinations, perfectly aligning with the instruction.\n\n#### Output for Text 2\nRating: 1 (Low Quality)\nRationale: The text provides inaccurate information, stating Barcelona as the capital of Spain, which is incorrect. It does not align with

In [8]:
# Demonstrate FormatTextGenerationDPO on one already-rated example; the
# displayed result adds prompt / chosen / rejected fields to the input row.
format_dpo = FormatTextGenerationDPO(pipeline=Pipeline(name="showcase-pipeline"))
format_dpo.load()

rated_rows = [
    {
        "instruction": "What's the capital of Spain?",
        "generations": ["Madrid", "Barcelona"],
        "generation_models": [
            "Meta-Llama-3-8B-Instruct",
            "Mixtral-8x7B-Instruct-v0.1",
        ],
        "ratings": [5, 1],
    }
]
next(format_dpo.process(rated_rows))

[{'instruction': "What's the capital of Spain?",
  'generations': ['Madrid', 'Barcelona'],
  'generation_models': ['Meta-Llama-3-8B-Instruct',
   'Mixtral-8x7B-Instruct-v0.1'],
  'ratings': [5, 1],
  'prompt': "What's the capital of Spain?",
  'prompt_id': '26174c953df26b3049484e4721102dca6b25d2de9e3aa22aa84f25ed1c798512',
  'chosen': [{'role': 'user', 'content': "What's the capital of Spain?"},
   {'role': 'assistant', 'content': 'Madrid'}],
  'chosen_model': 'Meta-Llama-3-8B-Instruct',
  'chosen_rating': 5,
  'rejected': [{'role': 'user', 'content': "What's the capital of Spain?"},
   {'role': 'assistant', 'content': 'Barcelona'}],
  'rejected_model': 'Mixtral-8x7B-Instruct-v0.1',
  'rejected_rating': 1}]

In [9]:
from distilabel.pipeline import Pipeline
from distilabel.steps import PreferenceToArgilla
# Read the Argilla credentials from the environment (.env is loaded via python-dotenv).
load_dotenv()

# Prefer ARGILLA_API_KEY, the name the full-pipeline cell reads; fall back to
# the older Argilla_API name so existing .env files keep working.
argilla_api = os.getenv("ARGILLA_API_KEY") or os.getenv("Argilla_API")
if not argilla_api:
    # Fail fast here instead of handing api_key=None to the Argilla step.
    raise ValueError("ARGILLA_API_KEY not found in .env file or environment variables.")

# Create the step inside the Pipeline context manager
with Pipeline(name="showcase-pipeline") as pipeline:
    to_argilla = PreferenceToArgilla(
        dataset_name="preference-dataset",
        dataset_workspace="argilla",
        # Direct URL of the Argilla Space (https://<owner>-<space>.hf.space),
        # not the huggingface.co/spaces/... page URL. Replace with your own
        # deployment.
        api_url="https://AlexVal-my-argilla.hf.space",
        api_key=argilla_api,
        num_generations=2,
    )

In [None]:
import os
from distilabel.pipeline import Pipeline
from distilabel.steps import (
    LoadDataFromHub,
    GroupColumns,
    FormatTextGenerationDPO,
    PreferenceToArgilla,
)
from distilabel.steps.tasks import TextGeneration, UltraFeedback

# Import the necessary classes for our patch
from distilabel.models import InferenceEndpointsLLM, AsyncLLM
from huggingface_hub import AsyncInferenceClient
from dotenv import load_dotenv


# Load credentials and fail fast so the pipeline never starts without them.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
argilla_api = os.getenv("ARGILLA_API_KEY")
if not hf_token or not argilla_api:
    raise ValueError("HF_TOKEN or ARGILLA_API_KEY not found in environment.")


# End-to-end pipeline: load prompts -> generate with two models -> group the
# generations -> rate them with UltraFeedback -> format for DPO -> push to Argilla.
# PatchedInferenceEndpointsLLM is the class defined in an earlier cell.
with Pipeline(name="generate-dataset") as pipeline:
    load_dataset = LoadDataFromHub(
        name="load_prompts",
        repo_id="argilla/10Kprompts-mini",
        num_examples=1,
    )

    # Two generators answer the same prompts so their outputs can be compared.
    generate_llama = TextGeneration(
        name="generate_llama",
        llm=PatchedInferenceEndpointsLLM(
            model_id="meta-llama/Meta-Llama-3-8B-Instruct",
            generation_kwargs={"max_new_tokens": 512, "temperature": 0.7, "do_sample": True},
        ),
    )
    generate_mixtral = TextGeneration(
        name="generate_mixtral",
        llm=PatchedInferenceEndpointsLLM(
            model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            generation_kwargs={"max_new_tokens": 512, "temperature": 0.7, "do_sample": True},
        ),
    )

    # Collect each generator's `generation` / `model_name` into list columns.
    group_responses = GroupColumns(
        name="group_generations",
        columns=["generation", "model_name"],
        output_columns=["generations", "model_names"],
    )

    # Judge model; temperature 0.0 as in the standalone evaluation cell.
    evaluate_responses = UltraFeedback(
        name="evaluate_with_llama_70b",
        aspect="overall-rating",
        llm=PatchedInferenceEndpointsLLM(
            model_id="meta-llama/Meta-Llama-3-70B-Instruct",
            generation_kwargs={"max_new_tokens": 512, "temperature": 0.0},
        ),
    )

    format_dpo = FormatTextGenerationDPO(name="format_for_dpo")

    to_argilla = PreferenceToArgilla(
        name="upload_to_argilla",
        dataset_name="preference-dataset",
        dataset_workspace="argilla",
        api_url="https://AlexVal-my-argilla.hf.space",
        # Use the key validated above; credentials must never be hard-coded in
        # the notebook.
        # NOTE(review): a literal API key was previously committed on this line;
        # treat it as compromised and revoke/rotate it.
        api_key=argilla_api,
        num_generations=2,
    )

    # Wire the steps: the loader fans out to both generators, which fan in to
    # the grouping step, followed by a linear chain to Argilla.
    load_dataset.connect(generate_llama)
    load_dataset.connect(generate_mixtral)

    generate_llama.connect(group_responses)
    generate_mixtral.connect(group_responses)

    group_responses.connect(evaluate_responses)
    evaluate_responses.connect(format_dpo)
    format_dpo.connect(to_argilla)

In [None]:
# Run the `pipeline` object built in the previous cell and print what run()
# returns. Per the original author's note, expect this to take a long time.
distiset = pipeline.run()
print(distiset)