In [10]:
!pip install groq



In [None]:
# ----------------------------
# Groq Setup
# ----------------------------
import os
from langsmith import Client
from groq import Groq
from pydantic import BaseModel, Field
from dotenv import load_dotenv

# Load environment variables (optional)
load_dotenv(dotenv_path="../../.env", override=True)

# Credentials must come from the environment / .env file. They are never
# assigned here: writing "" into os.environ would clobber the values that
# load_dotenv just loaded and make every API call fail authentication.
_missing_keys = [
    name for name in ("GROQ_API_KEY", "LANGSMITH_API_KEY") if not os.environ.get(name)
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Set them in your shell or in ../../.env before running this notebook."
    )

# Non-secret LangSmith settings for this notebook.
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langsmith-academy"

# Initialize LangSmith client
client = Client()

# Initialize Groq client with the key validated above (the previous
# os.getenv("") looked up an empty variable name and always returned None).
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

# ----------------------------
# Load Dataset
# ----------------------------
dataset = client.clone_public_dataset(
    "https://smith.langchain.com/public/89ef0d44-a252-4011-8bb8-6a114afc1522/d"
)

# ----------------------------
# Define Toxicity Pydantic Model
# ----------------------------
class Toxicity(BaseModel):
    # Schema for a toxicity verdict: a single string field whose description
    # spells out the two expected labels.
    toxicity: str = Field(
        description=(
            "'Toxic' if this statement is toxic, "
            "'Not toxic' if the statement is not toxic."
        ),
    )

# ----------------------------
# Groq Classifier Function
# ----------------------------
def _label_from_response(response_text: str) -> str:
    """Map a free-text model reply onto 'Toxic' or 'Not toxic'."""
    # Lowercase and turn every non-letter into a space so that markdown
    # ("**Not toxic**"), punctuation and hyphen variants ("non-toxic") all
    # collapse to plain space-separated words.
    normalized = " ".join(
        "".join(ch if ch.isalpha() else " " for ch in response_text.lower()).split()
    )

    # The negated label must be checked first: "not toxic" contains "toxic",
    # so a bare substring test would label every reply as 'Toxic'.
    if any(marker in normalized for marker in ("not toxic", "non toxic", "nontoxic")):
        return "Not toxic"
    if "toxic" in normalized:
        return "Toxic"
    # No recognizable label in the reply: fall back to 'Not toxic'.
    return "Not toxic"


def good_classifier(inputs: dict) -> dict:
    """
    Uses Groq chat model to classify a statement as 'Toxic' or 'Not toxic'.

    Args:
        inputs: Example inputs; must contain the text under the key 'statement'.

    Returns:
        A dict of the form {"class": "Toxic"} or {"class": "Not toxic"}.

    Raises:
        KeyError: If 'statement' is missing from ``inputs``.
    """
    # Construct prompt
    prompt = f"This is the statement: {inputs['statement']}\nClassify it as 'Toxic' or 'Not toxic'."

    # Send request to Groq chat model
    completion = groq_client.chat.completions.create(
        model="openai/gpt-oss-120b",  # Groq model
        messages=[{"role": "user", "content": prompt}],
    )

    # Extract text response; content may be None, so treat that as empty text.
    response_text = (completion.choices[0].message.content or "").strip()

    return {"class": _label_from_response(response_text)}

# ----------------------------
# Summary Evaluator: F1 Score
# ----------------------------
def f1_score_summary_evaluator(outputs: list[dict], reference_outputs: list[dict]) -> dict:
    """
    Compute the F1 score of the 'Toxic' class over paired predictions and references.

    Each dict in ``outputs`` and ``reference_outputs`` is read via its 'class'
    key. Pairs whose labels are not a 'Toxic' / 'Not toxic' combination counted
    below (including 'Not toxic' vs 'Not toxic') do not affect the score.

    Returns:
        {"key": "f1_score", "score": <float>}; the score is 0.0 when there are
        no true positives.
    """
    tp = fp = fn = 0

    for predicted, expected in zip(outputs, reference_outputs):
        labels = (predicted["class"], expected["class"])
        if labels == ("Toxic", "Toxic"):
            tp += 1
        elif labels == ("Toxic", "Not toxic"):
            fp += 1
        elif labels == ("Not toxic", "Toxic"):
            fn += 1

    # Without any true positive, precision and recall are both zero (or
    # undefined), so short-circuit before dividing.
    if tp == 0:
        return {"key": "f1_score", "score": 0.0}

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return {
        "key": "f1_score",
        "score": 2 * (precision * recall) / (precision + recall),
    }

# ----------------------------
# Run Evaluation
# ----------------------------
# Run the classifier against the dataset, passing the F1 function as the
# experiment's summary evaluator.
summary_metrics = [f1_score_summary_evaluator]
results = client.evaluate(
    good_classifier,
    data=dataset,
    summary_evaluators=summary_metrics,
    experiment_prefix="Good Groq Classifier",
)

print("Evaluation complete! Check LangSmith dashboard for results.")


View the evaluation results for experiment: 'Good Groq Classifier-878a7eb0' at:
https://smith.langchain.com/o/fbc0b987-03c7-4ba9-82d3-c46df3d88773/datasets/ced9c8bf-cc09-4177-95cf-222e2ea1d7cf/compare?selectedSessions=f901982a-c24e-4415-b2bf-846de746c00f




0it [00:00, ?it/s]

Evaluation complete! Check LangSmith dashboard for results.


In [None]:
!pip install langchain_groq

Collecting langchain_groq
  Downloading langchain_groq-0.3.8-py3-none-any.whl.metadata (2.6 kB)
Downloading langchain_groq-0.3.8-py3-none-any.whl (16 kB)
Installing collected packages: langchain_groq
Successfully installed langchain_groq-0.3.8
