<a href="https://colab.research.google.com/github/AtharvaNawadkar/gen-ai-experiments/blob/patch-1/Experiments/instructor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Exploring Instructor

Python library for getting structured outputs from LLMs

GitHub: https://github.com/jxnl/instructor

Observations:
- Works well with OpenAI and Anthropic models
- Errors when working with Anyscale (AssertionError: multiple tool calls not supported) and Together (HTTP 429: credit limit exceeded) models

Experiments:
- Test for MCQ creation
- Test for Resume Parsing
- Integration with Educhain


In [None]:
!pip install -qU instructor openai anthropic

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m862.7/862.7 kB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.6/327.6 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
from google.colab import userdata

# Copy each API key from Colab's secret store into the process environment.
# The OpenAI and Anthropic clients below are constructed without an explicit
# key, so they rely on these variables being set.
# A missing secret is reported and skipped instead of aborting the whole cell,
# so the notebook still runs for the providers that are configured.
for secret_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "DEEPINFRA_API_KEY"):
    try:
        os.environ[secret_name] = userdata.get(secret_name)
    except userdata.SecretNotFoundError:
        print(f"Secret {secret_name} is not set in Colab; skipping.")

SecretNotFoundError: Secret ANTHROPIC_API_KEY does not exist.

In [None]:
import instructor
from pydantic import BaseModel
from openai import OpenAI

class UserInfo(BaseModel):
    # Response model for the first extraction demo: a name plus an integer age.
    # (Kept as `#` comments: a class docstring would be picked up by Pydantic
    # as the schema description.)
    name: str
    age: int

# Wrap the OpenAI client with Instructor so `create` accepts a response_model.
client = instructor.from_openai(OpenAI())

# Ask for a UserInfo instance extracted from a single user sentence.
user_info = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=UserInfo,
    messages=[
        {
            "role": "user",
            "content": "Build Fast with AI is 6 months old.",
        }
    ],
)

# Show the two extracted fields, one per line.
print(user_info.name, user_info.age, sep="\n")

Build Fast with AI
6


In [None]:
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field, validator

## Models for tracking MCQs
class Option(BaseModel):
    # One answer choice of an MCQ. `correct` is a string flag ('true'/'false'),
    # not a bool, as spelled out in its field description.
    text: str = Field(
        ...,
        description="The text of the option.",
    )
    correct: str = Field(
        ...,
        description="Whether the option is correct or not. Either 'true' or 'false'",
    )


class MCQ(BaseModel):
    # A single multiple-choice question with its options and optional metadata.
    #
    # Fields that default to None are typed Optional[str] so that an explicit
    # null returned by the model validates instead of raising; fields without
    # a default (question, options, difficulty_rating) remain required.
    question: str = Field(description="The quiz question")
    options: List[Option] = Field(description="The possible answers to the question. The list should contain 4 options.")
    explanation: Optional[str] = Field(default=None, description="Explanation of the question")
    blooms_level: Optional[str] = Field(default=None, description="The Bloom's taxonomy level of the question")
    difficulty_level: Optional[str] = Field(default=None, description="The difficulty level of the question. Can be 'easy', 'medium' or 'hard' ")
    # ge/le constrain the rating to the inclusive range 1..5.
    difficulty_rating: int = Field(ge=1, le=5, description="The difficulty rating of the question (1-5)")
    # default_factory builds a fresh dict per instance rather than sharing a literal.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata for the question.")

class MCQList(BaseModel):
    # Top-level container passed as `response_model`, so that a single request
    # can return several MCQ items.
    questions: List[MCQ]

# Instructor-wrapped OpenAI client used for the MCQ request.
client = instructor.from_openai(OpenAI())

# Request a batch of questions shaped by the MCQList response model.
questions = client.chat.completions.create(
    response_model=MCQList,
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Create 2 MCQs on Human Anatomy"},
    ],
)

# Plain repr of the parsed model.
print(questions)

questions=[MCQ(question='Which of the following is not a bone in the human body?', options=[Option(text='Femur', correct='false'), Option(text='Mandible', correct='false'), Option(text='Sternum', correct='false'), Option(text='Fibula', correct='true')], explanation=None, blooms_level=None, difficulty_level=None, difficulty_rating=3, metadata={}), MCQ(question='Which organ is responsible for filtering blood in the human body?', options=[Option(text='Liver', correct='false'), Option(text='Kidneys', correct='true'), Option(text='Pancreas', correct='false'), Option(text='Lungs', correct='false')], explanation=None, blooms_level=None, difficulty_level=None, difficulty_rating=2, metadata={})]


In [None]:
# Same result as above, serialized as indented JSON for readability.
print(questions.model_dump_json(indent=2))

{
  "questions": [
    {
      "question": "Which of the following is not a bone in the human body?",
      "options": [
        {
          "text": "Femur",
          "correct": "false"
        },
        {
          "text": "Mandible",
          "correct": "false"
        },
        {
          "text": "Sternum",
          "correct": "false"
        },
        {
          "text": "Fibula",
          "correct": "true"
        }
      ],
      "explanation": null,
      "blooms_level": null,
      "difficulty_level": null,
      "difficulty_rating": 3,
      "metadata": {}
    },
    {
      "question": "Which organ is responsible for filtering blood in the human body?",
      "options": [
        {
          "text": "Liver",
          "correct": "false"
        },
        {
          "text": "Kidneys",
          "correct": "true"
        },
        {
          "text": "Pancreas",
          "correct": "false"
        },
        {
          "text": "Lungs",
          "correct": "false"


In [None]:
# testing with Anthropic models

from anthropic import Anthropic

# Wrap the Anthropic client with Instructor so `create` accepts a response_model.
anthropic_client = instructor.from_anthropic(Anthropic())

# Reuse the MCQList schema from the OpenAI experiment.
# note that client.chat.completions.create will also work
resp = anthropic_client.messages.create(
    response_model=MCQList,
    model="claude-3-haiku-20240307",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Create 3 MCQs on Thermodynamics"}],
)

# Serialize the parsed result as indented JSON.
print(resp.model_dump_json(indent=2))

{
  "questions": [
    {
      "question": "What is the first law of thermodynamics?",
      "options": [
        {
          "text": "Energy can be created or destroyed.",
          "correct": "false"
        },
        {
          "text": "Energy can be transformed from one form to another, but the total energy of an isolated system is constant.",
          "correct": "true"
        },
        {
          "text": "Energy can be converted into work, and work can be converted into energy.",
          "correct": "true"
        },
        {
          "text": "Energy can be neither created nor destroyed, but it can be transformed from one form to another.",
          "correct": "true"
        }
      ],
      "explanation": "The first law of thermodynamics states that energy can be transformed from one form to another, but the total energy of an isolated system is constant. It also states that energy can be converted into work, and work can be converted into energy.",
      "blooms_level"

In [None]:
# testing with Anyscale model

import os
import openai
from pydantic import BaseModel
import instructor

# Point the stock OpenAI client at Anyscale's endpoint, with the key read
# from Colab secrets.
client = openai.OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key=userdata.get('ANYSCALE_API_KEY'),
)

# Patch the client with Instructor in JSON-schema mode. With the default mode
# this request failed with
# "Instructor does not support multiple tool calls, use List[Model] instead."
client = instructor.from_openai(client, mode=instructor.Mode.JSON_SCHEMA)

# Define a Pydantic model for the structured response
class UserExtract(BaseModel):
    # Response model for the extraction request in this cell: the person's
    # name and their age as an integer.
    name: str
    age: int

# Send the extraction prompt; the result is requested as a UserExtract.
user: UserExtract = client.chat.completions.create(
    response_model=UserExtract,
    model="meta-llama/Meta-Llama-3-70B-Instruct",
    messages=[{"role": "user", "content": "Extract Jason is 25 years old"}],
)

# Pretty-print the extracted fields as indented JSON.
print(user.model_dump_json(indent=2))


AssertionError: Instructor does not support multiple tool calls, use List[Model] instead.

In [None]:
# testing with Together model

import os
import openai
from pydantic import BaseModel
import instructor

# Together AI client: the OpenAI SDK pointed at Together's endpoint, with the
# key taken from Colab secrets.
client = openai.OpenAI(
    api_key=userdata.get('TOGETHER_API_KEY'),
    base_url="https://api.together.xyz/v1",
)

# Wrap with Instructor, explicitly selecting tools mode for structured outputs.
client = instructor.from_openai(client, mode=instructor.Mode.TOOLS)

# Define a Pydantic model for the structured response
class UserExtract(BaseModel):
    # Response model for the extraction request in this cell: the person's
    # name and their age as an integer.
    name: str
    age: int

# Send the extraction prompt; the result is requested as a UserExtract.
user: UserExtract = client.chat.completions.create(
    response_model=UserExtract,
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=[{"role": "user", "content": "Extract Jason is 25 years old"}],
)

# Sanity checks: right type, and the values stated in the prompt.
assert isinstance(user, UserExtract), "Should be instance of UserExtract"
assert user.name.lower() == "jason"
assert user.age == 25

# Pretty-print the extracted fields as indented JSON.
print(user.model_dump_json(indent=2))


RateLimitError: Error code: 429 - {'error': {'message': 'Credit limit exceeded. Please visit https://api.together.xyz to update your credit settings.', 'type': 'credit_limit', 'param': None, 'code': None}}