#Resources

* https://www.youtube.com/watch?v=ZzgUqFtxgXI
* https://www.youtube.com/watch?v=7aBRk_JP-qY
* https://learn.deeplearning.ai/langchain-chat-with-your-data/lesson/3/document-splitting

#Introduction

Title: Exam MCQ Generator

Game Plan:

*   Divide the document into multiple chuncks and then select chunks at random, pass them to Langchain and ask to make an MCQ out of it

#Installs

In [112]:
!pip install -q python-dotenv

!pip install -q langchain_experimental
!pip install -q langchain

!pip install -q openai

In [113]:
!pip install -q pydantic

#Imports

In [114]:
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

#Data Source

In [115]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [116]:
wizard_in_oz_text_path = '/content/drive/MyDrive/datasets/llm-from-scratch/wizard_in_oz.txt'

In [117]:
with open(wizard_in_oz_text_path, 'r', encoding='utf-8') as f:
  text = f.read()

In [118]:
print(text[:100])

﻿Title: Dorothy and the Wizard in Oz


Author: L. Frank Baum

Illustrator: John R. Neill

Release da


#Env

In [119]:
env_path = '/content/drive/MyDrive/credentials/data-analytics-demo/.env'

In [120]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(env_path)

True

In [121]:
GOOGLE_PALM_API_KEY = os.environ['GOOGLE_PALM_API_KEY']
HUGGINGFACE_API_KEY = os.environ['HUGGINGFACE_API_KEY']
OPEN_AI_API_KEY = os.environ['OPEN_AI_API_KEY']

# GOOGLE_PALM_API_KEY, HUGGINGFACE_API_KEY, OPEN_AI_API_KEY

#Model

In [122]:
from pydantic import BaseModel, Field

class MCQModel(BaseModel):
  question: str = Field(description="This is the question text")
  options: list[str] = Field(description="This is a list of multiple choices or options avalible")
  correct_option: str = Field(description="This is the correct choice or option")

In [123]:
from langchain.output_parsers import PydanticOutputParser

pydantic_parser = PydanticOutputParser(pydantic_object=MCQModel)

format_instructions = pydantic_parser.get_format_instructions()

print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"question": {"title": "Question", "description": "This is the question text", "type": "string"}, "options": {"title": "Options", "description": "This is a list of multiple choices or options avalible", "type": "array", "items": {"type": "string"}}, "correct_option": {"title": "Correct Option", "description": "This is the correct choice or option", "type": "string"}}, "required": ["question", "options", "correct_option"]}
```


In [124]:
langchain_llm = ChatOpenAI(openai_api_key=OPEN_AI_API_KEY, model_name="gpt-3.5-turbo-0613", verbose=False,)

In [125]:
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

In [126]:
texts = text_splitter.create_documents([text])

In [127]:
print(f'Total Documents Created: {len(texts)}')

Total Documents Created: 287


In [128]:
prompt = (
    "Text:"
    "\n{text}"
    "\nGenerate an Multiple Choice Question from this text. Also return the correct option."
    "\nDo not use any outside information"
    "\n{format_instructions}"
)

In [129]:
prompt_template = ChatPromptTemplate.from_template(prompt)

prompt_template

ChatPromptTemplate(input_variables=['format_instructions', 'text'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['format_instructions', 'text'], template='Text:\n{text}\nGenerate an Multiple Choice Question from this text. Also return the correct option.\nDo not use any outside information\n{format_instructions}'))])

In [130]:
llm_chain = LLMChain(llm=langchain_llm, prompt=prompt_template, output_key="result")

In [131]:
from langchain.callbacks import get_openai_callback as get_langchain_openai_callback
import random

def get_question():
  doc_index = random.randint(0, len(texts)-1)

  with get_langchain_openai_callback() as cb_langchain:
    response = llm_chain.invoke({"text": texts[doc_index], "format_instructions": format_instructions})

  return {'doc_index': doc_index, 'mcq': response['result'], 'cb_langchain': cb_langchain}

#Testing

In [132]:
response_raw = get_question()
response_parsed = MCQModel.parse_raw(response_raw['mcq'])

In [133]:
print(response_parsed)

print('\n')
print(f"doc_index = {response_raw['doc_index']}")
print('\ncosting:\n')
print(f"{response_raw['cb_langchain']}")

question='Who ruled the Land of Oz?' options=['The Princess', 'The Wizard', 'The Witches', 'The Munchkins'] correct_option='The Wizard'


doc_index = 218

costing:

Tokens Used: 538
	Prompt Tokens: 497
	Completion Tokens: 41
Successful Requests: 1
Total Cost (USD): $0.0008275


In [134]:
response_parsed

MCQModel(question='Who ruled the Land of Oz?', options=['The Princess', 'The Wizard', 'The Witches', 'The Munchkins'], correct_option='The Wizard')

#Pydantic Demo

In [135]:
from pydantic import BaseModel, EmailStr, validator

class User(BaseModel):
  name: str
  # email: EmailStr
  email: str
  account_id: int

  @validator("account_id")
  def validate_account_id(cls, value):
    if value<=0:
      raise ValueError(f'Account ID cannot be negative: {value}')

    return value

user = User(name='jack', email='a@b.com', account_id=1234)

print(user)
print(user.json())
print(user.dict())
print(user.parse_raw(user.json()))

name='jack' email='a@b.com' account_id=1234
{"name": "jack", "email": "a@b.com", "account_id": 1234}
{'name': 'jack', 'email': 'a@b.com', 'account_id': 1234}
name='jack' email='a@b.com' account_id=1234
