In [4]:
#load data 
import os
import random 
import itertools
import warnings
from typing import List

from langchain.chains import QAGenerationChain
from langchain.chat_models import ChatOpenAI
from langchain_openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter,MarkdownTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.evaluation.qa import QAEvalChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import ResponseSchema,StructuredOutputParser
import json
## querying the model with the prompt template
from langchain.chains import LLMChain
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

     

In [5]:

teacher_prompt_template = """You are the teacher in a quiz setting, and the student has requested question in detail for evaluation based on the text provided. The student can choose the type of question: coding, MCQ (multiple-choice), or theoretical. Provide a suitable question and its answer based on the chosen type.Please focus on the type of question asked , it is important to give exactly that kind of question as specified by the student.


TEXT: {text} # text on which the question will be based on 
QUESTION TYPE: {question_type}  # Specify 'coding', 'mcq', or 'theoretical'

# Based on the chosen question type, generate a question and answer.
If the question type is coding also write the code you are refrencing in the question or with which the question is related to. 
QUESTION: question
ANSWER: answer

"""
teacher_prompt_template2 = """You are the teacher in a quiz setting, and the student has requested a coding question with answer based on the text provided.Provide a suitable coding question and its answer.



Everything between the ``` must be valid json.

Please come up with a question/answer pair, in the specified JSON format, for the following text:
"""

In [6]:

templ = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions.
Given a piece of text, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities.
When coming up with this question/answer pair, you must respond in the following format:
```
{{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}}
```

Everything between the ``` must be valid json.

Please come up with a question/answer pair, in the specified JSON format, for the following text:
----------------
{text}"""

In [124]:

from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

question_schema = ResponseSchema(name="question",
                             description="This is the detailed specific type question based on the text as requested by the student")

answer_schema = ResponseSchema(name="answer",
                                      description="This is the answer to the question generated ")


response_schemas = [question_schema,
                    answer_schema
                    ]
     

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [125]:

QA_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["text","question_type"],
    template=teacher_prompt_template)

In [126]:
PROMPT = PromptTemplate.from_template(templ)

def generate_eval(text: str, num_questions: int, chunk: int,question_type: str):
    """
    Generate eval set
    @param text: text to generate eval set from
    @param num_questions: number of questions to generate
    @param chunk: chunk size to draw question from in the doc
    @return: eval set as JSON list
    """
    #length of text
    n = len(text)
    if n < chunk:
        raise ValueError("Text length should be greater than or equal to the chunk size.")
    #starting indices of random chunks 
    starting_indices = [random.randint(0, n - chunk) for _ in range(num_questions)]
    #getting chunks from indices
    sub_sequences = [text[i:i + chunk] for i in starting_indices]
    
    # Set the grading prompt based on the grade_answer_prompt parameter
    prompt =QA_PROMPT_TEMPLATE

    #Initializing QAGeneration chain
    
    chain = LLMChain(llm=OpenAI(), prompt=prompt)

    #chain = QAGenerationChain.from_llm(llm=OpenAI(),prompt=prompt)
    eval_set = []

    # Generate a cool name based on the template
    response=chain.run(text=sub_sequences,question_type=question_type,number=num_questions)
    
    # Print the raw response for debugging
    print("Raw Response:", response)
    
    return response




In [143]:
res=generate_eval(text,1,300,'mcq')

Raw Response: QUESTION: Which algorithm is used to minimize the cost function J in linear regression?
a) Gradient Descent
b) Stochastic Gradient Descent
c) Normal Equation
d) Batch Gradient Descent
ANSWER: d) Batch Gradient Descent


In [161]:
res='''
Raw Response: MCQ: What does the decorator in FastAPI indicate?
a) The URL path of the function.
b) The type of data to be returned.
c) The HTTP method that the function will respond to.
d) The name of the routing function.

ANSWER: c) The HTTP method that the function will respond to.'''

In [162]:
# Splitting the response into question and answer parts
try:
        split_response = res.split("QUESTION:")[1]
except:
        split_response = res.split("MCQ:")[1]
question = split_response.split("ANSWER:")[0].strip()
answer = split_response.split("ANSWER:")[1].strip()

# Creating a dictionary
response_dict = {
    "question": question,
    "answer": answer
}

print(response_dict)


{'question': 'What does the decorator in FastAPI indicate?\na) The URL path of the function.\nb) The type of data to be returned.\nc) The HTTP method that the function will respond to.\nd) The name of the routing function.', 'answer': 'c) The HTTP method that the function will respond to.'}


In [159]:
split_response = res.split("MCQ:")
split_response[1].split('ANSWER:')[0].strip()

'What does the decorator in FastAPI indicate?\na) The URL path of the function.\nb) The type of data to be returned.\nc) The HTTP method that the function will respond to.\nd) The name of the routing function.'

In [142]:
for r in res.split('ANSWER'):
    

QUESTION: Write the code for the batch gradient descent algorithm.

: The code for the batch gradient descent algorithm would depend on the programming language being used. In general, it would involve using a loop to iterate through the data set, calculating the gradients using the partial derivatives, and updating the values of B0 and B1 accordingly. An example of the code in Python might look like this:

```
# Initialize values of B0 and B1
B0 = 0
B1 = 0

# Set learning rate
alpha = 0.01

# Set number of iterations
num_iterations = 1000

# Loop through data set
for i in range(num_iterations):
  # Calculate gradients using partial derivatives
  d_B0 = 0
  d_B1 = 0
  for data_point in data_set:
    d_B0 += (B0 + B1 * data_point[0] - data_point[1])
    d_B1 += (B0 + B1 * data_point[0] - data_point[1]) * data_point[0]

  # Update values of B0 and B1
  B0 = B0 - alpha * d_B0
  B1 = B1


In [None]:
#no. of users, courses ,no. of documents generated ,no. of documents-ready, no. of documents-processing ,main dashboard, saturday deadline

In [5]:
from langchain.chains.summarize import load_summarize_chain
# Function to change our long text about a person into documents
def split_text(user_information):
    # First we make our text splitter
    text_splitter = MarkdownTextSplitter(chunk_size=2000, chunk_overlap=200)

    # Then we split our user information into different documents
    docs = text_splitter.create_documents([user_information])

    return docs

# Prompts - We'll do a dynamic prompt based on the option the users selects
# We'll hold different instructions in this dictionary below
response_types = {
    'Interview Questions' : """
        Your goal is to generate interview questions that we can ask them
        Please respond with list of a few interview questions based on the topics above
    """,
    '1-Page Summary' : """
        Your goal is to generate a 1 page summary about them
        Please respond with a few short paragraphs that would prepare someone to talk to this person
    """
}

map_prompt = """You are a helpful AI bot that aids a user in research.
Below is information about a person named {persons_name}.
Information will include tweets, interview transcripts, and blog posts about {persons_name}
Use specifics from the research when possible

{response_type}

% START OF INFORMATION ABOUT {persons_name}:
{text}
% END OF INFORMATION ABOUT {persons_name}:

YOUR RESPONSE:"""
map_prompt2="""You are a smart assistant designed to help teachers come up with {question_type} questions.
Given a piece of text, you must come up with a {question_type} question and answer pair based on text that can be used to test a student's understanding of the text content.
When coming up with this {question_type} question/answer pair, you must respond in the following format:
```
{{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}}
```

Everything between the ``` must be valid json.

Please come up with a {question_type} question/answer pair, in the specified JSON format, for the following text:
----------------
{text}"""
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "persons_name", "response_type"])

In [6]:
combine_prompt = """
You are a helpful AI bot that aids a user in research.
You will be given information about {persons_name}.
Do not make anything up, only use information which is in the person's context

{response_type}

% PERSON CONTEXT
{text}

% YOUR RESPONSE:
"""
combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text", "persons_name", "response_type"])


In [7]:
user_information='ks'

In [8]:
user_information_docs = split_text(user_information)

# Calls the function above
llm = ChatOpenAI(temperature=.7, max_tokens=2000, model_name='gpt-4')

chain = load_summarize_chain(llm,
                                chain_type="map_reduce",
                                map_prompt=map_prompt_template,
                                combine_prompt=combine_prompt_template,
                                # verbose=True
                                )

In [9]:
# Here we will pass our user information we gathered, the persons name and the response type from the radio button
output = chain({"input_documents": user_information_docs, # The seven docs that were created before
                "persons_name": person_name,
                "response_type" : response_types[output_type]
                })


NameError: name 'person_name' is not defined

In [10]:
# Open the Markdown file in read mode
with open('../1.md', 'r', encoding='utf-8') as file:
    # Read the contents of the file
    markdown_content = file.read()

In [11]:
def split_text(content):
    # First we make our text splitter
    text_splitter = MarkdownTextSplitter(chunk_size=2000, chunk_overlap=200)

    # Then we split our user information into different documents
    docs = text_splitter.create_documents([content])

    return docs


In [12]:
len(markdown_content)

4924

In [13]:
markdown_docs=split_text(markdown_content)

In [14]:
len(markdown_docs)

4

In [15]:
llm = OpenAI(temperature=0, max_tokens=2000)

In [16]:
from langchain.output_parsers import ResponseSchema 
from langchain.output_parsers import StructuredOutputParser 
brand_name_schema = ResponseSchema(name="brand_name", description="This is the name of the brand") 
likelihood_of_success_schema = ResponseSchema(name="likelihood_of_success", description="This is an integer score between 1-10") 
reasoning_schema = ResponseSchema(name="reasoning", description="This is the reasons for the score") 
response_schemas = [brand_name_schema, likelihood_of_success_schema, reasoning_schema]

In [17]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List

# Define your desired data structure.
class Topic(BaseModel):
    title: List = Field(description="List of {n} titles of the topics extracted from the provided markdown text")
    description: List = Field(description="List of {n} descripitons of the topics extracted from the provided markdown text")

output_parser = JsonOutputParser(pydantic_object=Topic)
format_instructions = output_parser.get_format_instructions()

In [18]:
topic_template="""Extract {n} different educational topics covered in the provided markdown text from which different questions can be asked.
                    Generate {n} different educational topics based on the following text:
                    \n {text_input}\n
                    {format_instructions}"""
TOPIC_TEMPLATE2="""
Extract {n} educational topics covered in the provided markdown text from which questions can be formulated.
Generate {n} educational topics based on the following text:

{text_input}

{format_instructions}
"""

In [24]:
topic_extraction_prompt = PromptTemplate( input_variables=["text_input","n"] ,partial_variables={"format_instructions":format_instructions},template=TOPIC_TEMPLATE2)

In [29]:
chain = topic_extraction_prompt | llm | output_parser
raw_output=chain.invoke({"text_input":markdown_docs,
              "n":10})


In [30]:
raw_output

{'title': ['Fast API Routing',
  'Understanding Routing in Fast API',
  'Path Parameters and Numeric Validations',
  'Query Parameters and String Validations',
  'Exercises',
  '1. Write a FastAPI application with the following routes:',
  '2. Modify the "/items/" route in the above application to add a string query parameter `sort` with a default value of "asc".',
  '3. What will happen if you send a GET request to the "/greet/{name}" route of the above application without providing a `name` in the URL?',
  '4. Explain the role of decorators in FastAPI routing.',
  'External Resource Links'],
 'description': ['Putting signboards on the code highway to avoid getting lost!',
  'Routing in Fast API is a way to direct the incoming HTTP requests to specific resources or functions based on the URL path.',
  'Path parameters are variables in the URL path that are processed as input parameters by the routing function.',
  'Query parameters are additional inputs provided in the URL after a `?`

In [32]:
topics

{'topic_1': {'title': 'Fast API Routing',
  'description': 'Understanding routing in Fast API and the use of decorators to handle different types of HTTP requests',
  'type': 'string'},
 'topic_2': {'title': 'Path Parameters and Numeric Validations',
  'description': "Defining and validating path parameters in Fast API using Python's type hints",
  'type': 'string'},
 'topic_3': {'title': 'Query Parameters and String Validations',
  'description': "Using query parameters to filter and sort data in Fast API and validating them using Python's built-in type hints",
  'type': 'string'},
 'topic_4': {'title': 'External Resource Links',
  'description': 'Links to external resources for further learning on Fast API routing',
  'type': 'string'}}

In [33]:
topic_extraction_chain = LLMChain(llm=llm, prompt=topic_extraction_prompt,output_parser=output_parser) 

In [34]:
qa_prompt_template="""You are a smart assistant designed to help teachers come up with a single detailed {question_type} question.
Given a list of topics, you must come up with a detailed {question_type} question that can be used to test a student's understanding of the topic content.
Please focus on the type of question asked, that is detailed {question_type} question, only generate a single detailed {question_type} question based on topics.
When coming up with this {question_type} question and answer, you must respond in the following format:

QUESTION: DETAILED_{question_type}_QUESTION_HERE,
ANSWER: THE_ANSWER_HERE

Please come up with a single detailed {question_type} question and answer ,in a dictionary format ,from the following list of topics:
----------------
{topics}"""
QA_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["topics","question_type"],
    template=qa_prompt_template)


In [35]:
# Define your desired data structure.
class MCQ(BaseModel):
    question: str = Field(description="The MCQ Question")
    A:str =Field(description="The first option of the MCQ")
    B:str =Field(description="The second option of the MCQ")
    C:str =Field(description="The third option of the MCQ")
    D:str =Field(description="The fourth option of the MCQ")
    answer:str = Field(description="Answer of the MCQ Question")

output_parser = JsonOutputParser(pydantic_object=MCQ)
format_instructions = output_parser.get_format_instructions()

In [88]:
# Define your desired data structure.
class Coding(BaseModel):
    question: str = Field(description="The Coding Question along with the code (if required)")
    answer:str = Field(description="Code answer in language specified in the question")

output_parser = JsonOutputParser(pydantic_object=MCQ)
format_instructions = output_parser.get_format_instructions()

In [89]:
qa_prompt_template="""You are a smart assistant designed to help teachers come up with detailed {question_type} question.
Given a topic, you must come up with a detailed {question_type} question that can be used to test a student's understanding of the topic .
Please focus on the type of question asked, that is detailed {question_type} question, only generate a single detailed {question_type} question based on the topic.

Please come up with a single detailed {question_type} question and answer ,in a dictionary format ,from the following topic:
----------------
TOPIC TITLE: {topic_title}
TOPIC DESCRIPTION: {topic_description}\n
{format_instructions}
"""
qa_prompt_template2="""
You are a smart assistant designed to help teachers come up with detailed {question_type} questions.
Given a topic, your task is to generate a detailed {question_type} question aimed at testing a student's understanding of the topic.
Please focus on crafting a single detailed {question_type} question along with its answer, presented in a dictionary format, based on the provided topic:

----------------
TOPIC TITLE: {topic_title}
TOPIC DESCRIPTION: {topic_description}

{format_instructions}
"""
QA_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["topics","question_type","topic_title","topic_description"],
    partial_variables={"format_instructions":format_instructions},
    template=qa_prompt_template2)

In [99]:
# Set the grading prompt based on the grade_answer_prompt parameter
question_type='MCQ'
i=0

In [100]:
llm=OpenAI(name='gpt-4')

In [101]:
chain = QA_PROMPT_TEMPLATE | llm | output_parser
raw_output=chain.invoke({"topic_title":topics[f"topic_{i+1}"]["title"],"topic_description":topics[f"topic_{i+1}"]["description"],"question_type":question_type})

In [102]:
for i in raw_output.keys():
    print(raw_output[i])
    print('????????????????????????????????????')

Which of the following decorators is used to handle GET requests in Fast API routing?
????????????????????????????????????
@app.get
????????????????????????????????????
@app.post
????????????????????????????????????
@app.put
????????????????????????????????????
@app.delete
????????????????????????????????????
A
????????????????????????????????????


In [264]:
raw_output

{'question': "Create a Fast API endpoint that accepts a path parameter for a specific user and a query parameter for sorting the user's data by date. The endpoint should return a JSON response with the sorted data.",
 'question_code': "from fastapi import FastAPI\n\napp = FastAPI()\n\n@app.get('/users/{user_id}')\ndef get_user_data(user_id: int, sort_by: str):\n    # code to retrieve user data\n    # code to sort data by date\n    # return sorted data as JSON response",
 'answer': "from fastapi import FastAPI\n\napp = FastAPI()\n\n@app.get('/users/{user_id}')\ndef get_user_data(user_id: int, sort_by: str):\n    # code to retrieve user data\n    # code to sort data by date\n    # return sorted data as JSON response"}

In [263]:
print(raw_output['question_code'])

from fastapi import FastAPI

app = FastAPI()

@app.get('/users/{user_id}')
def get_user_data(user_id: int, sort_by: str):
    # code to retrieve user data
    # code to sort data by date
    # return sorted data as JSON response


In [215]:

#Initializing QAGeneration chain
i=0
chain = LLMChain(llm=OpenAI(temperature=0,max_tokens=2000),prompt=QA_PROMPT_TEMPLATE)


#chain = QAGenerationChain.from_llm(llm=OpenAI())
eval_set = []

# Generate a cool name based on the template
response=chain.run(topic_title=topics[f"topic_{i+1}"]["title"],topic_description=topics[f"topic_{i+1}"]["description"],question_type=question_type)
# Print the raw response for debugging
print("Raw Response:", response)


Raw Response: 

QUESTION: What is the role of decorators in Fast API routing?
ANSWER: Decorators are used to define the path and HTTP method for a specific function, allowing Fast API to handle different types of HTTP requests.


In [139]:
def parse(response):
    try:
        split_response = response.split("QUESTION:")[1]
    except:
        split_response = response.split("MCQ:")[1]
    question = split_response.split("ANSWER:")[0].strip()
    answer = split_response.split("ANSWER:")[1].strip()

    # Creating a dictionary
    response_dict = {
        "question": question,
        "answer": answer
    }
    return response_dict

In [140]:
parse(response)

{'question': 'What is the purpose of using decorators in FastAPI routing?\nA) To add additional functionality to the route\nB) To specify the HTTP method for the route\nC) To validate path parameters\nD) To handle query parameters',
 'answer': 'A) To add additional functionality to the route'}

In [224]:
question_type

'MCQ (Multiple Choice Question)'

In [128]:
import markdown

def convert_to_markdown(text):
    return markdown.markdown(text)

In [129]:
convert_to_markdown(response)

'<p>QUESTION: Write a FastAPI endpoint that accepts a path parameter for a user\'s ID and a query parameter for their preferred language. The endpoint should return a JSON response with the user\'s ID, name, and preferred language.\nANSWER:\n@app.get("/user/{id}")\ndef get_user(id: int, language: str):\n    # code to retrieve user information from database\n    return {"id": id, "name": user.name, "language": language}</p>'

In [8]:
teacher_prompt_template = """You are the teacher in a quiz setting, and the student has requested question in detail for evaluation based on the text provided. The student can choose the type of question: coding, MCQ (multiple-choice), or theoretical. Provide a suitable question and its answer based on the chosen type.Please focus on the type of question asked , it is important to give exactly that kind of question as specified by the student.


TEXT: {text} # text on which the question will be based on 
QUESTION TYPE: {question_type}  # Specify 'coding', 'mcq', or 'theoretical'

# Based on the chosen question type, generate a question and answer.
If the question type is coding also write the code you are refrencing in the question or with which the question is related to. 
QUESTION: question
ANSWER: answer

"""

In [9]:
qa_prompt_template="""You are a smart assistant designed to help teachers come up with detailed {question_type} question.
Given a topic, you must come up with a detailed {question_type} question that can be used to test a student's understanding of the topic .
Please focus on the type of question asked, that is detailed {question_type} question, only generate a single detailed {question_type} question based on the topic.
When coming up with this {question_type} question and answer, you must respond in the following format:

QUESTION: DETAILED_{question_type}_QUESTION_HERE,
ANSWER: THE_ANSWER_HERE

Please come up with a single detailed {question_type} question and answer ,in a dictionary format ,from the following topic:
----------------
TOPIC TITLE: {topic_title}
TOPIC DESCRIPTION: {topic_description}"""

In [10]:

# Define your desired data structure.
class Topic(BaseModel):
    topic_name: str = Field(description="Name of Topic extracted from the provided markdown text")
    topic_description: str = Field(description="Description of Topic extracted from the provided markdown text")

output_parser = JsonOutputParser(pydantic_object=Topic)
format_instructions = output_parser.get_format_instructions()
topic_template="""Extract {n} educational topics covered in the provided markdown text from which question can be asked.
                    Generate {n} educational topics based on the following text:
                    \n {text_input}\n
                    {format_instructions}"""
                    
topic_extraction_prompt = PromptTemplate( input_variables=["text_input","n"] ,partial_variables={"format_instructions":format_instructions},template=topic_template)

QA_PROMPT_TEMPLATE1 = PromptTemplate(
    input_variables=["question_type","topic_title","topic_description"],
    template=qa_prompt_template)

QA_PROMPT_TEMPLATE2 = PromptTemplate(
    input_variables=["text","question_type"],
    template=teacher_prompt_template,
)


In [43]:
def split_text(content):
    # First we make our text splitter
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

    # Then we split our user information into different documents
    docs = text_splitter.create_documents([content])

    return docs


In [44]:
with open('../example.txt','r') as f:
    lines=f.readlines()
text=''.join([line.replace('\n',' ')for line in lines])

In [3]:

num_questions=5

markdown_docs=split_text(text)
llm = OpenAI(temperature=0)
chain = topic_extraction_prompt | llm | output_parser
raw_output=chain.invoke({"text_input":markdown_docs,
            "n":num_questions})
topics=raw_output['properties']



NameError: name 'split_text' is not defined

In [2]:
markdown_docs

NameError: name 'markdown_docs' is not defined

In [46]:
len(markdown_docs[0].to_json()['kwargs']['page_content'])

493

In [None]:

def generate_eval(text: str, num_questions: int, chunk: int,question_type: str):
    """
    Generate eval set
    @param text: text to generate eval set from
    @param num_questions: number of questions to generate
    @param chunk: chunk size to draw question from in the doc
    @return: eval set as JSON list
    """
    #length of text
    n = len(text)
    if n < chunk:
        raise ValueError("Text length should be greater than or equal to the chunk size.")
    #starting indices of random chunks 
    markdown_docs=split_text(text)
    llm = OpenAI(temperature=0, max_tokens=2000)
    chain = topic_extraction_prompt | llm | output_parser
    raw_output=chain.invoke({"text_input":markdown_docs,
                "n":num_questions})
    topics=raw_output['properties']
    # Set the grading prompt based on the grade_answer_prompt parameter
    chain = LLMChain(llm=OpenAI(temperature=0,max_tokens=2000),prompt=QA_PROMPT_TEMPLATE1)
    eval_set=[]

    #Initializing QAGeneration chain
    for i in range(num_questions):
        # Generate a cool name based on the template

        response=chain.run(topic_title=topics[f"topic_{i+1}"]["title"],topic_description=topics[f"topic_{i+1}"]["description"],question_type=question_type)
        parsed_response=parse(response)
        eval_set.append(parsed_response)
    return eval_set
