In [91]:
from fastapi import FastAPI, Form, Request, Response, File, Depends, HTTPException, status
from fastapi.responses import RedirectResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.encoders import jsonable_encoder
from langchain.chat_models import ChatOpenAI
from langchain.chains import QAGenerationChain
from langchain.text_splitter import TokenTextSplitter
from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import RetrievalQA
import os
import json
import time
import uvicorn
import aiofiles
from PyPDF2 import PdfReader
import csv

In [92]:
# ASGI application object; the route decorators in this notebook register on it.
app = FastAPI()

# Jinja2 template loader; templates are looked up in the "templates" directory.
templates = Jinja2Templates(directory="templates")

# Do not hardcode the key in the notebook. Keep whatever the environment already
# provides and only fall back to the empty placeholder when the variable is unset,
# so running this cell does not wipe a valid key.
os.environ.setdefault("OPENAI_API_KEY", "")


In [93]:
# Set file path
# file_path = '/content/Big Mac Index.pdf'

def count_pdf_pages(pdf_path):
    """Return the number of pages in the PDF at ``pdf_path``.

    Best-effort helper: any exception raised while opening or reading the
    file is printed and ``None`` is returned instead of propagating.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        The page count, or ``None`` when the file could not be read.
    """
    try:
        page_total = len(PdfReader(pdf_path).pages)
    except Exception as err:
        print("Error:", err)
        return None
    return page_total

In [94]:
def file_processing(file_path):
    """Load a PDF and split its text into two lists of LangChain documents.

    The text of every page returned by ``PyPDFLoader`` is concatenated into a
    single string. That string is cut into large chunks (10000 tokens, 200
    overlap), and those chunks are then re-split into smaller documents
    (1000 tokens, 100 overlap).

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        A ``(document_ques_gen, document_answer_gen)`` tuple: the large-chunk
        documents and the smaller documents obtained by splitting them.
    """
    pages = PyPDFLoader(file_path).load()

    # Merge all pages into one string so chunk boundaries ignore page breaks.
    full_text = ''.join(page.page_content for page in pages)

    coarse_splitter = TokenTextSplitter(
        model_name='gpt-3.5-turbo',
        chunk_size=10000,
        chunk_overlap=200,
    )
    document_ques_gen = [
        Document(page_content=chunk)
        for chunk in coarse_splitter.split_text(full_text)
    ]

    fine_splitter = TokenTextSplitter(
        model_name='gpt-3.5-turbo',
        chunk_size=1000,
        chunk_overlap=100,
    )
    document_answer_gen = fine_splitter.split_documents(document_ques_gen)

    return document_ques_gen, document_answer_gen

In [95]:
def llm_pipeline(file_path):
    """Generate True/False questions from a PDF and build a chain to answer them.

    Steps:
      1. ``file_processing`` splits the PDF into large and small documents.
      2. A "refine" summarize chain (gpt-3.5-turbo, temperature 0.3) is run over
         the large documents with the question prompts below; its output is a
         single block of text.
      3. The small documents are embedded with ``OpenAIEmbeddings`` and stored in
         a FAISS index.
      4. The generated text is split into lines and only lines ending in ``?``
         or ``.`` are kept as questions.
      5. A "stuff" ``RetrievalQA`` chain (gpt-3.5-turbo, temperature 0.1) is
         built on top of the FAISS retriever.

    Args:
        file_path: Path of the PDF file to process.

    Returns:
        A ``(answer_generation_chain, filtered_ques_list)`` tuple: the
        ``RetrievalQA`` chain and the list of question strings.
    """
    document_ques_gen, document_answer_gen = file_processing(file_path)

    llm_ques_gen_pipeline = ChatOpenAI(
        temperature = 0.3,
        model = "gpt-3.5-turbo"
    )

    prompt_template = """
    You are an expert at creating questions based on materials and documentation.
    Your goal is to prepare True or False questions for their exam tests.
    You do this by asking questions about the text below:

    ------------
    {text}
    ------------

    Create True or False type questions that will prepare the students for their tests.
    Make sure not to lose any important information.

    QUESTIONS:
    """

    PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=["text"])

    refine_template = ("""
    You are an expert at creating practice True or False questions based on material and documentation.
    Your goal is to help a learner prepare for a test.
    We have received some practice True or False questions to a certain extent: {existing_answer}.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    {text}
    ------------

    Given the new context, refine the original questions in English.
    If the context is not helpful, please provide the original questions.
    QUESTIONS:
    """
    )

    REFINE_PROMPT_QUESTIONS = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )

    # The first chunk is processed with PROMPT_QUESTIONS; each following chunk
    # refines the running question list through REFINE_PROMPT_QUESTIONS.
    ques_gen_chain = load_summarize_chain(llm = llm_ques_gen_pipeline,
                                            chain_type = "refine",
                                            verbose = True,
                                            question_prompt=PROMPT_QUESTIONS,
                                            refine_prompt=REFINE_PROMPT_QUESTIONS)

    ques = ques_gen_chain.run(document_ques_gen)

    embeddings = OpenAIEmbeddings()

    vector_store = FAISS.from_documents(document_answer_gen, embeddings)

    llm_answer_gen = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")

    # Strip each line before testing its ending so that a question followed by
    # trailing whitespace (or a '\r') is kept instead of being silently dropped.
    stripped_lines = (line.strip() for line in ques.split("\n"))
    filtered_ques_list = [line for line in stripped_lines if line.endswith(('?', '.'))]

    answer_generation_chain = RetrievalQA.from_chain_type(llm=llm_answer_gen,
                                                chain_type="stuff",
                                                retriever=vector_store.as_retriever())

    return answer_generation_chain, filtered_ques_list




# Answer each question and save to a file
# for question in question_list:
#     print("Question: ", question)
#     answer = answer_gen_chain.run(question)
#     print("Answer: ", answer)
#     print("--------------------------------------------------\\n\\n")
#     # Save answer to file
#     with open("answers.txt", "a") as f:
#         f.write("Question: " + question + "\\n")
#         f.write("Answer: " + answer + "\\n")
#         f.write("--------------------------------------------------\\n\\n")


In [96]:
def get_csv(file_path):
    """Generate questions and answers for a PDF and write them to a CSV file.

    Runs ``llm_pipeline`` on ``file_path``, asks the returned chain to answer
    every generated question, and writes one ``Question``/``Answer`` row per
    question to ``static/output/QA.csv`` (overwriting any previous file).
    Each question and answer is also printed as it is produced.

    Args:
        file_path: Path of the PDF file to process.

    Returns:
        The path of the CSV file that was written.
    """
    answer_generation_chain, ques_list = llm_pipeline(file_path)

    base_folder = 'static/output/'
    # makedirs also creates the missing parent "static" directory and tolerates
    # an already existing folder; a bare os.mkdir handles neither case.
    os.makedirs(base_folder, exist_ok=True)
    output_file = base_folder + "QA.csv"

    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(["Question", "Answer"])  # header row

        for question in ques_list:
            print("Question: ", question)
            answer = answer_generation_chain.run(question)
            print("Answer: ", answer)
            print("--------------------------------------------------\n\n")

            # Rows are written as they are produced, inside the open file context.
            csv_writer.writerow([question, answer])
    return output_file

In [97]:
@app.get("/")
async def index(request: Request):
    """Serve the landing page by rendering the ``index.html`` template."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)

In [100]:
@app.post("/upload")
async def chat(request: Request, pdf_file: bytes = File(), filename: str = Form(...)):
    """Store an uploaded PDF on disk and report where it was saved.

    Args:
        request: Incoming request (not used in the body).
        pdf_file: Raw bytes of the uploaded file.
        filename: Form field sent with the upload. It is required by the
            signature but not used: the file is always written under the
            fixed name below.

    Returns:
        A ``Response`` whose body is the JSON string
        ``{"msg": "success", "pdf_filename": <saved path>}``.
    """
    base_folder = 'static/docs/'
    # makedirs creates the parent "static" directory when it is missing and
    # tolerates an existing folder, so the first upload cannot fail on mkdir.
    os.makedirs(base_folder, exist_ok=True)
    # NOTE(review): every upload overwrites this single file and `filename` is
    # ignored; confirm whether a sanitized client-supplied name should be used.
    pdf_filename = os.path.join(base_folder,  'Big Mac Index.pdf')

    async with aiofiles.open(pdf_filename, 'wb') as f:
        await f.write(pdf_file)
    # Disabled page-count limit, kept for reference:
    # page_count = count_pdf_pages(pdf_filename)
    # if page_count > 5:
    #     return Response(jsonable_encoder(json.dumps({"msg": 'error'})))
    response_data = jsonable_encoder(json.dumps({"msg": 'success',"pdf_filename": pdf_filename}))
    res = Response(response_data)
    return res


In [104]:
# NOTE(review): `Res` is not defined in any cell shown in this notebook (the upload
# endpoint's local is lowercase `res`), and the execution counter jumps from 100 to
# 104, so this cell presumably depends on state from cells that are no longer
# present — confirm before relying on Restart & Run All.
print(Res)

Here are 25 tricky true or false questions based on the text, along with answers:

1. True or False: The Big Mac Index was introduced in The Economist by Pam Woodall in 1986.
   - True

2. True or False: The Big Mac Index is primarily used for measuring the quality of fast-food burgers.
   - False

3. True or False: The Big Mac Index compares the relative prices of purchasing a Big Mac in different countries.
   - True

4. True or False: The Big Mac Index was initially intended to be a serious tool for evaluating exchange rates.
   - False

5. True or False: The concept of the Big Mac Index is based on the idea of purchasing power parity.
   - True

6. True or False: According to the text, over 3,000 consumer goods and services are included in the current PPP calculations.
   - True

7. True or False: The price of a Big Mac is derived solely from the cost of its ingredients.
   - False

8. True or False: The purpose of the Big Mac Index is to calculate the exact exchange rate between t