In [13]:
import PyPDF2
import re
import os

from langchain import AzureOpenAI
from langchain.evaluation import load_evaluator, EvaluatorType
from langchain.chains import LLMChain

from langgraph.graph import Graph, Node, Edge
from langgraph.core import ExecutionContext

ModuleNotFoundError: No module named 'langchain'

#### Initialize GPT instances ####

In [14]:
# Azure OpenAI connection settings come from the environment so that no
# credentials are stored in the notebook. Each value is None when the
# corresponding variable is unset.
azure_openai_key = os.environ.get("AZURE_OPENAI_KEY")
azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_openai_api_version = os.environ.get("AZURE_OPENAI_API_VERSION")

# Connection arguments shared by both deployments.
# NOTE(review): confirm that `api_base` / `deployment_id` are the keyword names
# the installed AzureOpenAI class expects.
_azure_connection = {
    "api_key": azure_openai_key,
    "api_base": azure_openai_endpoint,
    "api_version": azure_openai_api_version,
}

# One client per deployment: GPT-3.5 and GPT-4.
gpt_3_5_instance = AzureOpenAI(**_azure_connection, deployment_id="gpt-3.5-turbo")
gpt_4_instance = AzureOpenAI(**_azure_connection, deployment_id="gpt-4")

NameError: name 'AzureOpenAI' is not defined

####    Create PDF Extractor   ####

In [9]:
class PDFExtractor:
    """Read a PDF from disk and turn it into one whitespace-normalised string."""

    def __init__(self, pdf_path):
        """Remember which PDF to read.

        Args:
            pdf_path: Path of the PDF file. It is only opened when
                ``extract_text`` is called.
        """
        self.pdf_path = pdf_path

    def extract_text(self):
        """Return the text of every page, in page order, joined by newlines.

        Pages are separated by a newline so the last word of one page does not
        fuse with the first word of the next; ``preprocess_text`` later turns
        that newline into a single space.

        Returns:
            str: The concatenated page text ("" for a PDF with no pages).

        Raises:
            OSError: If ``pdf_path`` cannot be opened.
        """
        with open(self.pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # `or ""` keeps the join safe for a page whose extraction yields
            # nothing (None or an empty string).
            page_texts = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(page_texts)

    def preprocess_text(self, text):
        """Collapse every run of whitespace (spaces, tabs, newlines) to one space.

        Args:
            text: Raw text, e.g. the output of ``extract_text``.

        Returns:
            str: ``text`` with each whitespace run replaced by a single space.
            Leading/trailing whitespace is reduced to one space, not removed.
        """
        return re.sub(r'\s+', ' ', text)

In [10]:
# Source document for the flashcards; the path is relative, so it is resolved
# against the notebook's working directory.
pdf_path = "data/defining nursing_2003.pdf"

# Pull the text out of the PDF, then collapse its whitespace.
extractor = PDFExtractor(pdf_path=pdf_path)
raw_text = extractor.extract_text()
preprocessed_text = extractor.preprocess_text(text=raw_text)

#### Create Flashcard Generator ####

In [11]:
class FlashcardGenerator:
    """Ask a language model for flashcards and parse its reply into dicts."""

    def __init__(self, llm_chain):
        """Store the model wrapper used to answer prompts.

        Args:
            llm_chain: Object that answers a prompt string. Its ``run(prompt)``
                method is used when present; otherwise ``invoke(prompt)`` is
                called, so a bare model instance works as well as a chain.
        """
        self.llm_chain = llm_chain

    def generate_flashcards(self, text, feedback=None):
        """Generate flashcards for ``text``, optionally steered by ``feedback``.

        The reply is expected to contain one flashcard per blank-line-separated
        paragraph, written as ``question: answer``. Paragraphs without a colon
        are ignored.

        Args:
            text: Source material the flashcards should cover.
            feedback: Optional critique appended to the prompt.

        Returns:
            list[dict]: ``{"question": ..., "answer": ...}`` dicts, possibly empty.
        """
        prompt = f"""You are a highly intelligent educational assistant. Generate detailed and comprehensive flashcards from the following text. Ensure that each flashcard contains a well-structured question and a thorough answer. 
                     Use the following text to create flashcards:
                     {text}"""
        if feedback:
            prompt += f" Incorporate the following feedback into the flashcards: {feedback}"
        response = self._run_prompt(prompt)
        return self._parse_flashcards(response)

    def _run_prompt(self, prompt):
        """Send ``prompt`` to the wrapped model and return its reply as text."""
        run = getattr(self.llm_chain, "run", None)
        if callable(run):
            result = run(prompt)
        else:
            result = self.llm_chain.invoke(prompt)
        # Some model wrappers return a message object rather than a string;
        # in that case the text is taken from its `content` attribute.
        return getattr(result, "content", result)

    @staticmethod
    def _parse_flashcards(response):
        """Split a model reply into question/answer dicts."""
        flashcards = []
        for chunk in response.strip().split("\n\n"):
            # Split on the FIRST colon only, so answers that themselves contain
            # colons (times, ratios, lists) are kept whole.
            question, separator, answer = chunk.partition(":")
            if not separator:
                continue
            flashcards.append({"question": question.strip(), "answer": answer.strip()})
        return flashcards


In [None]:
# Build the generator on the GPT-3.5 instance and produce the first draft of
# flashcards from the cleaned PDF text.
# NOTE(review): FlashcardGenerator calls `.run(prompt)` on the object it is
# given; confirm that `gpt_3_5_instance` provides that method.
flashcard_generator = FlashcardGenerator(llm_chain=gpt_3_5_instance)
initial_flashcards = flashcard_generator.generate_flashcards(text=preprocessed_text)

#### LangChain critic to evaluate and critique the generator's output ####

In [None]:
class LangChainCritic:
    """Judge a flashcard with a LangChain criteria evaluator and produce feedback.

    A criteria evaluator is used because no reference answer exists for a
    generated flashcard; the evaluator is given the supplied model rather than
    LangChain's default one.
    """

    # Aspects the evaluator is asked to judge (phrased so that "yes" is good).
    CRITERIA = {
        "accuracy": "Is the answer factually correct for the question asked?",
        "completeness": "Does the answer cover everything needed to answer the question, with nothing essential missing?",
        "conciseness": "Is the answer free of superfluous or irrelevant material?",
        "clarity": "Are the question and the answer clearly worded and unambiguous?",
    }

    # (result key, label) pairs rendered by generate_feedback, in output order.
    _FEEDBACK_FIELDS = (
        ("missing", "Missing"),
        ("superfluous", "Superfluous"),
        ("inaccuracy", "Inaccuracy"),
        ("clarity", "Clarity"),
        ("completeness", "Completeness"),
        ("reasoning", "Reasoning"),
    )

    def __init__(self, llm_chain):
        """Create the evaluator.

        Args:
            llm_chain: Language model that performs the grading; passed to the
                evaluator as its ``llm``.
        """
        self.llm_chain = llm_chain
        self.evaluator = load_evaluator(
            EvaluatorType.CRITERIA,
            llm=llm_chain,
            criteria=self.CRITERIA,
        )

    def evaluate(self, question, answer):
        """Grade one flashcard.

        Args:
            question: Flashcard question (given to the evaluator as the input).
            answer: Flashcard answer (given to the evaluator as the prediction).

        Returns:
            tuple: ``(score, feedback)`` where ``score`` is the evaluator's
            ``"score"`` entry (``None`` if absent) and ``feedback`` is the text
            built by ``generate_feedback``.
        """
        evaluation = self.evaluator.evaluate_strings(prediction=answer, input=question)
        score = evaluation.get('score')
        feedback = self.generate_feedback(evaluation)
        return score, feedback

    def generate_feedback(self, evaluation):
        """Render an evaluation dict as one space-separated feedback string.

        Only keys that are present and non-empty are included, so a result
        that lacks some (or all) of the known fields yields a shorter string
        instead of raising ``KeyError``.

        Args:
            evaluation: Mapping returned by the evaluator.

        Returns:
            str: ``"Label: value"`` fragments joined by spaces ("" if none).
        """
        feedback = [
            f"{label}: {evaluation[key]}"
            for key, label in self._FEEDBACK_FIELDS
            if evaluation.get(key)
        ]
        return " ".join(feedback)



In [None]:
# The critic is backed by the GPT-4 instance.
langchain_critic = LangChainCritic(llm_chain=gpt_4_instance)


#### Flashcard refiner ####

In [None]:
class FlashcardRefiner:
    """Repeatedly critique flashcards and regenerate them from the critique."""

    def __init__(self, generator, critic, iterations=10):
        """Configure the refinement loop.

        Args:
            generator: Object with ``generate_flashcards(text, feedback=...)``
                returning a list of flashcard dicts.
            critic: Object with ``evaluate(question, answer)`` returning
                ``(score, feedback)``.
            iterations: Number of critique/regenerate passes over all cards.
        """
        self.generator = generator
        self.critic = critic
        self.iterations = iterations

    def refine_flashcards(self, flashcards):
        """Run ``iterations`` refinement passes and return the final cards.

        In each pass every card is critiqued, then regenerated from its own
        question and answer plus the critic's feedback. If regeneration yields
        no usable card, the current card is kept instead of failing.

        Args:
            flashcards: Iterable of ``{"question": ..., "answer": ...}`` dicts.
                The input container itself is not modified.

        Returns:
            The refined flashcards (the input object when ``iterations`` is 0).
        """
        for i in range(self.iterations):
            refined_flashcards = []
            for flashcard in flashcards:
                question, answer = flashcard['question'], flashcard['answer']
                _score, feedback = self.critic.evaluate(question, answer)
                # Send the whole card, not just the question, so the generator
                # can improve the existing answer rather than start from nothing.
                card_text = f"{question}: {answer}"
                candidates = self.generator.generate_flashcards(card_text, feedback=feedback)
                # An unparsable/empty reply must not abort the whole run.
                refined_flashcards.append(candidates[0] if candidates else flashcard)
            flashcards = refined_flashcards
            print(f"Iteration {i+1} completed")
        return flashcards




In [None]:
# Wire generator and critic together (default number of iterations) and refine
# the first-draft flashcards.
refiner = FlashcardRefiner(generator=flashcard_generator, critic=langchain_critic)
refined_flashcards = refiner.refine_flashcards(flashcards=initial_flashcards)

#### Human Validator ####

In [None]:
class HumanValidator:
    """Interactive review step: a person approves or rejects each flashcard."""

    # Replies (after trimming and lower-casing) that count as approval.
    _APPROVALS = frozenset({"yes", "y"})

    def __init__(self):
        """No configuration is needed."""
        pass

    def validate_flashcards(self, flashcards):
        """Show each flashcard, ask for a verdict and return the approved ones.

        The reply is trimmed and compared case-insensitively, so ``"Yes "`` or
        ``"y"`` approve a card; anything else marks it as needing improvement.

        Args:
            flashcards: Iterable of dicts with ``"question"`` and ``"answer"``.
                Each dict gets a ``"status"`` key set in place to
                ``'Approved'`` or ``'Needs Improvement'``.

        Returns:
            list: The flashcards whose status is ``'Approved'``, in input order.
        """
        for flashcard in flashcards:
            print(f"Question: {flashcard['question']}")
            print(f"Answer: {flashcard['answer']}")
            feedback = input("Is this flashcard satisfactory? (yes/no): ")
            is_approved = feedback.strip().lower() in self._APPROVALS
            flashcard['status'] = 'Approved' if is_approved else 'Needs Improvement'
        return [fc for fc in flashcards if fc['status'] == 'Approved']




In [None]:
#### create own flashcards and validate ####

In [None]:
# Ask a human to approve or reject each refined flashcard (prompts on stdin).
validator = HumanValidator()
approved_flashcards = validator.validate_flashcards(flashcards=refined_flashcards)

#### Workflow management with LangGraph ####

In [None]:


# Workflow stages, declared in execution order. Every stage is a thin lambda
# around one of the notebook-level helper objects and reads its inputs from the
# context mapping it receives.
# NOTE(review): confirm that `Node` / `Edge` and these keyword names (id, run,
# inputs, outputs, from_node, to_node) exist in the installed langgraph version.

# Stage 1: read the PDF (uses the module-level extractor, not the context).
extract_text_node = Node(
    id="extract_text",
    outputs=["raw_text"],
    run=lambda context: extractor.extract_text(),
)

# Stage 2: collapse whitespace in the raw text.
preprocess_text_node = Node(
    id="preprocess_text",
    inputs=["raw_text"],
    outputs=["preprocessed_text"],
    run=lambda context: extractor.preprocess_text(context["raw_text"]),
)

# Stage 3: first-draft flashcards.
generate_flashcards_node = Node(
    id="generate_flashcards",
    inputs=["preprocessed_text"],
    outputs=["initial_flashcards"],
    run=lambda context: flashcard_generator.generate_flashcards(context["preprocessed_text"]),
)

# Stage 4: one (score, feedback) pair per first-draft flashcard.
evaluate_flashcards_node = Node(
    id="evaluate_flashcards",
    inputs=["initial_flashcards"],
    outputs=["evaluations"],
    run=lambda context: [langchain_critic.evaluate(fc['question'], fc['answer']) for fc in context["initial_flashcards"]],
)

# Stage 5: iterative refinement. "evaluations" is declared as an input but the
# lambda only reads "initial_flashcards".
refine_flashcards_node = Node(
    id="refine_flashcards",
    inputs=["initial_flashcards", "evaluations"],
    outputs=["refined_flashcards"],
    run=lambda context: refiner.refine_flashcards(context["initial_flashcards"]),
)

# Stage 6: human approval.
validate_flashcards_node = Node(
    id="validate_flashcards",
    inputs=["refined_flashcards"],
    outputs=["approved_flashcards"],
    run=lambda context: validator.validate_flashcards(context["refined_flashcards"]),
)

# Link the stages into a straight line: each id feeds the one after it.
_stage_order = [
    "extract_text",
    "preprocess_text",
    "generate_flashcards",
    "evaluate_flashcards",
    "refine_flashcards",
    "validate_flashcards",
]
edges = [
    Edge(from_node=source, to_node=target)
    for source, target in zip(_stage_order, _stage_order[1:])
]




In [None]:
# Assemble the workflow graph from the stage nodes and their connecting edges.
# NOTE(review): confirm that `Graph(nodes=..., edges=...)`, `ExecutionContext`
# and `graph.run(...)` match the installed langgraph API.
graph = Graph(
    nodes=[
        extract_text_node,
        preprocess_text_node,
        generate_flashcards_node,
        evaluate_flashcards_node,
        refine_flashcards_node,
        validate_flashcards_node,
    ],
    edges=edges,
)

# Run the workflow with the PDF location as its only input, then pull the
# human-approved flashcards out of the result.
context = ExecutionContext(inputs={"pdf_path": pdf_path})
result = graph.run(context)

approved_flashcards = result["approved_flashcards"]