In [1]:
import sys
import os 
import nest_asyncio

# Sanity check
print(sys.executable)
nest_asyncio.apply()

from dotenv import load_dotenv
load_dotenv() 

/home/amor/Documents/code_dw/human_in_the_loop_workflow_demo/venv/bin/python3.11


In [88]:
import os
from pydantic import BaseModel, Field
from llama_index.core.workflow import (
    Workflow,
    step,
    Event,
    Context,
    StartEvent,
    StopEvent
)
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.retrievers.bm25 import BM25Retriever
import Stemmer
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.evaluation import FaithfulnessEvaluator

## Exercise : 

Your goal is to streamline the content creation for a Blog post

You will use the following textbook pdf `./data/textbook/AnatomyandPhysiology-OP.pdf`

Every paragraph produced should be deeply sourced from some of the notebook content



In [None]:

documents = SimpleDirectoryReader(
    input_files=["./data/textbook/AnatomyandPhysiology-OP.pdf"],
).load_data()
splitter = SentenceSplitter(chunk_size=256)
nodes = splitter.get_nodes_from_documents(documents)


In [3]:
len(nodes)

In [4]:
retriever_top_5 = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=5,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
)

15888

### Test 1 - Basic approach

In [5]:
# configure response synthesizer
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
)

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever_top_5,
    response_synthesizer=response_synthesizer,
)
response = query_engine.query("Tell me about how the skin works")



AttributeError: 'Response' object has no attribute 'text'

In [7]:
response.response

"The skin, as part of the integumentary system, is responsible for more than just outward appearance. It is the body's largest organ system, covering about 16 percent of body weight and protecting inner organs. The skin requires daily care and protection to maintain its health. It consists of layers that work together to perform essential functions, such as protecting against external threats, regulating body temperature, and serving as a barrier to prevent dehydration and infection. Additionally, specialized cells like keratinocytes and melanocytes play crucial roles in skin structure and function, contributing to processes like skin pigmentation and regeneration."

In [16]:
response.source_nodes[5].text

IndexError: list index out of range

**Conclusion** Does not work :( 

### Test 2 - Use a reranker and a larger set of documents

In [20]:
top_k = 15


retriever_top_k = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=top_k,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
)

reranker = LLMRerank(
            choice_batch_size=5,
            top_n=top_k,
        )

In [21]:
# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever_top_k,
    node_postprocessors=[reranker]
)

In [22]:
response = query_engine.query("Tell me about how the skin works")


In [23]:
response.response

"The skin, along with its accessory structures, is a vital part of the integumentary system. It serves as the body's largest organ system, covering about 16 percent of body weight and protecting the inner organs. The skin plays a crucial role in maintaining overall health by providing a protective barrier against external elements and regulating body temperature through processes like radiation, convection, and evaporation. Additionally, the skin is made up of tissues that work together to perform essential functions, such as protecting against diseases, disorders, and injuries that can affect the integumentary system."

### Test 3 - Custom workflow

Use a workflow for custom processing


In [110]:
from typing import List, Any
from pydantic import BaseModel, Field


class Paragraph(BaseModel):
    subtitle: str = Field("The subtitle of this paragraph. It will be the main topic of this text.")
    text: Any


class Article(BaseModel):
    """An article is composed of several paragraph """

    title: str
    paragraphs: List[Paragraph]


class Outline(BaseModel):
    """An outline represent the structure of an article"""

    title: str = Field("The title of this article.")
    subtitles: List[str]  = Field("The sub-titles of each part of the article.")


class OutlineEvent(Event):
    outline: Outline


class VerificationEvent(Event):
    paragraphs: list[Paragraph]



class ContentCreationWorkflow(Workflow):

    @step
    async def step_a(self, ev: StartEvent) -> OutlineEvent:
        llm = OpenAI(temperature=0.05, model="gpt-4o-mini")
        sllm = llm.as_structured_llm(output_cls=Outline)
        outline = sllm.complete(f"Write the outline of an article about {ev['message']}. There should be at least 7 parts.").raw
        return OutlineEvent(outline=outline)


    @step
    async def step_b(self, ev: OutlineEvent) -> VerificationEvent:
        outline = ev.outline
        query_engine = RetrieverQueryEngine(
            retriever=retriever_top_k,
            node_postprocessors=[reranker]
        )
        paragraphs = list()
        for subtitle in outline.subtitles:
            rez = query_engine.query(subtitle)
            paragraphs.append(Paragraph(subtitle=subtitle, text=rez))

        return VerificationEvent(paragraphs=paragraphs)


    @step
    async def step_c(self, ev: VerificationEvent) -> StopEvent:
        paragraphs = ev.paragraphs
        evaluator_gpt4 = FaithfulnessEvaluator(llm=llm)
        final_paragraphs = list()
        for p in paragraphs:
            eval_result = evaluator_gpt4.evaluate_response(response=p.text)

            if eval_result.feedback == "YES" and eval_result.score >= 0.5:
                final_paragraphs.append(p)
            else:
                print(eval_result.feedback)

        if len(final_paragraphs) < 3:
            print(len(final_paragraphs))
            raise Exception("NOt enough paragraphs")
        
        content = "\n ".join(p.text.response for p in final_paragraphs)
        rez = llm.complete(
            f"""
            Given these source, please synthetize a complete article.
            Don't delete any source, you should just re-aarange it to make it more readable.

            {content}
            """
        )
        print(rez)
        print(rez.text)
        return StopEvent(result=rez.text)


In [111]:
w = ContentCreationWorkflow(timeout=120, verbose=True)

r = await w.run(message="skin")
r

Running step step_a
Step step_a produced event OutlineEvent
Running step step_b
Step step_b produced event VerificationEvent
Running step step_c
NO
NO
**The Importance of Skin Health: Understanding the Integumentary System**

The skin is a crucial part of the body's integumentary system, serving as the largest organ system and playing a significant role in maintaining homeostasis. Composed of multiple layers of cells and tissues, the skin is held to underlying structures by connective tissue. The deeper layer of skin is well vascularized, containing numerous blood vessels that support its functions. Alongside the skin, accessory structures such as hair, nails, and glands contribute to its overall protective capabilities.

The skin performs various essential functions, including protecting the body from external elements like microorganisms, chemicals, and UV sunlight. It prevents dehydration, acts as a sensory organ, regulates body temperature and electrolyte balance, and synthesizes v

"**The Importance of Skin Health: Understanding the Integumentary System**\n\nThe skin is a crucial part of the body's integumentary system, serving as the largest organ system and playing a significant role in maintaining homeostasis. Composed of multiple layers of cells and tissues, the skin is held to underlying structures by connective tissue. The deeper layer of skin is well vascularized, containing numerous blood vessels that support its functions. Alongside the skin, accessory structures such as hair, nails, and glands contribute to its overall protective capabilities.\n\nThe skin performs various essential functions, including protecting the body from external elements like microorganisms, chemicals, and UV sunlight. It prevents dehydration, acts as a sensory organ, regulates body temperature and electrolyte balance, and synthesizes vitamin D. Additionally, the skin stores fats, provides insulation, and forms a protective barrier against water loss.\n\nAs individuals age, chang

In [112]:
print(r)

**The Importance of Skin Health: Understanding the Integumentary System**

The skin is a crucial part of the body's integumentary system, serving as the largest organ system and playing a significant role in maintaining homeostasis. Composed of multiple layers of cells and tissues, the skin is held to underlying structures by connective tissue. The deeper layer of skin is well vascularized, containing numerous blood vessels that support its functions. Alongside the skin, accessory structures such as hair, nails, and glands contribute to its overall protective capabilities.

The skin performs various essential functions, including protecting the body from external elements like microorganisms, chemicals, and UV sunlight. It prevents dehydration, acts as a sensory organ, regulates body temperature and electrolyte balance, and synthesizes vitamin D. Additionally, the skin stores fats, provides insulation, and forms a protective barrier against water loss.

As individuals age, changes occu