In [None]:
import logging
import math
import os
import re
import subprocess
import sys
from collections import Counter, defaultdict
from functools import lru_cache, partial
from zipfile import ZipFile, BadZipFile

import requests
import sentence_transformers

# Set up logging
# INFO level so that the logger.info(...) progress messages emitted by the helpers in this cell are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up the cache for this notebook
# The notebook's own directory is added to sys.path (once) so local modules can be imported.
repo_path = '.'
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Points DSP_NOTEBOOK_CACHEDIR at ./cache. This runs before `dspy` is imported later in this cell;
# presumably dspy reads the variable at import time — TODO confirm.
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(repo_path, 'cache')

# Function to download files
def download_file(url, output, timeout=60):
    """Download `url` to the local path `output` unless `output` already exists.

    The body is streamed into `<output>.part` and only renamed to `output` once
    the transfer finished, so an interrupted download can never be mistaken for
    a complete file by the existence check on the next run.

    Args:
        url: Address to fetch.
        output: Destination path; if it already exists nothing is downloaded.
        timeout: Seconds passed to `requests.get` as its timeout.

    Returns:
        None. HTTP/network failures (`requests.RequestException`) are logged,
        not raised; other errors (e.g. a failing disk write) propagate.
    """
    if os.path.exists(output):
        return

    partial_path = f"{output}.part"
    try:
        logger.info(f"Downloading {url} to {output}")
        # The context manager releases the streamed connection even on errors.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file only after every chunk has been written.
        os.replace(partial_path, output)
        logger.info(f"Downloaded {output}")
    except requests.RequestException as e:
        logger.error(f"Error downloading {url}: {str(e)}")
    finally:
        # Still present only if the download did not complete.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Function to unzip files
def unzip_file(zip_path, extract_to='.'):
    """Extract the archive at `zip_path` into the directory `extract_to`.

    The target directory is created if needed. Every failure (missing archive,
    invalid zip, directory that cannot be created, ...) is logged rather than
    raised, so the caller continues either way.

    Args:
        zip_path: Path of the zip archive to extract.
        extract_to: Directory that receives the archive contents.
    """
    try:
        # exist_ok avoids the check-then-create race; being inside the try keeps
        # directory-creation errors on the same log-and-continue path as the rest.
        os.makedirs(extract_to, exist_ok=True)
        with ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        logger.info(f"Extracted {zip_path} to {extract_to}")
    except BadZipFile:
        logger.error(f"Error extracting {zip_path}: File is not a zip file")
    except Exception as e:
        logger.error(f"Error extracting {zip_path}: {str(e)}")

# Download biomedical terms and embeddings
# The links end in `dl=1` so that Dropbox serves the raw file. With `dl=0` the link
# resolves to an HTML preview page, which is then saved under the target name and
# later fails to load as a zip archive / torch file.
# download_file skips targets that already exist, so delete any files fetched with
# the old `dl=0` links before re-running this cell.
download_file('https://www.dropbox.com/scl/fi/5ywdea0xjkb10os1o6ryj/embeddings-FremyCompany-BioLORD-STAMB2-v1.pt?rlkey=nek172noiyrpn588jt7dunl66&dl=1', 'embeddings[FremyCompany--BioLORD-STAMB2-v1].pt')
download_file('https://www.dropbox.com/scl/fi/f92z0vg42icsn5g89f3wu/reaction_terms.txt?rlkey=ot8qasqr3r9getbn9epyji0aa&dl=1', 'reaction_terms.txt')
download_file('https://www.dropbox.com/scl/fi/cgu0eal9m7q0xrswp49g5/cache.zip?rlkey=x3lnpc5vz1t0di7igzthrky8h&dl=1', 'cache.zip')

# Unzip cache files
unzip_file('cache.zip', 'cache')


# Import libraries
import datasets
import dspy
from dspy.evaluate import Evaluate
import tqdm
from sentence_transformers import SentenceTransformer
import torch

# Additional setup or function definitions can go here

# Example function to initialize models and other resources
def initialize_resources():
    """Instantiate and return the `sentence-transformers/all-MiniLM-L6-v2` SentenceTransformer."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Initialize resources
# NOTE(review): `model` is not referenced again in the visible cells
# (ReactionGrounder builds its own SentenceTransformer) — confirm it is still needed.
model = initialize_resources()

print("Setup complete. Ready to run your main code.")


In [7]:
# Handle to the `llama3` model via dspy.OllamaLocal. The same assignment is repeated
# in the later cell that also calls dspy.settings.configure(lm=lm).
lm = dspy.OllamaLocal(model='llama3')

In [8]:
# normalize one reaction string
def normalize(reaction: str) -> str:
    """Canonicalise one reaction string for comparison.

    Every leading and trailing character that is not a word character
    (whitespace, newlines and punctuation, in any order) is removed and the
    remainder is lower-cased. Characters inside the term are left untouched.

    Args:
        reaction: Raw reaction text, e.g. one item of a comma-separated list.

    Returns:
        The trimmed, lower-cased term; an empty string if nothing is left.
    """
    # `\W` matches whitespace as well as punctuation, so punctuation that sits
    # behind leading whitespace (" (rash)") is stripped too. Stripping
    # punctuation only at the very start of the string missed that case.
    return re.sub(r'^\W+|\W+$', '', reaction, flags=re.UNICODE).lower()

# given a csv string of reactions, parse into a list
def extract_reactions_from_string(reactions: str) -> list[str]:
  """Split a comma-separated reaction string into normalized terms.

  Items that are empty after normalization (empty input, a bare newline,
  doubled or trailing commas) are dropped, so they are not passed on as if
  they were reactions.

  Args:
    reactions: Comma-separated reaction text.

  Returns:
    The non-empty normalized terms, in their original order.
  """
  normalized = (normalize(r) for r in reactions.split(','))
  return [r for r in normalized if r]

# given a list of csv strings of reactions, parse into one flat list
def extract_reactions_from_strings(reactions: list[str]) -> list[str]:
  """Normalize each string, merge them into one comma-separated string and parse that."""
  merged = ", ".join(normalize(r) for r in reactions)
  return extract_reactions_from_string(merged)

# process a biodex datapoint
def preprocess_example(example: dict) -> dict:
    """Convert one raw BioDEX record into the fields used by this notebook.

    Args:
        example: Record providing 'title', 'abstract', 'fulltext_processed'
            and a comma-separated 'reactions' string.

    Returns:
        A plain dict (not a dspy.Example) with keys 'title', 'abstract',
        'context', 'reactions' (list of normalized terms) and 'labels'
        (a dspy.Example wrapping the same reaction list).
    """
    reactions = extract_reactions_from_string(example['reactions'])
    return dict(
        title=example['title'],
        abstract=example['abstract'],
        # Keep what follows the first "TEXT:" marker; when the marker is absent
        # the split yields a single element and the whole field is kept.
        context=example['fulltext_processed'].split('\n\nTEXT:\n', 1)[-1],
        reactions=reactions,
        labels=dspy.Example(reactions=reactions),
    )

In [9]:
# Load the BioDEX reactions dataset and pick its 'train' and 'validation' splits.
dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions")
official_trainset, official_devset = dataset['train'], dataset['validation']
trainset, devset = [], []

# Keep the first 1000 training records. The loop breaks out of tqdm early, so the
# progress bar stops well before 100%.
for example in tqdm.tqdm(official_trainset):
    if len(trainset) >= 1000: break
    trainset.append(preprocess_example(example))

# Keep the first 500 validation records.
for example in tqdm.tqdm(official_devset):
    if len(devset) >= 500: break
    devset.append(preprocess_example(example))

# The *X variants additionally mark 'labels' as an input field.
# `trainset` / `devset` are rebound here from lists of dicts to lists of dspy.Example,
# so this cell cannot be re-run on its own without re-running the loops above.
trainsetX = [dspy.Example(**x).with_inputs('title', 'abstract', 'context', 'labels') for x in trainset]
trainset = [dspy.Example(**x).with_inputs('title', 'abstract', 'context') for x in trainset]
devsetX = [dspy.Example(**x).with_inputs('title', 'abstract', 'context', 'labels') for x in devset]
devset = [dspy.Example(**x).with_inputs('title', 'abstract', 'context') for x in devset]

print(len(trainset), len(devset))

  9%|█████████▌                                                                                                     | 1000/11543 [00:00<00:03, 3450.01it/s]
 17%|███████████████████▌                                                                                             | 500/2886 [00:00<00:00, 3312.23it/s]

1000 500





In [10]:
# show one title and abstract, as well as the reactions in the final expert-created drug safety report
# title, abstract and gold reactions of the first training example, separated by blank lines
print(
    trainset[0].title,
    trainset[0].abstract,
    trainset[0].labels().reactions,
    sep='\n\n',
)

HIV-1 Drug Resistance by Ultra-Deep Sequencing Following Short Course Zidovudine, Single-Dose Nevirapine, and Single-Dose Tenofovir with Emtricitabine for Prevention of Mother-to-Child Transmission.

Antiretroviral drug resistance following pMTCT strategies remains a significant problem. With rapid advancements in next generation sequencing technologies, there is more focus on HIV drug-resistant variants of low frequency, or the so-called minority variants. In South Africa, AZT monotherapy for pMTCT, similar to World Health Organization option A, has been used since 2008. In 2010, a single dose of co-formulated TDF/FTC was included in the strategy for prevention of resistance conferred by single-dose nevirapine (sd NVP). The study was conducted in KwaZulu-Natal, South Africa, among pMTCT participants who received AZT monotherapy from 14 weeks of gestation, intrapartum AZT and sd NVP, and postpartum sd TDF/FTC. Twenty-six specimens collected at 6 weeks post-delivery were successfully se

In [11]:
def metric_recall(gold: list[str], pred: list[str]) -> float:
  """Recall of the predicted reactions against the gold reactions.

  Both lists are normalized and de-duplicated before comparison.

  Args:
    gold: Reference reaction terms.
    pred: Predicted reaction terms.

  Returns:
    |gold ∩ pred| / |gold|, or 0.0 when `gold` is empty (recall is undefined
    there; returning 0.0 avoids a ZeroDivisionError).
  """
  gold_terms = {normalize(r) for r in gold}
  pred_terms = {normalize(r) for r in pred}

  if not gold_terms:
    return 0.0

  return len(gold_terms & pred_terms) / len(gold_terms)

def metric_recallK(gold: list[str], pred: list[str], K:int=10) -> float:
  """Recall computed on only the first K predictions."""
  top_k = pred[:K]
  return metric_recall(gold, top_k)

# wrap the recall@K metric so it can take dspy Examples
def dspy_metric_recall10(gold: dspy.Example, pred: dspy.Example, trace=None) -> float:
  """Recall@10 between the `reactions` of two dspy examples; `trace` is accepted but unused."""
  gold_reactions, predicted_reactions = gold.reactions, pred.reactions
  return metric_recallK(gold_reactions, predicted_reactions, K=10)

def dspy_metric_recall20(gold: dspy.Example, pred: dspy.Example, trace=None) -> float:
  """Recall@20 between the `reactions` of two dspy examples; `trace` is accepted but unused."""
  gold_reactions, predicted_reactions = gold.reactions, pred.reactions
  return metric_recallK(gold_reactions, predicted_reactions, K=20)

def dspy_metric_recall30(gold: dspy.Example, pred: dspy.Example, trace=None) -> float:
  """Recall@30 between the `reactions` of two dspy examples; `trace` is accepted but unused."""
  gold_reactions, predicted_reactions = gold.reactions, pred.reactions
  return metric_recallK(gold_reactions, predicted_reactions, K=30)


In [12]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
# word_tokenize and nltk.pos_tag below need tokenizer and tagger data in addition to
# the stopword list. Newer NLTK releases look these up under the *_tab / *_eng names,
# so both spellings are requested; an identifier unknown to the installed NLTK is
# reported as a failed download (return value False) and does not raise.
for nltk_resource in ('stopwords', 'punkt', 'punkt_tab',
                      'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng'):
    nltk.download(nltk_resource, quiet=True)


# Sample corpus of text 
corpus = """
Nausea, headache, and dizziness are common side effects of the medication.
Some patients reported experiencing fatigue and vomiting after taking the drug.
The rash appeared on the patient's skin after using the cream.
"""

# Tokenize the text into words
words = word_tokenize(corpus)

# Filter out stopwords (common words like 'and', 'the', etc.)
stop_words = set(stopwords.words('english'))
filtered_words = [word.lower() for word in words if word.isalpha() and word.lower() not in stop_words]

# Identify potential reaction terms (nouns and adjectives)
# Tagging runs on the already filtered, lower-cased word list, so every remaining
# noun/adjective is kept whether or not it names a reaction.
reaction_terms = [word for (word, pos) in nltk.pos_tag(filtered_words) if pos.startswith('NN') or pos.startswith('JJ')]

# Count the occurrence of each reaction term
reaction_term_counts = Counter(reaction_terms)

# Print the most common reaction terms and their frequencies
print("Most common reaction terms:")
for term, count in reaction_term_counts.most_common():
    print(f"{term}: {count}")


Most common reaction terms:
nausea: 1
headache: 1
dizziness: 1
common: 1
side: 1
effects: 1
medication: 1
patients: 1
fatigue: 1
drug: 1
rash: 1
patient: 1
skin: 1
cream: 1


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\gagan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
class ReactionGrounder():
    """ Matches a given reaction string to one of the provided reaction terms.
    Implements several ways of grounding reactions. Each grounding function returns a list of at most K items.
    Each list item consists of a similarity score and the associated grounded term. """

    def __init__(self, model_name='FremyCompany/BioLORD-STAMB2-v1', reaction_terms=None, trainset=None):
        """Load the embedding model, the prior counts and the term embeddings.

        Args:
            model_name: SentenceTransformer model identifier.
            reaction_terms: List of candidate terms to ground against (required).
            trainset: Optional examples whose 'reactions' are counted as a prior.

        Raises:
            ValueError: If `reaction_terms` is None.
        """
        self.model_name = model_name
        # '/' is replaced so the model name can be used inside a file name.
        self.friendly_model_name = self.model_name.replace('/', '--')

        self.model = SentenceTransformer(self.model_name)
        self.model.to('cpu')

        if reaction_terms is None:
            raise ValueError("No reaction terms provided.")
        self.reaction_terms = reaction_terms

        self.trainset = trainset if trainset is not None else []
        self.reaction_terms_to_count = self._calculate_counts()

        self.reaction_embeddings = self._load_embeddings()

    def _encode_reaction_terms(self) -> torch.Tensor:
        """Embed all reaction terms, using CUDA when available; the model is moved back to CPU afterwards."""
        self.model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        try:
            return self.model.encode(self.reaction_terms, convert_to_tensor=True, show_progress_bar=True)
        finally:
            self.model.to(torch.device('cpu'))

    def _load_embeddings(self) -> torch.Tensor:
        """Load or create embeddings for all reaction terms.

        The cache file name depends only on the model name, so a cached file may
        have been produced for a different term list. A cache whose number of
        rows differs from `len(self.reaction_terms)` is therefore ignored: the
        embeddings are recomputed in memory and the existing file is left as is.
        """
        reaction_embeddings_filename = f'embeddings[{self.friendly_model_name}].pt'

        if os.path.isfile(reaction_embeddings_filename):
            # NOTE(review): torch.load unpickles the file; only load caches from a trusted source.
            with open(reaction_embeddings_filename, "rb") as f:
                cached_embeddings = torch.load(f, map_location=torch.device('cpu'))
            if len(cached_embeddings) == len(self.reaction_terms):
                return cached_embeddings
            # Row i of the cache would not correspond to reaction_terms[i].
            logger.warning(
                f"Ignoring {reaction_embeddings_filename}: it holds {len(cached_embeddings)} embeddings "
                f"but {len(self.reaction_terms)} reaction terms were provided."
            )
            return self._encode_reaction_terms()

        reaction_embeddings = self._encode_reaction_terms()
        with open(reaction_embeddings_filename, "wb") as f:
            torch.save(reaction_embeddings, f)
        return reaction_embeddings

    def _calculate_counts(self) -> dict[str, int]:
        """Given a training set, count how many times each reaction occurred as prior.

        `example['reactions']` may be a whitespace-separated string (each word is
        counted as one term) or a list/tuple of terms (each item is counted as is).
        Terms that never occur have a count of 0.
        """
        reactions = []
        for example in self.trainset:
            example_reactions = example['reactions']
            if isinstance(example_reactions, str):
                reactions.extend(example_reactions.split())
            else:
                reactions.extend(example_reactions)
        counts = defaultdict(lambda: 0, Counter(reactions))
        return counts

    # NOTE: lru_cache on a method keys on `self` as well and keeps the instance alive
    # for as long as the cache holds entries; the cached list is shared between callers.
    @lru_cache(maxsize=100000)
    def ground(self, reaction: str, K:int=3) -> list[tuple[float, str]]:
        """ Finds K closest matches based on semantic embedding similarity.

        Returns (score, term) tuples sorted from highest to lowest score. """
        query_embeddings = self.model.encode(reaction, convert_to_tensor=True)
        query_result = sentence_transformers.util.semantic_search(query_embeddings, self.reaction_embeddings, query_chunk_size=64, top_k=K)[0]

        # get (score, term) tuples
        matches = []
        for result in query_result:
            score = result['score']
            term = self.reaction_terms[result['corpus_id']]
            matches.append((score, term))

        return sorted(matches, reverse=True)

    @lru_cache(maxsize=100000)
    def ground_with_prior(self, reaction, K=3):
        """ Finds 3*K closest matches based on semantic embedding similarity.
        Incorporates the prior counts, returns K most likely terms."""
        matches = self.ground(reaction, K=3*K)

        # heuristically incorporate prior into the similarity scores
        # NOTE(review): the multiplier is floored at 2, so the prior only changes the
        # ranking for terms whose count is 5 or more (log(count + e) > 2) — confirm intended.
        new_matches = []
        for score, term in matches:
            prior = self.reaction_terms_to_count[term]
            score = score * max(2, math.log(prior + math.e))
            new_matches.append((score, term))

        return sorted(new_matches, reverse=True)[:K]

# Define reaction terms
# Toy vocabulary; the order matters because embedding row i belongs to reaction_terms[i].
reaction_terms = (
    "nausea headache dizziness rash vomiting fatigue common side "
    "effects medication patients drug patient skin cream"
).split()

# Example trainset: each entry holds a whitespace-separated string of terms
trainset_example = [
    {"reactions": term_string}
    for term_string in (
        "nausea headache dizziness",
        "rash vomiting fatigue",
        "common side effects medication",
        "patients drug patient skin cream",
    )
]

# Create the grounder instance
grounder = ReactionGrounder(trainset=trainset_example, reaction_terms=reaction_terms)


In [14]:
# Number of grounded terms to show per query.
K = 5
queries = [
    'pain',
    'fever',
    'i have a runny nose'
]

# For every query, print the K best terms from plain similarity search and from the
# prior-weighted variant (which ranks 3*K candidates before keeping K).
for q in queries:
    result = grounder.ground(q, K=K)
    result_with_prior = grounder.ground_with_prior(q, K=K)

    print("Query: ", q)
    print("\t Ground without prior:")
    for score, term in result:
        print(f"\t\t{term} - {score}")
    print("\t Ground with prior:")
    for score, term in result_with_prior:
        print(f"\t\t{term} - {score}")
        

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query:  pain
	 Ground without prior:
		headache - 0.7172471880912781
		fatigue - 0.3643420934677124
		nausea - 0.34820422530174255
		vomiting - 0.23028042912483215
		patients - 0.16388729214668274
	 Ground with prior:
		headache - 1.4344943761825562
		fatigue - 0.7286841869354248
		nausea - 0.6964084506034851
		vomiting - 0.4605608582496643
		patients - 0.3277745842933655


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query:  fever
	 Ground without prior:
		dizziness - 0.2840457856655121
		rash - 0.2828308343887329
		vomiting - 0.22629790008068085
		nausea - 0.20054276287555695
		effects - 0.19953130185604095
	 Ground with prior:
		dizziness - 0.5680915713310242
		rash - 0.5656616687774658
		vomiting - 0.4525958001613617
		nausea - 0.4010855257511139
		effects - 0.3990626037120819


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query:  i have a runny nose
	 Ground without prior:
		headache - 0.29946398735046387
		nausea - 0.18776363134384155
		vomiting - 0.18563412129878998
		common - 0.1825229674577713
		dizziness - 0.16667845845222473
	 Ground with prior:
		headache - 0.5989279747009277
		nausea - 0.3755272626876831
		vomiting - 0.37126824259757996
		common - 0.3650459349155426
		dizziness - 0.33335691690444946


In [15]:
from operator import add

def resolve_reactions(reactions: list[tuple[float, str]], resolve_f = add) -> list[tuple[float, str]]:
  """Merge duplicate terms by folding their scores with `resolve_f`.

  Args:
    reactions: (score, term) tuples; a term may appear several times.
    resolve_f: Binary function combining the running score of a term (starting
      at 0.0) with the next score for that term. Defaults to addition.

  Returns:
    One (aggregated_score, term) tuple per distinct term, sorted by aggregated
    score from highest to lowest.
  """
  score_by_term = defaultdict(float)

  for score, term in reactions:
    score_by_term[term] = resolve_f(score_by_term[term], score)

  ranked = sorted(score_by_term.items(), key=lambda item: item[1], reverse=True)
  return [(score, term) for term, score in ranked]

In [16]:
class Chunker:
    """Generator-style splitter that cuts a long text into at most `max_windows` snippets.

    Each snippet is taken from a window of `context_window * (1 + window_overlap)`
    characters. If text remains after the window and the window's last newline
    lies at or beyond half of `context_window`, the snippet ends at that newline
    and the text after it is handed to the next window.
    """

    def __init__(self, context_window=3000, max_windows=5):
        self.context_window = context_window
        self.max_windows = max_windows
        self.window_overlap = 0.02

    def __call__(self, paper):
        """Yield `(index, snippet)` pairs, snippets stripped of surrounding whitespace."""
        remaining = paper
        window_no = 0

        while True:
            if window_no >= self.max_windows or not remaining:
                return

            window_len = int(self.context_window * (1.0 + self.window_overlap))
            head = remaining[:window_len]
            remaining = remaining[window_len:]

            cut = head.rfind('\n')
            newline_usable = cut != -1 and cut >= self.context_window // 2
            if remaining and newline_usable:
                # Give everything after the newline back to the next window.
                remaining = head[cut + 1:] + remaining
                head = head[:cut]

            yield window_no, head.strip()
            window_no += 1

In [17]:
# 1

class PredictReactions(dspy.Signature):
    
    # The instruction text is assigned through __doc__ (presumably what dspy uses as the
    # prompt instructions — TODO confirm). The string is not raw, so '\n' is a real
    # newline character: the text tells the model to answer with a bare newline when
    # no reaction is mentioned.
    __doc__ = f"""Given a snippet from a medical article, identify the adverse drug reactions affecting the patient. If none are mentioned in the snippet, say '\n'."""

    # Inputs: the article title and the snippet under inspection.
    title = dspy.InputField()
    context = dspy.InputField()
    # Output: `format` joins a list into a comma-separated string and returns any other value unchanged.
    reactions = dspy.OutputField(desc="list of comma-separated adverse drug reactions", format=lambda x: ', '.join(x) if isinstance(x, list) else x)


In [18]:
# NOTE(review): unpinned install placed after `dspy` has already been imported and used
# by earlier cells; consider moving it to the top of the notebook with a pinned version.
pip install dspy


Note: you may need to restart the kernel to use updated packages.


In [19]:
# NOTE(review): `graph_of_thoughts` is not imported anywhere in the visible cells
# (GraphOfThoughtWithHint below is a local mock class) — confirm this install is needed.
pip install graph_of_thoughts


Note: you may need to restart the kernel to use updated packages.


In [20]:
# 2

# Define a custom class to simulate the behavior of Graph of Thoughts with hint
class GraphOfThoughtWithHint:
    """Placeholder predictor: stores its arguments and always answers with a MockPrediction."""

    def __init__(self, model, n):
        self.model, self.n = model, n

    def predict(self, title, context, hint):
        """Print the received inputs and return a fresh MockPrediction.

        Neither `self.model` nor `self.n` is consulted; no language model is called.
        """
        message = f"Predicting with title: {title}, context: {context}, hint: {hint}"
        print(message)
        # Stand-in result; to be replaced by real prediction logic.
        return MockPrediction()

# Mock class to simulate prediction response
class MockPrediction:
    """Stand-in for a prediction object; exposes `completions` like the real result is expected to."""

    def __init__(self):
        completions = MockCompletions()
        self.completions = completions

class MockCompletions:
    """Stand-in completions object carrying a fixed list of two reaction strings."""

    def __init__(self):
        self.reactions = list(("mock reaction 1", "mock reaction 2"))

# Modify your PredictThenGround class to use GraphOfThoughtWithHint
class PredictThenGround(dspy.Module):
    """Chunk a paper, predict reactions per chunk, ground them and merge duplicates.

    Args:
        context_window: Chunk size handed to Chunker.
        max_windows: Maximum number of chunks processed per paper.
        num_preds: Passed to the predictor as `n`.
        grounding_function: Maps one predicted reaction to a list of
            (score, term) tuples. The default keeps the reaction with score 1.0.
        resolve_function: Combines the scores of duplicate grounded terms.
        hint_prefix: Text placed in front of the gold reactions when `labels`
            are supplied to `forward`.
    """

    def __init__(self, context_window=3000, max_windows=5, num_preds=1, grounding_function=lambda r: [(1.0, r)], resolve_function=add, hint_prefix="Is any of the following reactions discussed in the article snippet?"):
        super().__init__()

        # Divide a biomedical paper into chunks
        self.chunk = Chunker(context_window=context_window, max_windows=max_windows)
        # Initialize the custom Graph of Thought with hint
        self.predict = GraphOfThoughtWithHint(PredictReactions, n=num_preds)
        # Set the grounding function and resolve function
        self.grounding_function = grounding_function
        self.resolve_function = resolve_function
        # Stored on the instance so forward() does not depend on a module-level constant.
        self.hint_prefix = hint_prefix

    def forward(self, title, abstract, context, labels=None):
        """Run the pipeline on one paper.

        Returns:
            dspy.Prediction with `reactions` (final grounded terms, best first),
            `resolved_reactions` ((score, term) after merging duplicates),
            `grounded_reactions` (all (score, term) matches, best first) and
            `ungrounded_reactions` (the raw predicted reaction strings).
        """
        hint = f"{self.hint_prefix} {', '.join(labels.reactions)}." if labels else None
        ungrounded_reactions = []

        # Iterate over each chunk in the paper
        for _, snippet in self.chunk(abstract + '\n\n' + context):
            chunk_reactions = self.predict.predict(title=title, context=[snippet], hint=hint)
            if chunk_reactions and chunk_reactions.completions:
                ungrounded_reactions.extend(extract_reactions_from_strings(chunk_reactions.completions.reactions))
            else:
                # Prediction failed or returned nothing: report it and continue with the next chunk
                print("Warning: Prediction failed or returned None for the chunk:", snippet)

        # Get grounded reactions and grounding confidence for each ungrounded reaction
        grounded_reactions = sorted(
            (match for reaction in ungrounded_reactions for match in self.grounding_function(reaction)),
            reverse=True,
        )
        # Aggregate duplicate predictions and sort based on confidence
        resolved_reactions = resolve_reactions(grounded_reactions, resolve_f=self.resolve_function)
        # Get the final list of grounded reactions
        reactions = [term for _, term in resolved_reactions]

        # Track all predictions; the raw predictions are kept in their own list so that
        # `ungrounded_reactions` is not overwritten by the grounded result.
        return dspy.Prediction(reactions=reactions, resolved_reactions=resolved_reactions, grounded_reactions=grounded_reactions, ungrounded_reactions=ungrounded_reactions)


In [21]:
# 3

# set DSPy to use Ollama local model
# Register the Ollama-served `llama3` model as the LM in dspy's settings.
# NOTE(review): the mock predictor used by PredictThenGround never calls an LM.
lm = dspy.OllamaLocal(model='llama3')
dspy.settings.configure(lm=lm)

In [22]:
# 4

# create an evaluation helper function
# one evaluator per recall cut-off (10 / 20 / 30), all on the same 50 training examples
evaluateR10, evaluateR20, evaluateR30 = (
    Evaluate(
        devset=trainset[100:150],
        metric=recall_metric,
        num_threads=8,
        display_progress=True,
        display_table=0,
        max_errors=100,
    )
    for recall_metric in (dspy_metric_recall10, dspy_metric_recall20, dspy_metric_recall30)
)

In [23]:
# 5

# create the pipeline
# max_windows=1: only the first chunk of each paper is processed. The default
# grounding function keeps every predicted reaction unchanged with score 1.0.
pipeline_no_grounding_no_chunking = PredictThenGround(max_windows=1)

# get an example
example = trainset[0]
# inputs() / labels() split the example into its input fields and its remaining fields
example_x = example.inputs()
example_y = example.labels()

# get a prediction
prediction = pipeline_no_grounding_no_chunking(**example_x)

# print the reactions predicted
print('Predicted reactions: ', prediction.reactions)
print('Gold reactions: ', example_y.reactions)

Predicting with title: HIV-1 Drug Resistance by Ultra-Deep Sequencing Following Short Course Zidovudine, Single-Dose Nevirapine, and Single-Dose Tenofovir with Emtricitabine for Prevention of Mother-to-Child Transmission., context: ['Antiretroviral drug resistance following pMTCT strategies remains a significant problem. With rapid advancements in next generation sequencing technologies, there is more focus on HIV drug-resistant variants of low frequency, or the so-called minority variants. In South Africa, AZT monotherapy for pMTCT, similar to World Health Organization option A, has been used since 2008. In 2010, a single dose of co-formulated TDF/FTC was included in the strategy for prevention of resistance conferred by single-dose nevirapine (sd NVP). The study was conducted in KwaZulu-Natal, South Africa, among pMTCT participants who received AZT monotherapy from 14 weeks of gestation, intrapartum AZT and sd NVP, and postpartum sd TDF/FTC. Twenty-six specimens collected at 6 weeks 

In [24]:
# 6

# Recall@10 on trainset[100:150]. The predictor always returns the two fixed mock
# reactions, so this score reflects the mock and not a language model.
evaluateR10(pipeline_no_grounding_no_chunking)

Predicting with title: Superior Orbital Fissure Syndrome and Ophthalmoplegia Caused by Varicella Zoster Virus with No Skin Eruption in a Patient Treated with Tumor Necrosis Alpha Inhibitor., context: ['Varicella zoster virus lies dormant in the dorsal root ganglia after symptomatic chicken pox infection, usually in childhood. If the virus reactivates in the trigeminal ganglia, it can cause varicella zoster ophthalmicus, which can have severe ocular complications. We report a case of a 73-year-old woman in severe immunosuppression due to treatment with mycophenolate mofetil, glucocorticosteroids and a tumor necrosis factor alpha inhibitor. The reactivation caused superior orbital fissure syndrome, which has only rarely been described in relation to varicella zoster virus reactivation. In our case, the syndrome was seen along with severe encephalitis.\n\nBackground\nThe incidence of varicella zoster virus (VZV) reactivation is increased in immunosuppressed patients, and reactivation of h

Average Metric: 0.0 / 50  (0.0): 100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 1666.07it/s]

Average Metric: 0.0 / 50  (0.0%)





0.0

In [25]:
# 7


# create the pipeline
# Up to five chunks per paper, still without grounding (default grounding function).
pipeline_no_grounding = PredictThenGround(max_windows=5)

# get an example
example = trainset[0]
example_x = example.inputs()
example_y = example.labels()

# get a prediction
prediction = pipeline_no_grounding(**example_x)

# print the reactions predicted
print('Predicted reactions: ', prediction.reactions)
print('Gold reactions: ', example_y.reactions)

# Recall@10 on trainset[100:150]
evaluateR10(pipeline_no_grounding)

Predicting with title: HIV-1 Drug Resistance by Ultra-Deep Sequencing Following Short Course Zidovudine, Single-Dose Nevirapine, and Single-Dose Tenofovir with Emtricitabine for Prevention of Mother-to-Child Transmission., context: ['Antiretroviral drug resistance following pMTCT strategies remains a significant problem. With rapid advancements in next generation sequencing technologies, there is more focus on HIV drug-resistant variants of low frequency, or the so-called minority variants. In South Africa, AZT monotherapy for pMTCT, similar to World Health Organization option A, has been used since 2008. In 2010, a single dose of co-formulated TDF/FTC was included in the strategy for prevention of resistance conferred by single-dose nevirapine (sd NVP). The study was conducted in KwaZulu-Natal, South Africa, among pMTCT participants who received AZT monotherapy from 14 weeks of gestation, intrapartum AZT and sd NVP, and postpartum sd TDF/FTC. Twenty-six specimens collected at 6 weeks 

Average Metric: 0.0 / 50  (0.0): 100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 1281.90it/s]

Average Metric: 0.0 / 50  (0.0%)





0.0

In [26]:
# 8


# number of grounded terms kept per predicted reaction
K = 3

# create the grounding function
grounding_function = partial(grounder.ground, K=K)

# create the pipeline
pipeline_with_ground = PredictThenGround(max_windows=5, grounding_function=grounding_function)

# get an example
example = trainset[0]
example_x = example.inputs()
example_y = example.labels()

# get a prediction
prediction = pipeline_with_ground(**example_x)

# print the reactions predicted
print('Predicted reactions: ', prediction.reactions)
print('Gold reactions: ', example_y.reactions)
print('Recall@10: ', dspy_metric_recall10(example_y, prediction))
# Recall@10 on trainset[100:150]; grounding maps the mock reactions onto the
# vocabulary that `grounder` was built with.
evaluateR10(pipeline_with_ground)

Predicting with title: HIV-1 Drug Resistance by Ultra-Deep Sequencing Following Short Course Zidovudine, Single-Dose Nevirapine, and Single-Dose Tenofovir with Emtricitabine for Prevention of Mother-to-Child Transmission., context: ['Antiretroviral drug resistance following pMTCT strategies remains a significant problem. With rapid advancements in next generation sequencing technologies, there is more focus on HIV drug-resistant variants of low frequency, or the so-called minority variants. In South Africa, AZT monotherapy for pMTCT, similar to World Health Organization option A, has been used since 2008. In 2010, a single dose of co-formulated TDF/FTC was included in the strategy for prevention of resistance conferred by single-dose nevirapine (sd NVP). The study was conducted in KwaZulu-Natal, South Africa, among pMTCT participants who received AZT monotherapy from 14 weeks of gestation, intrapartum AZT and sd NVP, and postpartum sd TDF/FTC. Twenty-six specimens collected at 6 weeks 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicted reactions:  ['common', 'dizziness', 'rash']
Gold reactions:  ['drug resistance', 'exposure during pregnancy', 'viral mutation identified']
Recall@10:  0.0
Predicting with title: Superior Orbital Fissure Syndrome and Ophthalmoplegia Caused by Varicella Zoster Virus with No Skin Eruption in a Patient Treated with Tumor Necrosis Alpha Inhibitor., context: ['Varicella zoster virus lies dormant in the dorsal root ganglia after symptomatic chicken pox infection, usually in childhood. If the virus reactivates in the trigeminal ganglia, it can cause varicella zoster ophthalmicus, which can have severe ocular complications. We report a case of a 73-year-old woman in severe immunosuppression due to treatment with mycophenolate mofetil, glucocorticosteroids and a tumor necrosis factor alpha inhibitor. The reactivation caused superior orbital fissure syndrome, which has only rarely been described in relation to varicella zoster virus reactivation. In our case, the syndrome was seen along

Average Metric: 0.17763157894736842 / 50  (0.4): 100%|███████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 1724.12it/s]

Average Metric: 0.17763157894736842 / 50  (0.4%)





0.36

In [27]:
# Function to get explanations for predicted reactions using Gemini API
def get_explanations(example, prediction, explainer):
    """Ask `explainer` to explain the predicted reactions of one example.

    Passes the example's input title and context together with
    `prediction.reactions` to `explainer.explain_prediction` and returns
    whatever that call returns.
    """
    return explainer.explain_prediction(
        example.inputs().title,
        example.inputs().context,
        prediction.reactions,
    )

# number of grounded terms kept per predicted reaction
K = 3

# Ground with the prior-weighted variant: it scores 3*K candidates and keeps the best K.
grounding_function = partial(grounder.ground_with_prior, K=K)
pipeline_with_ground_and_prior = PredictThenGround(max_windows=5, grounding_function=grounding_function)

# Get an example from the trainset
example = trainset[0]
example_x = example.inputs()
example_y = example.labels()

# Get a prediction
prediction = pipeline_with_ground_and_prior(**example_x)

# Print the reactions predicted
print('Predicted reactions: ', prediction.reactions)
print('Gold reactions: ', example_y.reactions)

# Evaluate the pipeline with Recall@10 metric
evaluateR10(pipeline_with_ground_and_prior)


Predicting with title: HIV-1 Drug Resistance by Ultra-Deep Sequencing Following Short Course Zidovudine, Single-Dose Nevirapine, and Single-Dose Tenofovir with Emtricitabine for Prevention of Mother-to-Child Transmission., context: ['Antiretroviral drug resistance following pMTCT strategies remains a significant problem. With rapid advancements in next generation sequencing technologies, there is more focus on HIV drug-resistant variants of low frequency, or the so-called minority variants. In South Africa, AZT monotherapy for pMTCT, similar to World Health Organization option A, has been used since 2008. In 2010, a single dose of co-formulated TDF/FTC was included in the strategy for prevention of resistance conferred by single-dose nevirapine (sd NVP). The study was conducted in KwaZulu-Natal, South Africa, among pMTCT participants who received AZT monotherapy from 14 weeks of gestation, intrapartum AZT and sd NVP, and postpartum sd TDF/FTC. Twenty-six specimens collected at 6 weeks 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicted reactions:  ['common', 'dizziness', 'rash']
Gold reactions:  ['drug resistance', 'exposure during pregnancy', 'viral mutation identified']
Predicting with title: Superior Orbital Fissure Syndrome and Ophthalmoplegia Caused by Varicella Zoster Virus with No Skin Eruption in a Patient Treated with Tumor Necrosis Alpha Inhibitor., context: ['Varicella zoster virus lies dormant in the dorsal root ganglia after symptomatic chicken pox infection, usually in childhood. If the virus reactivates in the trigeminal ganglia, it can cause varicella zoster ophthalmicus, which can have severe ocular complications. We report a case of a 73-year-old woman in severe immunosuppression due to treatment with mycophenolate mofetil, glucocorticosteroids and a tumor necrosis factor alpha inhibitor. The reactivation caused superior orbital fissure syndrome, which has only rarely been described in relation to varicella zoster virus reactivation. In our case, the syndrome was seen along with severe enc

Average Metric: 0.17763157894736842 / 50  (0.4): 100%|███████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 1666.67it/s]

Average Metric: 0.17763157894736842 / 50  (0.4%)





0.36

In [28]:
# 10

def print_score_and_term(ls):
    """Print every (score, term) pair of `ls` on its own line, term first."""
    for pair in ls:
        score, term = pair
        print('\t\t', term, '\t', score)

# Walk through the same 50 examples the evaluators use and print every intermediate
# result of the prior-weighted pipeline next to the gold reactions.
for example in trainset[100:150]:
    # get an example
    example_x = example.inputs()
    example_y = example.labels()

    # get a prediction
    prediction = pipeline_with_ground_and_prior(**example_x)

    # print the reactions predicted
    print('Gold reactions: ',)
    print('\t', example_y.reactions)
    print('Predicted reactions: ')
    print('\t', prediction.reactions)
    print('Ungrounded reactions: ')
    print('\t', prediction.ungrounded_reactions)

    print('Resolved reactions: ')
    print_score_and_term(prediction.resolved_reactions)
    print('Grounded reactions: ')
    print_score_and_term(prediction.grounded_reactions)
    print('--------')

Predicting with title: Superior Orbital Fissure Syndrome and Ophthalmoplegia Caused by Varicella Zoster Virus with No Skin Eruption in a Patient Treated with Tumor Necrosis Alpha Inhibitor., context: ['Varicella zoster virus lies dormant in the dorsal root ganglia after symptomatic chicken pox infection, usually in childhood. If the virus reactivates in the trigeminal ganglia, it can cause varicella zoster ophthalmicus, which can have severe ocular complications. We report a case of a 73-year-old woman in severe immunosuppression due to treatment with mycophenolate mofetil, glucocorticosteroids and a tumor necrosis factor alpha inhibitor. The reactivation caused superior orbital fissure syndrome, which has only rarely been described in relation to varicella zoster virus reactivation. In our case, the syndrome was seen along with severe encephalitis.\n\nBackground\nThe incidence of varicella zoster virus (VZV) reactivation is increased in immunosuppressed patients, and reactivation of h