In [1]:
import requests

# Hugging Face Token 
headers = {"Authorization": f"Bearer {'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'}"}

Load Model

In [27]:
model = "Google/flan-t5-base"
#model = "HuggingFaceH4/zephyr-7b-beta"
API_URL = "https://api-inference.huggingface.co/models/" + model

Load Data

In [3]:
# Read the data on .dat format 
file_path = '../Datasets/Medical_Documents/test.dat'
with open(file_path, 'r') as file:
    lines = file.readlines()

## Test

## SUMMARIZATION

In this task we ask to summarize some test

In [25]:
# Choose the samples to summarize 
texts_to_summarize = [lines[5099], lines[2197], lines[0]]
texts_to_summarize

['Diabetes Intervention Study. Multi-intervention trial in newly diagnosed NIDDM. OBJECTIVE: In a randomized 5-yr multi-intervention trial, we tested the efficacy of intensified health education (IHE) in improving metabolic control and reducing the level of coronary risk factors and incidence of ischemic heart disease (IHD). RESEARCH DESIGN AND METHODS: Within the intervention group, the benefit of clofibric acid was evaluated in a double-blind study. One thousand one hundred thirty-nine newly diagnosed middle-aged (30- to 55-yr-old) patients with non-insulin-dependent diabetes mellitus (NIDDM) entered the study. They were classified as diet controlled after a 6-wk screening phase with conventional dietary treatment. During the follow-up, the control group (n = 378) was cared for at different diabetes outpatient clinics with a standardized surveillance. The intervention group (n = 761) had a structured IHE that included dietary advice, antismoking and antialcohol education, and ways to

In [28]:
# Creating a query that will generate the requests 
def query(texts):
    inputs = texts

    # Payload with multiple inputs
    payload = {
        "inputs": "summarize:" + inputs,
        "parameters": {
            #"min_length": 50, 
            "do_sample": False
           }
    }
    
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

for text in texts_to_summarize:
    output = query(text)
    print("Answer:", output)

Answer: [{'generated_text': 'IHE improves metabolic control and reducing triglyceride levels.'}]
Answer: [{'generated_text': 'Temporal patterns of cardiac index, DO2, and VO2 on outcome and effects of fluid'}]
Answer: [{'generated_text': 'Excision of unilateral epibulbar limbal dermoids.'}]


## TEXT GENERATION

In this task we ask some general and medical questions

In [19]:
# Example usage with multiple questions
questions = [
    "What are ICD-10 codes used for?",
    "Who is Elton John?",
    "Whatâ€™s clinical NLP?",
    "what are the Symptoms of Malaria?",
    "What is the ICD10 code of Malaria?"
]

In [21]:
# Creating a query that will generate the requests 
def query(input):
    # Payload with multiple inputs
    payload = {
        "inputs": input,
        "parameters": {
            #"min_length": 50
            }
    }

    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Loop across the questions
for input in questions:
    output = query(input)
    print("Answer:", output)


Answer: [{'generated_text': 'identifying medical records'}]
Answer: [{'generated_text': 'elton john'}]
Answer: [{'generated_text': 'clinical NLP'}]
Answer: [{'generated_text': 'a rash'}]
Answer: [{'generated_text': '0'}]


## QUESTION ANSWERING - FOR TEXT GENERATION MODELS

This test gives context and ask a question about it.  
The models in this case are Text Generation or Text2Text

In [29]:
# Example usage with multiple questions and contexts
questions = [
    "What's my name?",
    "How many patient files?",
    "Give me the insights?",
    "What is the problem?"
    ]


contexts = [
    "My name is Clara and I live in Berkeley.",
    lines[0], lines[0], lines[0]
    # Add corresponding contexts for each question
]

In [32]:
def query(questions, contexts):
    # Preparing a list of inputs by concatenating each question with its context
    inputs = f"""Answer the question based on the context below. If the
    question cannot be answered using the information provided answer
    with 'I don't know'.

    Context: {contexts} 

    Question: {questions}

    Answer:

    """

    # Payload with multiple inputs
    payload = {"inputs": inputs,
               #"parameters": {"min_length": 50}
    }

    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

for question, context in zip(questions, contexts):
    output = query(question, context)
    print("Question:", question)
    print("Answer:", output)

Question: What's my name?
Answer: [{'generated_text': 'Clara'}]
Question: How many patient files?
Answer: [{'generated_text': '10'}]
Question: Give me the insights?
Answer: [{'generated_text': "I don't know"}]
Question: What is the problem?
Answer: [{'generated_text': "I don't know"}]


## QA - for QA Models

In [278]:
def query(question, context):
    payload = {
        "inputs": {
            "question": question,
            "context": context
        },
        "parameters": {"min_length": 50}
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()



outputs = [query(q, c) for q, c in zip(questions, contexts)]
for output in outputs:
    print(output)

{'score': 0.9326562285423279, 'start': 11, 'end': 16, 'answer': 'Clara'}
{'score': 0.1944204866886139, 'start': 63, 'end': 65, 'answer': '10'}
{'score': 0.00250326213426888, 'start': 63, 'end': 74, 'answer': '10 patients'}
{'score': 0.06242289021611214, 'start': 188, 'end': 207, 'answer': 'worse visual acuity'}


## ZERO - SHOT CLASSIFICATION

In [2]:
import json

# Path to your JSON file
file_path = './Extract_PDF/articles.json'

# Open the JSON file and load its contents
with open(file_path, 'r') as file:
    data = json.load(file)

In [3]:
inputs = [data[i]['Abstract'] for i in range(5)]
inputs

In [13]:
model = 'cross-encoder/nli-deberta-base'
API_URL = "https://api-inference.huggingface.co/models/" + model
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

print("Labels: ['AI in Health', 'Clinical Trail',  'Medical Recovery', 'Patient Study',  'Digital Twin']" )
for input in inputs:
    output = query(
        {
            "inputs": input,
            "parameters": {"candidate_labels": ['Arts', 'Sports',  'Economy',  'History', 'Medicine']},
        }
        )
    try:
        print("Answer:", output['scores'])
    except:
        print(output)

Labels: ['AI in Health', 'Clinical Trail',  'Medical Recovery', 'Patient Study',  'Digital Twin']
Answer: [0.21885918080806732, 0.21561461687088013, 0.20043905079364777, 0.18664546310901642, 0.17844170331954956]
Answer: [0.34964480996131897, 0.18441031873226166, 0.1787862330675125, 0.14408916234970093, 0.14306947588920593]
Answer: [0.3047272264957428, 0.23937441408634186, 0.19979789853096008, 0.13245397806167603, 0.12364646047353745]
Answer: [0.23808392882347107, 0.21820209920406342, 0.2019212543964386, 0.19520464539527893, 0.14658807218074799]
Answer: [0.2589260935783386, 0.227860227227211, 0.20926468074321747, 0.15562690794467926, 0.14832209050655365]
