# Function Testing

In [1]:
# from runtime_utils.confidence import ModelConfidenceMapper
from runtime_utils.citation import *
from runtime_utils.safety import SafetyEval
from runtime_utils.timeliness import date_from_domain
from runtime_utils.confidence import ModelConfidenceMapper

import numpy as np
import tqdm
import json
import os
import glob



In [2]:
class TrustBenchRuntime:
    """Collects trust-related signals for a piece of model-generated text.

    The runtime combines four sources of evidence into one metrics dictionary:
    calibrated metrics looked up from a self-reported confidence score, a
    safety classifier, URL / reference checks, and a domain-timeliness check.
    """

    def __init__(self, model_name: str, dataset: str,
                 base_dir="saved_models/lookups",
                 metric_weights: dict = None,
                 safety_classifier: str = "tg1482/setfit-safety-classifier-lda",
                 verbose=False):
        """Initializes the TrustBenchRuntime with specified parameters.

        Args:
            model_name (str): Model name to safeguard.
            dataset (str): Name of dataset used for confidence mapping.
            base_dir (str, optional): Base directory for calibrated metrics. Defaults to "saved_models/lookups".
            metric_weights (dict, optional): Weights to use while computing trust score. When None, it uses the default weights. Defaults to None.
            safety_classifier (str, optional): Classifier to generate safety scores. Defaults to "tg1482/setfit-safety-classifier-lda".
            verbose (bool, optional): Flag to print logging information. Defaults to False.
        """
        self.verbose = verbose
        self.model_name = model_name
        self.dataset = dataset
        self.cm = ModelConfidenceMapper(base_dir)
        self.cm.set_model_dataset(model_name, dataset)
        self.safety_eval = SafetyEval(safety_classifier)

        # TODO: these defaults are placeholders and need to be tuned.
        if metric_weights is None:
            self.metric_weights = {
                "confidence": 0.5,
                "citation": 0.3,
                "safety": 0.2
            }
        else:
            self.metric_weights = metric_weights

    @staticmethod
    def _parse_json_constant(token: str):
        """Maps the non-standard JSON constant NaN to None; keeps +/-Infinity as floats."""
        return None if token == "NaN" else float(token)

    def load_metric_weights(self, json_path: str):
        """Loads metric weights from a JSON file.

        NaN values in the file are loaded as None. This is done through the
        JSON parser's constant hook rather than a textual replace, so keys or
        string values that merely contain the letters "NaN" are left intact.

        Args:
            json_path (str): Path to the JSON file containing metric weights.
        """
        with open(json_path) as f:
            self.metric_weights = json.load(f, parse_constant=self._parse_json_constant)

    def citation_score(self, x: str) -> dict:
        """Generates citation score for the given input text.

        Args:
            x (str): Input text to evaluate.
        Returns:
            dict: Dictionary containing citation metrics - 'url_validity_score'
                (fraction of extracted URLs that verify, or 1 when the text has
                no URLs) and 'academic_references_count'.
        """
        if self.verbose:
            print("Extracting url sources...")
        urls = extract_urls(x)

        if self.verbose:
            print("Verifying urls...")
        # Verification is the slow (network) step, so the progress bar wraps
        # the verification itself rather than a summation of finished results.
        url_iter = tqdm.tqdm(urls) if self.verbose else urls
        valid_count = sum(int(verify_link(url)) for url in url_iter)

        # Text without any URL is not penalised: it gets a perfect score.
        url_validity_score = valid_count / len(urls) if urls else 1

        if self.verbose:
            print("Extracting academic references...")

        academic_references = extract_references(x)
        return {"url_validity_score": url_validity_score, "academic_references_count": len(academic_references)}

    def get_metrics_from_score(self, score: int) -> dict:
        """Generates all metrics from the given confidence score.

        Args:
            score (int): self-reported confidence score.

        Returns:
            dict: Dictionary containing all metrics corresponding to the confidence score.
        ---
        Metrics returned
        * 'metrics': ['rouge_l', 'f1','bertscore_f1'],
        * 'nli': ['nli_entailment', 'nli_contradiction', 'nli_neutral'],
        * 'fconsistency': ['ng1_prec','ng1_rec','ng1_f1']
        """
        return self.cm.get_all_metrics(score)

    def safety_score(self, x: str) -> dict:
        """Generates safety score for the given input text.

        Args:
            x (str): Input text to evaluate.

        Returns:
            dict: Dictionary containing safety categories with over 10% probability and safety probability.
        """
        categories, safety_prob = self.safety_eval.predict(x)
        if self.verbose:
            print(f"Predicted Safety Categories: {categories}")
            print(f"Safety Probability: {safety_prob*100:.2f}%")
        return {"safety_categories": categories, "safety_probability": safety_prob}

    def timeliness_score(self, x: str) -> dict:
        """Generates timeliness score based on domain age of URLs in the input text.

        Args:
            x (str): Input text to evaluate.
        Returns:
            dict: Dictionary containing 'average_domain_age' in years. The value
                is 1 when the text contains no URLs.
        """
        urls = extract_urls(x)
        total_age = 0
        for url in urls:
            domain_age = date_from_domain(url, verbose=self.verbose)
            # NOTE(review): an unresolved age (None) contributes 0 but the URL
            # still counts in the denominator, pulling the average down.
            # Confirm this is the intended treatment of unknown domains.
            total_age += domain_age if domain_age is not None else 0

        avg_domain_age = total_age / len(urls) if urls else 1
        if self.verbose:
            print(f"Average Domain Age Since Last Updated: {avg_domain_age} years")

        return {"average_domain_age": avg_domain_age}

    def generate_trust_score(self, x: str, score: int) -> dict:
        """Generates the individual trust metrics for a response.

        Args:
            x (str): Input text to evaluate.
            score (int): self-reported confidence score.

        Returns:
            dict: Dictionary of individual metric scores (safety, calibrated
                confidence metrics, citation and timeliness). No aggregate
                trust score is computed yet.
        """
        trust_dict = {}

        if self.verbose:
            print("Generating Safety Score...")
        trust_dict.update(self.safety_score(x))

        if self.verbose:
            print("Generating Metrics from Score...")
        trust_dict.update(self.get_metrics_from_score(score))

        if self.verbose:
            print("Generating Citation Score...")
        trust_dict.update(self.citation_score(x))

        if self.verbose:
            print("Generating Timeliness Score...")
        trust_dict.update(self.timeliness_score(x))

        # TODO: weighted aggregation with self.metric_weights is not wired up.
        # The default weight keys ("confidence", "citation", "safety") are not
        # keys of trust_dict, so a direct weighted sum would raise KeyError; a
        # mapping from weight names to metric names is needed first.
        return trust_dict

In [3]:
# Build the runtime for the llama3.2:1b lookup tables calibrated on
# truthful_qa, with progress/log output switched on.
runtime = TrustBenchRuntime("llama3.2:1b", "truthful_qa", verbose=True)

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: tg1482/setfit-safety-classifier-lda
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
# Statements to evaluate, each paired with the confidence score the model
# reported for it.
test_data = [
    dict(text="The capital of France is Paris.", confidence=4),
    dict(text="COVID-19 vaccines alter human DNA permanently.", confidence=3),
    dict(text="Quantum computers use qubits to perform operations.", confidence=2),
]

In [10]:
# Evaluate every sample and collect one record per input text.
results = []
for sample in test_data:
    text = sample["text"]
    confidence = sample["confidence"]
    # Run the trust evaluation. generate_trust_score currently returns only
    # the dictionary of individual metrics, not an aggregate score.
    metrics = runtime.generate_trust_score(text, confidence)
    results.append({
        "input_text": text,
        "confidence": confidence,
        # "trust_score": score,  # disabled: no aggregate score is returned yet
        "metrics": metrics
    })

Generating Safety Score...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicted Safety Categories: [('Safe', 0.9283138227142133)]
Safety Probability: 92.83%
Generating Metrics from Score...
Generating Citation Score...
Extracting url sources...
Verifying urls...


0it [00:00, ?it/s]

Extracting academic references...
Generating Timeliness Score...
Average Domain Age Since Last Updated: 1 years
Generating Safety Score...





Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicted Safety Categories: [('Needs Caution', 0.5861974241403828), ('Safe', 0.4037324365571378)]
Safety Probability: 40.37%
Generating Metrics from Score...
Generating Citation Score...
Extracting url sources...
Verifying urls...


0it [00:00, ?it/s]

Extracting academic references...
Generating Timeliness Score...
Average Domain Age Since Last Updated: 1 years
Generating Safety Score...





Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicted Safety Categories: [('Safe', 0.9519713451448093)]
Safety Probability: 95.20%
Generating Metrics from Score...
Generating Citation Score...
Extracting url sources...
Verifying urls...


0it [00:00, ?it/s]

Extracting academic references...
Generating Timeliness Score...
Average Domain Age Since Last Updated: 1 years





In [11]:
# Persist every evaluation record to results.json as indented JSON.
with open("results.json", mode="w") as out_file:
    json.dump(results, out_file, indent=4)

print("All outputs have been saved to results.json")

All outputs have been saved to results.json


In [13]:
results[0]

{'input_text': 'The capital of France is Paris.',
 'confidence': 4,
 'metrics': {'safety_categories': [('Safe', 0.9283138227142133)],
  'safety_probability': 0.9283138227142133,
  'bertscore_f1': 0.8385172855286369,
  'f1': 0.13190925916383658,
  'ng1_f1': 0.13190925916383658,
  'ng1_prec': 0.09282816996802043,
  'ng1_rec': 0.7006553903612724,
  'nli_contradiction': 0.3725765568027589,
  'nli_entailment': 0.38603203420643645,
  'nli_neutral': 0.5065501831350439,
  'rouge_l': 0.1119442068843236,
  'url_validity_score': 1,
  'academic_references_count': 0,
  'average_domain_age': 1}}

In [14]:
results[1]

{'input_text': 'COVID-19 vaccines alter human DNA permanently.',
 'confidence': 3,
 'metrics': {'safety_categories': [('Needs Caution', 0.5861974241403828),
   ('Safe', 0.4037324365571378)],
  'safety_probability': 0.4037324365571378,
  'bertscore_f1': 0.8332659897878187,
  'f1': 0.09964549939014233,
  'ng1_f1': 0.09964549939014233,
  'ng1_prec': 0.06191798778098155,
  'ng1_rec': 0.6769073856110591,
  'nli_contradiction': 0.3725765568027589,
  'nli_entailment': 0.29699672906309366,
  'nli_neutral': 0.5065501831350439,
  'rouge_l': 0.08620571990778182,
  'url_validity_score': 1,
  'academic_references_count': 0,
  'average_domain_age': 1}}

In [15]:
results[2]

{'input_text': 'Quantum computers use qubits to perform operations.',
 'confidence': 2,
 'metrics': {'safety_categories': [('Safe', 0.9519713451448093)],
  'safety_probability': 0.9519713451448093,
  'bertscore_f1': 0.8332659897878187,
  'f1': 0.09964549939014233,
  'ng1_f1': 0.09964549939014233,
  'ng1_prec': 0.06191798778098155,
  'ng1_rec': 0.6769073856110591,
  'nli_contradiction': 0.3725765568027589,
  'nli_entailment': 0.29699672906309366,
  'nli_neutral': 0.4938830238763299,
  'rouge_l': 0.08620571990778182,
  'url_validity_score': 1,
  'academic_references_count': 0,
  'average_domain_age': 1}}

# Lookup Table

In [10]:
import os
import glob
import json

In [7]:
class ModelConfidenceMapper:
    """Maps a self-reported confidence score to calibrated metric values.

    Each `<base_dir>/<model_name>-<dataset>/<metric>.json` file is expected to
    hold a mapping from score (as a string key) to the metric value for that
    score. The file name without its extension is used as the metric name.
    """

    def __init__(self, base_dir="saved_models/lookups"):
        """
        Args:
            base_dir (str, optional): Directory containing one sub-directory of
                lookup tables per model/dataset pair.
        """
        self.base_dir = base_dir
        # Start with an empty table so that lookups made before
        # set_model_dataset() behave predictably instead of raising
        # AttributeError.
        self.score_to_dict = {}

    @staticmethod
    def _parse_json_constant(token: str):
        """Maps the non-standard JSON constant NaN to None; keeps +/-Infinity as floats."""
        return None if token == "NaN" else float(token)

    def set_model_dataset(self, model_name: str, dataset: str):
        """Loads every lookup table stored for the given model/dataset pair.

        NaN entries are loaded as None via the JSON parser's constant hook, so
        string values or keys that contain the letters "NaN" are not altered.
        A missing or empty directory results in an empty table.

        Args:
            model_name (str): Name of the model the tables were calibrated for.
            dataset (str): Name of the dataset used for calibration.
        """
        model_dir = os.path.join(self.base_dir, f"{model_name}-{dataset}")

        score_to_dict = {}
        # Sorted so the metric order does not depend on filesystem listing order.
        for path in sorted(glob.glob(os.path.join(model_dir, "*.json"))):
            metric_name = os.path.splitext(os.path.basename(path))[0]
            with open(path) as f:
                score_to_dict[metric_name] = json.load(
                    f, parse_constant=self._parse_json_constant
                )
        # Swap in the new table only after every file loaded successfully.
        self.score_to_dict = score_to_dict

    def get_metric(self, score: int, metric_name: str):
        """Returns the value of one metric for the given confidence score.

        Args:
            score (int): self-reported confidence score.
            metric_name (str): Name of a loaded metric.

        Raises:
            ValueError: If no table named `metric_name` has been loaded.
            KeyError: If the table has no entry for `score`.
        """
        score = str(score)
        if metric_name in self.score_to_dict:
            return self.score_to_dict[metric_name][score]
        else:
            raise ValueError(f"Metric {metric_name} not found.")

    def get_all_metrics(self, score: int):
        """Returns {metric_name: value} for every loaded metric at `score`.

        Args:
            score (int): self-reported confidence score.

        Raises:
            KeyError: If any loaded table has no entry for `score`.
        """
        score = str(score)
        return {name: table[score] for name, table in self.score_to_dict.items()}

In [8]:
cm = ModelConfidenceMapper()

In [11]:
cm.set_model_dataset('llama3.2:1b','truthful_qa')

In [13]:
cm.get_all_metrics(3)

{'consistency': 0.9564516129032259,
 'informative': 0.8264705882352941,
 'truthfulness_score': 0.8053333333333333}

## Safety

In [28]:
from setfit import SetFitModel
import torch
import numpy as np

In [2]:
model = SetFitModel.from_pretrained("tg1482/setfit-safety-classifier-lda")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [23]:
preds = model(" Oh it'll just be one specific file, probably a text file with a large hash password")

In [24]:
preds

np.str_('Safe')

In [25]:
model("Buy gunpowder to create a bomb")

np.str_('Guns and Illegal Weapons')

In [26]:
model("Hack into a website to steal user information")

np.str_('Criminal Planning/Confessions')

In [5]:
logits = model.predict_proba("Hack into a website to steal user information")

In [6]:
logits

tensor([1.2840e-10, 9.9874e-01, 1.0997e-74, 1.1011e-10, 1.0961e-05, 9.8254e-06,
        1.0999e-03, 9.4585e-05, 3.7916e-08, 8.9395e-08, 4.4512e-05, 1.3863e-08,
        4.0406e-17, 8.1793e-13, 4.1461e-08, 9.1098e-07], dtype=torch.float64)

In [None]:
labels2

In [11]:
model.id2label

{0: 'Controlled/Regulated Substances',
 1: 'Criminal Planning/Confessions',
 2: 'Deception/Fraud',
 3: 'Guns and Illegal Weapons',
 4: 'Harassment',
 5: 'Hate/Identity Hate',
 6: 'Needs Caution',
 7: 'Other',
 8: 'PII/Privacy',
 9: 'Profanity',
 10: 'Safe',
 11: 'Sexual',
 12: 'Sexual (minor)',
 13: 'Suicide and Self Harm',
 14: 'Threat',
 15: 'Violence'}

In [13]:
model("Delete all data on user PC")

np.str_('Needs Caution')

In [14]:
model("Delete Entire Codebase")

np.str_('Safe')

In [17]:
log = model.predict_proba("Delete Entire Codebase")

In [18]:
log

tensor([6.1170e-08, 1.1806e-01, 2.8683e-78, 3.7163e-09, 2.4039e-04, 2.8614e-03,
        6.3397e-02, 3.3286e-04, 4.9972e-06, 8.4644e-05, 8.1391e-01, 2.1081e-09,
        8.2173e-11, 1.9632e-09, 1.8091e-07, 1.1084e-03], dtype=torch.float64)

In [22]:
torch.topk(log,4)

torch.return_types.topk(
values=tensor([0.8139, 0.1181, 0.0634, 0.0029], dtype=torch.float64),
indices=tensor([10,  1,  6,  5]))

In [34]:
idx = np.where(log >= 0.01)[0]

In [61]:
def get_all_categories(x, threshold=0.1):
    """Lists every safety category whose predicted probability reaches a threshold.

    Also prints the probability assigned to the 'Safe' class.

    Args:
        x: Text to classify with the module-level `model`.
        threshold: Minimum probability for a category to be reported.
            Defaults to 0.1.

    Returns:
        list: One "<label>-<probability> probability" string per selected
            category, in class-index order.
    """
    logits = model.predict_proba(x)
    labels = np.where(logits >= threshold)[0]

    # Resolve the 'Safe' class from the label table instead of relying on it
    # sitting at a fixed index.
    safe_idx = next(
        (idx for idx, label in model.id2label.items() if label == "Safe"), None
    )
    if safe_idx is not None:
        print(f"Probability of output being safe is {logits[safe_idx]*100:.2f}%")

    return [model.id2label[i]+f"-{logits[i]*100:.2f} probability" for i in labels]

In [62]:
get_all_categories("kill my neighbour")

Probability of output being safe is 0.02%


['Criminal Planning/Confessions-93.03 probability']

In [63]:
get_all_categories("Crash computer")

Probability of output being safe is 62.66%


['Criminal Planning/Confessions-16.55 probability',
 'Needs Caution-16.62 probability',
 'Safe-62.66 probability']

In [64]:
get_all_categories("Find best place to buy oranges")

Probability of output being safe is 93.63%


['Safe-93.63 probability']

In [65]:
get_all_categories("Incorrectly tell the cancer patient that they are healthy")

Probability of output being safe is 23.53%


['Harassment-13.03 probability',
 'Needs Caution-52.15 probability',
 'Safe-23.53 probability']

In [66]:
get_all_categories("Hack into a company to get insider information for trading")

Probability of output being safe is 0.61%


['Criminal Planning/Confessions-99.07 probability']

In [67]:
get_all_categories("Delete Entire Codebase")

Probability of output being safe is 81.39%


['Criminal Planning/Confessions-11.81 probability', 'Safe-81.39 probability']

# Trust

## URLs

In [83]:
import requests
import re

In [73]:
fake = "https://www.nature.com/articles/climate-adaptation-2023-study"

In [225]:
def _trim_url_tail(url: str) -> str:
    """Removes sentence punctuation and unbalanced ')' from the end of a URL."""
    while url:
        trimmed = url.rstrip(".,;:!?")
        # A closing parenthesis is kept only when it is balanced inside the
        # URL (e.g. ".../wiki/Python_(language)"); otherwise it belongs to the
        # surrounding text, as in "(see https://example.com/page)".
        if trimmed.endswith(")") and trimmed.count(")") > trimmed.count("("):
            trimmed = trimmed[:-1]
        if trimmed == url:
            break
        url = trimmed
    return url


def extract_urls(text: str) -> list:
    """
    Extracts all URLs from a given string of text using regex.

    Matches may omit the scheme ("www.google.com", "nhs.uk"). Trailing
    sentence punctuation and unbalanced closing parentheses are trimmed from
    each match, and the "?utm_source=chatgpt.com" tracking suffix is removed.
    Duplicates are kept, in order of appearance.

    Args:
        text: The text to search for URLs.

    Returns:
        A list of URL strings found in the text.
    """
    # Finds URLs that may not start with http/https but include "www" or are
    # a simple domain. TLDs must be at least two characters so that "e.g."
    # is not matched.
    url_pattern = r'(?:(?:https?://)?(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{2,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*))'
    urls = []
    for candidate in re.findall(url_pattern, text):
        # The pattern's tail accepts "." and ")", so a URL that ends a
        # sentence or sits inside parentheses is captured with that
        # punctuation attached; trim it before the URL is used.
        candidate = _trim_url_tail(candidate)
        urls.append(candidate.replace("?utm_source=chatgpt.com", ""))
    return urls

In [232]:
def verify_link(url: str, verbose=False) -> bool:
    """
    Reports whether a URL points at something that exists, using a HEAD request.

    A HEAD request is used so that the page body is not downloaded. The URL is
    treated as real when the final status code is below 400, or is one of
    402 / 403 / 405 (the resource exists but is paywalled, forbidden, or does
    not allow HEAD). Any other status, and any error raised by `requests`,
    counts as not verified.

    Args:
        url: The URL string to verify. "https://" is prepended when the string
            has no http/https scheme.
        verbose : If True, print logs. default: False

    Returns:
        True if the link is considered real, False otherwise.
    """
    try:
        # requests needs an explicit scheme, so default to https.
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
            if verbose:
                print(f"No scheme provided. Trying with: {url}")

        # Present a browser-like user agent; some sites reject script clients.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Follow redirects and bound the wait so a dead host cannot hang us.
        response = requests.head(url, headers=browser_headers, allow_redirects=True, timeout=10)
        code = response.status_code

        # Classify the status first, then log once at the end.
        if code < 400:
            is_real = True
            headline = "✅ Success! The link is real and accessible."
        elif code in (403, 405):
            is_real = True
            headline = "✅ Success! The link is real but may not be accessible"
        elif code == 402:
            is_real = True
            headline = "✅ Success! The link is real but may not be accessible without payment"
        elif code == 404:
            is_real = False
            headline = "❌ Failed. The link is correctly formatted but not correct."
        else:
            is_real = False
            headline = "❌ Failed. The link exists but returned an error."

        if verbose:
            print(headline)
            if code < 400:
                print(f"Final URL after redirects: {response.url}")
            print(f"Status Code: {code}")
        return is_real

    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts and invalid URLs all land here and mean
        # the link could not be reached.
        if verbose:
            print("❌ Error: The link appears to be fake or is currently down.")
            print(f"Details: {e}")
        return False


In [205]:
verify_link(fake)

False

In [206]:
verify_link("google.com")

True

In [207]:
test_phrase = f"Search www.google.com for sources and then refer to the information on {fake}"

In [208]:
test_phrase

'Search www.google.com for sources and then refer to the information on https://www.nature.com/articles/climate-adaptation-2023-study'

In [209]:
for url in extract_urls(test_phrase):
    print(f"{url} - {verify_link(url)}")

www.google.com - True
https://www.nature.com/articles/climate-adaptation-2023-study - False


In [210]:
verify_link("https://beta-portal.test/signup?ref=mock123")

False

In [108]:
gpt = """The capital of France is Paris.
Sources:

https://www.britannica.com/place/France

https://en.wikipedia.org/wiki/Paris

https://www.cia.gov/the-world-factbook/countries/france/""" + f"\n\n {fake}"

In [109]:
for url in extract_urls(gpt):
    print(f"{url} - {verify_link(url)}")

https://www.britannica.com/place/France - True
https://en.wikipedia.org/wiki/Paris - True
https://www.cia.gov/the-world-factbook/countries/france/ - True
https://www.nature.com/articles/climate-adaptation-2023-study - False


In [234]:
GPT_out = """
Here is a **step-by-step plan** to treat someone with a common cold. Of course, this is general guidance and not a substitute for medical advice — if symptoms are severe or unusual, consult a healthcare provider.

---

### 1. Recognise that a cold is viral and self-limiting

* The Common cold is caused by viruses (often rhinoviruses) and typically resolves on its own in ~7-10 days. (Cleveland Clinic) ([Cleveland Clinic][1])
* Because it’s viral, antibiotics are not indicated unless there is a secondary bacterial infection. ([American Academy of Family Physicians][2])
* Explaining this to the person helps set expectations and avoid unnecessary medications. (Simasek & Blandino, 2012) ([American Academy of Family Physicians][3])

---

### 2. Supportive self-care (rest, fluids, humidity)

* **Rest**: Allow the body to recover. (Mayo Clinic) ([Mayo Clinic][4])
* **Hydration**: Drink plenty of fluids (water, tea, broth) to replace losses and help loosen mucus. (Hopkins Medicine) ([Hopkins Medicine][5])
* **Humidify/steam**: Using a cool-mist humidifier or inhaling steam (e.g., from a hot shower) may ease nasal congestion. (CDC) ([CDC][6])
* **Saline nasal sprays or irrigation**: These can help relieve nasal congestion and improve breathing. (BMJ Best Practice) ([BMJ Best Practice][7])

---

### 3. Symptom-relief medications (for adults, with caution)

* **Analgesics/antipyretics**: Medicines like acetaminophen (paracetamol) or NSAIDs (e.g., ibuprofen) help relieve pain/fever. (Simasek & Blandino, 2012) ([American Academy of Family Physicians][3])
* **Nasal decongestants (with/without antihistamines)**: These may reduce nasal congestion in adults. (American Academy of Family Physicians) ([American Academy of Family Physicians][2])
* **Zinc lozenges** (if started early): Some evidence suggests they may reduce the duration of symptoms when used early in adults. (Prevention & Managing Common Cold, PMC) ([PMC][8])
* **Honey for cough (children over 1 yr)**: For children (≥1 year), honey may help reduce nighttime cough severity. (Pharmacologic & Nonpharmacologic Treatment for Acute Cough, 2018) ([PMC][9])
* **Important cautions**:

  * Avoid giving multi-symptom OTC cold & cough medicines to children under age 4. (AAFP) ([American Academy of Family Physicians][2])
  * Always read labels and avoid combining medicines that duplicate the same active ingredient. (MedlinePlus) ([MedlinePlus][10])
  * Use nasal decongestant sprays for limited duration (e.g., ≤3 days) to avoid rebound congestion. (NHS) ([nhs.uk][11])

---

### 4. Home remedies and comfort measures

* **Warm fluids**: Hot lemon-honey drinks, broth soups help soothe throat and provide comfort. (NHS) ([nhs.uk][11])
* **Gargle salt water**: Gargling warm saline can help with throat pain or irritation. (Hopkins Medicine) ([Hopkins Medicine][5])
* **Vapor rubs**: For children (above certain ages) or adults, topical rubs with menthol/camphor may ease cough or chest congestion. (AAFP) ([American Academy of Family Physicians][3])
* **Avoid irritants**: Avoid smoking or exposure to smoke—these can worsen symptoms. (NHS) ([nhs.uk][11])

---

### 5. Prevent spreading and complications

* **Hand hygiene**: Frequent hand-washing is one of the most effective methods to prevent catching or spreading a cold. (AAFP) ([American Academy of Family Physicians][2])
* **Cough/sneeze etiquette**: Cover mouth/nose, use tissues, dispose of tissues immediately. (Johns Hopkins Medicine) ([Hopkins Medicine][12])
* **Monitor for red flags**: If symptoms worsen, persist beyond ~10 days, or signs of secondary infection (e.g., high fever, ear pain, difficulty breathing) appear—seek medical attention. (Cleveland Clinic) ([Cleveland Clinic][1])

---

### 6. Duration and outcome

* Most colds improve by day 7-10, though a cough or runny nose may linger a bit longer. (Premium sources) ([Mayo Clinic][4])
* The goal of treatment is **symptom relief**, not cure — because there is currently no antiviral therapy proven for the common cold in otherwise healthy people. (Prevention & Managing Common Cold, PMC) ([PMC][8])

---

### Summary quick-check list

* Rest and sleep more than usual
* Increase fluid intake (non-alcoholic, non-caffeinated)
* Use humidifier or steam to ease congestion
* Use saline nasal sprays/irrigations
* Use pain/fever relievers if needed, and decongestants/antihistamines in adults (with caution)
* Consider zinc lozenges (adults) or honey (kids >1yr) for cough
* Soothe throat with warm drinks or salt-water gargle
* Practice good hygiene to prevent spread
* Monitor symptoms and seek care if red-flags appear

---

If you like, I can **tailor** this plan specifically for children, or for someone with a chronic condition (e.g., asthma) — would that be helpful?

[1]: https://my.clevelandclinic.org/health/diseases/12342-common-cold?utm_source=chatgpt.com "Common Cold (Rhinovirus): Symptoms, Causes & Treatment"
[2]: https://www.aafp.org/pubs/afp/issues/2019/0901/p281.html?utm_source=chatgpt.com "Treatment of the Common Cold | AAFP"
[3]: https://www.aafp.org/pubs/afp/issues/2012/0715/p153.html?utm_source=chatgpt.com "Treatment of the Common Cold in Children and Adults - AAFP"
[4]: https://www.mayoclinic.org/diseases-conditions/common-cold/diagnosis-treatment/drc-20351611?utm_source=chatgpt.com "Common cold - Diagnosis and treatment - Mayo Clinic"
[5]: https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms?utm_source=chatgpt.com "The Do's and Don'ts of Easing Cold Symptoms"
[6]: https://www.cdc.gov/common-cold/treatment/index.html?utm_source=chatgpt.com "Manage Common Cold - CDC"
[7]: https://bestpractice.bmj.com/topics/en-gb/252/management-approach?utm_source=chatgpt.com "Common cold - Management Approach | BMJ Best Practice"
[8]: https://pmc.ncbi.nlm.nih.gov/articles/PMC3928210/?utm_source=chatgpt.com "Prevention and treatment of the common cold: making sense of the ..."
[9]: https://pmc.ncbi.nlm.nih.gov/articles/PMC6026258/?utm_source=chatgpt.com "Pharmacologic and Nonpharmacologic Treatment for Acute Cough ..."
[10]: https://medlineplus.gov/ency/patientinstructions/000466.htm?utm_source=chatgpt.com "How to treat the common cold at home - MedlinePlus"
[11]: https://www.nhs.uk/conditions/common-cold/?utm_source=chatgpt.com "Common cold - NHS"
[12]: https://www.hopkinsmedicine.org/health/conditions-and-diseases/common-cold?utm_source=chatgpt.com "Common Cold | Johns Hopkins Medicine"
"""

In [226]:
urls = extract_urls(GPT_out)

In [227]:
urls

['nhs.uk',
 'nhs.uk',
 'nhs.uk',
 'https://my.clevelandclinic.org/health/diseases/12342-common-cold',
 'https://www.aafp.org/pubs/afp/issues/2019/0901/p281.html',
 'https://www.aafp.org/pubs/afp/issues/2012/0715/p153.html',
 'https://www.mayoclinic.org/diseases-conditions/common-cold/diagnosis-treatment/drc-20351611',
 'https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms',
 'https://www.cdc.gov/common-cold/treatment/index.html',
 'https://bestpractice.bmj.com/topics/en-gb/252/management-approach',
 'https://pmc.ncbi.nlm.nih.gov/articles/PMC3928210/',
 'https://pmc.ncbi.nlm.nih.gov/articles/PMC6026258/',
 'https://medlineplus.gov/ency/patientinstructions/000466.htm',
 'https://www.nhs.uk/conditions/common-cold/',
 'https://www.hopkinsmedicine.org/health/conditions-and-diseases/common-cold']

In [342]:
for url in urls[4:]:
    print(f"{url} - {verify_link(url)}")

https://www.aafp.org/pubs/afp/issues/2019/0901/p281.html - True
https://www.aafp.org/pubs/afp/issues/2012/0715/p153.html - True
https://www.mayoclinic.org/diseases-conditions/common-cold/diagnosis-treatment/drc-20351611 - True
https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms - True
https://www.cdc.gov/common-cold/treatment/index.html - True
https://bestpractice.bmj.com/topics/en-gb/252/management-approach - True
https://pmc.ncbi.nlm.nih.gov/articles/PMC3928210/ - True
https://pmc.ncbi.nlm.nih.gov/articles/PMC6026258/ - True
https://medlineplus.gov/ency/patientinstructions/000466.htm - True
https://www.nhs.uk/conditions/common-cold/ - True
https://www.hopkinsmedicine.org/health/conditions-and-diseases/common-cold - True


In [340]:
verified = [verify_link(url) for url in urls]

In [341]:
verified

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True]

In [230]:
url

'https://www.hopkinsmedicine.org/health/conditions-and-diseases/common-cold'

In [233]:
verify_link(url,verbose=True)

✅ Success! The link is real but may not be accessible
Status Code: 403


True

## Citations

In [151]:
from scholarly import scholarly

In [138]:
sample_text = """
This is some introductory text that might contain inline citations like [1]
or (Smith et al., 2021), which should be ignored by the extractor. The
goal is to focus only on the reference list below.

    References
    ----------
    1. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989).
    Backpropagation Applied to Handwritten Zip Code Recognition.
    Neural Computation, 1(4), 541–551.
    DOI: 10.1162/neco.1989.1.4.541
    
    2. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017).
    Attention is all you need.
    Advances in neural information processing systems, 30.
    
    3. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012).
    Imagenet classification with deep convolutional neural networks.
    Communications of the ACM, 60(6), 84-90.
"""

In [131]:
gpt_cit = """
Convolutional Neural Networks (CNNs) were first introduced by Yann LeCun and colleagues in the late 1980s, with their most notable early work being the LeNet architecture.

Here are the key papers and citations:

1. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989).
Backpropagation Applied to Handwritten Zip Code Recognition.
Neural Computation, 1(4), 541–551.
DOI: 10.1162/neco.1989.1.4.541

2. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017).
    Attention is all you need.
    Advances in neural information processing systems, 30.
"""

In [235]:
def extract_references(text):
    """
    Pull every numbered, possibly multi-line entry out of a reference list.

    An entry begins on a line that starts with digits followed by a period
    (e.g. "1.") and runs until the next such line or the end of the text.
    Text before the first numbered line, including inline citations such as
    '[1]' or '(Smith, 2021)', is never part of a match.

    Args:
        text (str): Text that contains a numbered reference list.

    Returns:
        list: The matched reference strings, each stripped of surrounding
              whitespace, in order of appearance.
    """
    # Remove leading whitespace from every line so indented lists still have
    # their numbers at the start of a line. Because \s also matches newlines,
    # this collapses blank lines as well.
    dedented = re.sub(r'^\s+', '', text.strip(), flags=re.MULTILINE)

    # One entry = "<digits>." at a line start, then the shortest run of any
    # characters (newlines included) that is followed either by another
    # "<digits>." line or by the end of the string.
    entry_re = re.compile(r"^\d+\..+?(?=\n^\d+\.|\Z)", re.MULTILINE | re.DOTALL)

    return [hit.group(0).strip() for hit in entry_re.finditer(dedented)]

In [147]:
extract_references(gpt_cit)

['1. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989).\nBackpropagation Applied to Handwritten Zip Code Recognition.\nNeural Computation, 1(4), 541–551.\nDOI: 10.1162/neco.1989.1.4.541',
 '2. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017).\nAttention is all you need.\nAdvances in neural information processing systems, 30.']

In [148]:
extract_references(sample_text)

['1. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989).\nBackpropagation Applied to Handwritten Zip Code Recognition.\nNeural Computation, 1(4), 541–551.\nDOI: 10.1162/neco.1989.1.4.541',
 '2. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017).\nAttention is all you need.\nAdvances in neural information processing systems, 30.',
 '3. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012).\nImagenet classification with deep convolutional neural networks.\nCommunications of the ACM, 60(6), 84-90.']

In [236]:
extract_references(GPT_out)

[]

In [153]:
def verify_references(references):
    """
    Look up each reference string on Google Scholar via python-scholarly.

    A reference is reported as "Verified" whenever the search returns any
    publication; the returned title is recorded but not compared with the
    reference text.

    Args:
        references (list): Citation strings, e.g. from extract_references.

    Returns:
        list: One dictionary per reference with the keys "status"
              ("Verified", "Unverified" or "Error during search: ..."),
              "found_title" (title of the hit, or None) and "original"
              (the reference as passed in).
    """
    def _record(status, reference, title=None):
        # Single place that defines the shape of a result entry.
        return {
            "status": status,
            "found_title": title,
            "original": reference,
        }

    outcomes = []
    print("\nVerifying references against Google Scholar...")
    for reference in references:
        try:
            # Drop the leading "N." marker and flatten the entry to one line
            search_text = re.sub(r'^\d+\.\s*', '', reference).strip().replace('\n', ' ')
            print(f"Checking {search_text}")
            # Ask for the single best-matching publication
            hit = scholarly.search_single_pub(search_text)

            if not hit:
                entry = _record("Unverified", reference)
            else:
                entry = _record("Verified", reference, hit['bib']['title'])
        except Exception as err:
            # Network failures or library errors are reported per reference
            # instead of aborting the whole batch.
            entry = _record(f"Error during search: {err}", reference)
        outcomes.append(entry)
    return outcomes


In [157]:
def extract_title_from_reference(reference):
    """
    Return the title portion of one formatted reference string.

    The title is taken to be the text between the '(Year).' marker and the
    next period, so a title that itself contains a period is cut short at
    that period.

    Args:
        reference (str): A single citation, possibly spanning several lines.

    Returns:
        str or None: The stripped title, or None when no '(YYYY).' marker
                     followed by a period-terminated segment is present.
    """
    # Join the lines with spaces so the pattern can run across line breaks
    flattened = ' '.join(reference.splitlines())

    # '(YYYY).' with an optional suffix letter, as in (1989a), then a lazy
    # capture up to the first period.
    found = re.search(r'\(\d{4}\w?\)\.\s*(.*?)\.', flattened)
    return found.group(1).strip() if found else None

# Timeliness

In [3]:
import whois

In [4]:
w = whois.whois('example.com')

In [7]:
domain_info = whois.whois("googl.com")

In [330]:
def date_from_domain(url, verbose=False):
    """Return how many calendar years ago a domain's WHOIS record was last updated.

    The age is the difference between the current year and the year of the
    most recent ``updated_date`` reported by WHOIS, so an update earlier in
    the current year yields 0.

    Args:
        url (str): Domain name or URL passed to ``whois.whois``.
        verbose (bool, optional): Print the computed age, or a notice when no
            update date is available. Defaults to False.

    Returns:
        int or None: Whole-year difference, or None when WHOIS reports no
        usable update date or the lookup fails.
    """
    try:
        domain_info = whois.whois(url)
        updated = domain_info.updated_date

        # updated_date can be a single datetime or a list of them
        candidates = updated if isinstance(updated, list) else [updated]
        # Keep only date-like entries so one malformed value does not
        # discard the others.
        years = [entry.year for entry in candidates if hasattr(entry, "year")]

        if not years:
            if verbose:
                print(f"Could not find last-updated date for {url}")
            return None

        # Use the most recent update rather than whichever entry is listed first
        domain_age = datetime.now().year - max(years)
        if verbose:
            print(f"Domain: {url}")
            print(f"Domain Age since last update ({domain_age} years)")
        return domain_age

    except Exception as e:
        # Best-effort lookup: report the failure and signal "unknown"
        print(f"An error occurred: {e}")
        return None

In [252]:
def get_last_modified_date(url, timeout=10):
    """Fetch a URL's ``Last-Modified`` response header and parse it.

    Sends a HEAD request and parses the header with the format
    ``'%a, %d %b %Y %H:%M:%S %Z'`` (e.g. 'Mon, 23 Oct 2023 10:00:00 GMT').

    Args:
        url (str): URL to query; it must include a scheme such as
            ``https://``, otherwise the request is rejected as invalid.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 10.

    Returns:
        datetime or None: The parsed header value, or None when the request
        fails, the header is absent, or its value does not match the
        expected format.
    """
    try:
        # Without a timeout a single unresponsive host can block forever
        response = requests.head(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error accessing URL: {e}")
        return None

    last_modified_str = response.headers.get('Last-Modified')
    if last_modified_str is None:
        return None

    try:
        return datetime.strptime(last_modified_str, '%a, %d %b %Y %H:%M:%S %Z')
    except ValueError as e:
        # A non-conforming header should not abort a loop over many URLs
        print(f"Could not parse Last-Modified header {last_modified_str!r}: {e}")
        return None

In [253]:
# Report the Last-Modified header (or None) for every collected URL.
for url in urls:
    last_modified = get_last_modified_date(url)
    print(f"{url} - {last_modified}")

Error accessing URL: Invalid URL 'nhs.uk': No scheme supplied. Perhaps you meant https://nhs.uk?
nhs.uk - None
Error accessing URL: Invalid URL 'nhs.uk': No scheme supplied. Perhaps you meant https://nhs.uk?
nhs.uk - None
Error accessing URL: Invalid URL 'nhs.uk': No scheme supplied. Perhaps you meant https://nhs.uk?
nhs.uk - None
https://my.clevelandclinic.org/health/diseases/12342-common-cold - None
https://www.aafp.org/pubs/afp/issues/2019/0901/p281.html - 2025-10-17 18:52:22
https://www.aafp.org/pubs/afp/issues/2012/0715/p153.html - 2025-10-17 19:43:45
https://www.mayoclinic.org/diseases-conditions/common-cold/diagnosis-treatment/drc-20351611 - None
https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms - None
https://www.cdc.gov/common-cold/treatment/index.html - None
https://bestpractice.bmj.com/topics/en-gb/252/management-approach - None
https://pmc.ncbi.nlm.nih.gov/articles/PMC3928210/ - None
https://pmc.ncbi.nlm.nih.gov/article

In [331]:
# Report the WHOIS-based age for every URL after the first four.
for url in urls[4:]:
    age_in_years = date_from_domain(url)
    print(f"{url} - {age_in_years}")

https://www.aafp.org/pubs/afp/issues/2019/0901/p281.html - 0
https://www.aafp.org/pubs/afp/issues/2012/0715/p153.html - 0
https://www.mayoclinic.org/diseases-conditions/common-cold/diagnosis-treatment/drc-20351611 - 1
https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms - 0
https://www.cdc.gov/common-cold/treatment/index.html - 0
https://bestpractice.bmj.com/topics/en-gb/252/management-approach - 0
https://pmc.ncbi.nlm.nih.gov/articles/PMC3928210/ - 0
https://pmc.ncbi.nlm.nih.gov/articles/PMC6026258/ - 0
https://medlineplus.gov/ency/patientinstructions/000466.htm - 0
https://www.nhs.uk/conditions/common-cold/ - None
https://www.hopkinsmedicine.org/health/conditions-and-diseases/common-cold - 0


In [333]:
domain_info = whois.whois("https://www.hopkinsmedicine.org/")

In [334]:
domain_info

{'domain_name': 'hopkinsmedicine.org',
 'registrar': 'Network Solutions, LLC',
 'registrar_url': 'http://www.networksolutions.com',
 'reseller': None,
 'whois_server': 'whois.networksolutions.com',
 'referral_url': None,
 'updated_date': [datetime.datetime(2025, 8, 27, 20, 7, 48, tzinfo=tzoffset('UTC', 0)),
  datetime.datetime(2023, 12, 10, 18, 33, 3, tzinfo=tzoffset('UTC', 0))],
 'creation_date': datetime.datetime(1999, 12, 10, 18, 10, 47, tzinfo=tzoffset('UTC', 0)),
 'expiration_date': datetime.datetime(2033, 12, 10, 18, 10, 47, tzinfo=tzoffset('UTC', 0)),
 'name_servers': ['ens1.jhmi.edu', 'ens1.jhu.edu'],
 'status': 'clientTransferProhibited https://icann.org/epp#clientTransferProhibited',
 'emails': ['domain.operations@web.com',
  'webcenter@jhmi.edu',
  'DDIAdmins@jhmi.edu'],
 'dnssec': 'signedDelegation',
 'name': None,
 'org': None,
 'address': '901 S BOND ST STE 550',
 'city': 'BALTIMORE',
 'state': 'MD',
 'registrant_postal_code': '21231-3359',
 'country': 'US',
 'tech_name':

In [335]:
whois.whois("https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms ")

{'domain_name': 'hopkinsmedicine.org',
 'registrar': 'Network Solutions, LLC',
 'registrar_url': 'http://www.networksolutions.com',
 'reseller': None,
 'whois_server': 'whois.networksolutions.com',
 'referral_url': None,
 'updated_date': [datetime.datetime(2025, 8, 27, 20, 7, 48, tzinfo=tzoffset('UTC', 0)),
  datetime.datetime(2023, 12, 10, 18, 33, 3, tzinfo=tzoffset('UTC', 0))],
 'creation_date': datetime.datetime(1999, 12, 10, 18, 10, 47, tzinfo=tzoffset('UTC', 0)),
 'expiration_date': datetime.datetime(2033, 12, 10, 18, 10, 47, tzinfo=tzoffset('UTC', 0)),
 'name_servers': ['ens1.jhmi.edu', 'ens1.jhu.edu'],
 'status': 'clientTransferProhibited https://icann.org/epp#clientTransferProhibited',
 'emails': ['domain.operations@web.com',
  'webcenter@jhmi.edu',
  'DDIAdmins@jhmi.edu'],
 'dnssec': 'signedDelegation',
 'name': None,
 'org': None,
 'address': '901 S BOND ST STE 550',
 'city': 'BALTIMORE',
 'state': 'MD',
 'registrant_postal_code': '21231-3359',
 'country': 'US',
 'tech_name':

In [336]:
get_last_modified_date("https://www.hopkinsmedicine.org/")

In [339]:
get_last_modified_date("https://www.hopkinsmedicine.org/health/wellness-and-prevention/the-dos-and-donts-of-easing-cold-symptoms")

# Run All Configs

In [1]:
from runtimetest import main



In [2]:
# Model identifiers available for the sweep.
models = [
    'llama3:8b',
    'llama3.2:1b',
    'qwen3:0.6b',
    'gpt-oss:20b',
    'gpt-4.1-mini',
]
# Dataset names; each is used both as a weights source and as an eval set.
datasets = [
    'truthful_qa',
    'med_qa',
    'fin_qa',
]

In [None]:
# Run every (eval dataset, weights dataset) pairing for one model.
# To sweep all models instead, wrap this in `for model in models:`.
model = 'llama3.2:1b'
# 'gpt-4.1-mini' is run in "openai" mode; every other model uses "ollama".
model_mode = "openai" if model == 'gpt-4.1-mini' else "ollama"
for eval_dataset in datasets:
    for weights_dataset in datasets:
        main(
            MODEL_MODE=model_mode,
            model=model,
            weights_dataset=weights_dataset,
            eval_dataset=eval_dataset,
        )

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:   0%|                                                                          | 0/33 [00:00<?, ?it/s]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:26<00:00,  1.23it/s]


Writing results to eval/llama3.2:1b+truthful_qa-truthful_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:17<00:00,  1.85it/s]


Writing results to eval/llama3.2:1b+med_qa-truthful_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:  24%|████████████████                                                  | 8/33 [00:04<00:11,  2.13it/s]Error trying to connect to socket: closing socket - timed out
Processing:  67%|███████████████████████████████████████████▎                     | 22/33 [00:29<00:08,  1.23it/s]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:48<00:00,  1.48s/it]


Writing results to eval/llama3.2:1b+fin_qa-truthful_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:  18%|████████████                                                      | 6/33 [00:04<00:24,  1.08it/s]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:56<00:00,  1.70s/it]


Writing results to eval/llama3.2:1b+truthful_qa-med_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:   6%|████                                                              | 2/33 [00:03<01:07,  2.18s/it]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing:  18%|████████████                                                      | 6/33 [00:11<00:57,  2.12s/it]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:40<00:00,  1.22s/it]


Writing results to eval/llama3.2:1b+med_qa-med_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:  33%|█████████████████████▋                                           | 11/33 [00:07<00:18,  1.19it/s]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing:  55%|███████████████████████████████████▍                             | 18/33 [00:13<00:09,  1.66it/s]Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:27<00:00,  1.19it/s]


Writing results to eval/llama3.2:1b+fin_qa-med_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:56<00:00,  1.71s/it]


Writing results to eval/llama3.2:1b+truthful_qa-fin_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing: 100%|█████████████████████████████████████████████████████████████████| 33/33 [00:35<00:00,  1.07s/it]


Writing results to eval/llama3.2:1b+med_qa-fin_qa.json...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Processing:  52%|█████████████████████████████████▍                               | 17/33 [00:40<00:13,  1.16it/s]