In [1]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Precomputed embedding matrix saved by an earlier step.
# NOTE(review): presumably one row per abstract, in the same order as `df` below — confirm.
embeddings = np.load('embeddings.npy')

# The processed dataset is split across two CSV files
df_part1 = pd.read_csv('../data/processed_data_part1.csv')
df_part2 = pd.read_csv('../data/processed_data_part2.csv')

# Stack both parts under one fresh 0..n-1 index, then pull the abstract texts out as a list
df = pd.concat((df_part1, df_part2), ignore_index=True)
abstract_column = df['Abstract']
abstracts = abstract_column.tolist()

# Never truncate column contents when a frame is displayed
pd.set_option('display.max_colwidth', None)


In [3]:
# Use CUDA when torch reports it as available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load BioBERT: tokenizer and encoder come from the same checkpoint name
biobert_checkpoint = "dmis-lab/biobert-base-cased-v1.2"
tokenizer = AutoTokenizer.from_pretrained(biobert_checkpoint)
model = AutoModel.from_pretrained(biobert_checkpoint)
model = model.to(device)  # place the encoder weights on the selected device

In [4]:
# Free-text questions that are embedded and matched against the abstracts further down
test_queries = [
    "Can intelligence be genetically inherited?",
    "How does emotional intelligence impact health?",
    "How is artificial intelligence used in the development of new pharmaceutical drugs?",
]

def queries_to_embeddings(queries, tokenizer, model):
    """Embed each query as the mean of the model's last-layer token vectors.

    Parameters
    ----------
    queries : iterable of str
        Query texts; each one is tokenized and encoded separately.
    tokenizer : callable
        Called as ``tokenizer(query, return_tensors="pt", truncation=True,
        padding=True, max_length=512)`` and expected to return a mapping of
        tensors.
    model : torch.nn.Module
        Called with the tokenizer output as keyword arguments; its result must
        expose ``last_hidden_state``.

    Returns
    -------
    list of numpy.ndarray
        One array per query, in input order: ``last_hidden_state`` averaged
        over ``dim=1`` and squeezed.
    """
    # Run on whatever device the model's weights live on. Without this, a model
    # that was moved to the GPU fails on CPU inputs with a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    embeddings = []
    for query in queries:
        inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True, max_length=512)
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        # .cpu() is required before .numpy() when the result sits on the GPU
        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
        embeddings.append(embedding)
    return embeddings

# Convert queries to embeddings: one vector per entry of test_queries, in the same order
query_embeddings = queries_to_embeddings(test_queries, tokenizer, model)

In [5]:
# Retrieve most similar abstracts for a given query embedding
def find_most_similar(query_emb, abstract_embs, top_n=3):
    """Return the positions and cosine similarities of the closest abstracts.

    Parameters
    ----------
    query_emb : 1-D array-like
        Embedding of a single query.
    abstract_embs : 2-D array-like
        Candidate embeddings, one per row.
    top_n : int, default 3
        Maximum number of matches to return.

    Returns
    -------
    top_indices : numpy.ndarray
        Row positions in ``abstract_embs``, ordered from most to least similar.
    top_similarities : numpy.ndarray
        Similarity of each returned row, aligned element-wise with
        ``top_indices``.
    """
    # cosine_similarity expects 2-D inputs; wrap the query and take the single result row
    similarities = cosine_similarity([query_emb], abstract_embs)[0]
    # Sort once (descending) and read the scores through the same indices, so
    # each score is guaranteed to belong to the index it is paired with.
    top_indices = np.argsort(similarities)[::-1][:top_n]
    top_similarities = similarities[top_indices]
    return top_indices, top_similarities


In [6]:
# For every test query, show its best-matching abstracts as a styled table
for query, query_emb in zip(test_queries, query_embeddings):
    print(f"Query: {query}")  # Print the query as the title

    # Find the most similar abstracts
    top_indices, top_similarities = find_most_similar(query_emb, embeddings)

    # One record per match; 'Abstract Index' is the 1-based row position in df
    rows = [
        {
            'Similarity Score': round(similarity, 5),
            'Abstract Index': index + 1,
            'Abstract': df.iloc[index]['Abstract'],
        }
        for index, similarity in zip(top_indices, top_similarities)
    ]

    # Build the frame once per query, after all rows are collected. Building it
    # inside the row loop re-created it on every iteration and left it undefined
    # (or stale from the previous query) when there were no matches.
    query_df = pd.DataFrame(rows, columns=['Similarity Score', 'Abstract Index', 'Abstract'])

    display(query_df.style.set_properties(**{'text-align': 'left'}))

Query: Can intelligence be genetically inherited?


Unnamed: 0,Similarity Score,Abstract Index,Abstract
0,0.86413,7944,This paper presents CloudMedic an eHealth Cloud solution that manages health care services in remote regions of BahiaBrazil For that six main modules Clinic Hospital Supply Administrative Billing and Health Business Intelligence were developed to control the health flow among health actors at health institutions They provided database model and procedures for health business rules a standard gateway for data maintenance between web views and database layer and a multifrontend framework based on web views and web commands configurations These resources were used by 2042 health actors in 261 health posts covering health demands from 118 municipalities at Bahia state They also managed approximately 24 million health service orders and approximately 135 million health exams for more than 13 million registered patients As a result a collection of health functionalities available in a cloud infrastructure was successfully developed deployed and validated in more than 28 of Bahia municipalities A viable eHealth Cloud solution that despite municipality limitations in remote regions decentralized and improved the access to health care services at Bahia state
1,0.84943,11075,In the context of legal damage evaluations evaluees may exaggerate or simulate symptoms in an attempt to obtain greater economic compensation To date practitioners and researchers have focused on detecting malingering behavior as an exclusively unitary construct However we argue that there are two types of inconsistent behavior that speak to possible malingeringaccentuating ie exaggerating symptoms that are actually experienced and simulating ie fabricating symptoms entirelyeach with its own unique attributes thus it is necessary to distinguish between them The aim of the present study was to identify objective indicators to differentiate symptom accentuators from symptom producers and consistent participants We analyzed the Structured Inventory of Malingered Symptomatology scales and the Minnesota Multiphasic Personality Inventory2 Restructured Form validity scales of 132 individuals with a diagnosed adjustment disorder with mixed anxiety and depressed mood who had undergone assessment for psychiatricpsychological damage The results indicated that the SIMS Total Score Neurologic Impairment and Low Intelligence scales and the MMPI2RF Infrequent Responses Fr and Response Bias RBS scales successfully discriminated among symptom accentuators symptom producers and consistent participants Machine learning analysis was used to identify the most efficient parameter for classifying these three groups recognizing the SIMS Total Score as the best indicator
2,0.8442,16094,Precision medicine is one of the recent and powerful developments in medical care which has the potential to improve the traditional symptomdriven practice of medicine allowing earlier interventions using advanced diagnostics and tailoring better and economically personalized treatments Identifying the best pathway to personalized and population medicine involves the ability to analyze comprehensive patient information together with broader aspects to monitor and distinguish between sick and relatively healthy people which will lead to a better understanding of biological indicators that can signal shifts in health While the complexities of disease at the individual level have made it difficult to utilize healthcare information in clinical decisionmaking some of the existing constraints have been greatly minimized by technological advancements To implement effective precision medicine with enhanced ability to positively impact patient outcomes and provide realtime decision support it is important to harness the power of electronic health records by integrating disparate data sources and discovering patientspecific patterns of disease progression Useful analytic tools technologies databases and approaches are required to augment networking and interoperability of clinical laboratory and public health systems as well as addressing ethical and social issues related to the privacy and protection of healthcare data with effective balance Developing multifunctional machine learning platforms for clinical data extraction aggregation management and analysis can support clinicians by efficiently stratifying subjects to understand specific scenarios and optimize decisionmaking Implementation of artificial intelligence in healthcare is a compelling vision that has the potential in leading to the significant improvements for achieving the goals of providing realtime better personalized and population medicine at lower costs In this study we focused on analyzing 
and discussing various published artificial intelligence and machine learning solutions approaches and perspectives aiming to advance academic solutions in paving the way for a new datacentric era of discovery in healthcare


Query: How does emotional intelligence impact health?


Unnamed: 0,Similarity Score,Abstract Index,Abstract
0,0.899,7944,This paper presents CloudMedic an eHealth Cloud solution that manages health care services in remote regions of BahiaBrazil For that six main modules Clinic Hospital Supply Administrative Billing and Health Business Intelligence were developed to control the health flow among health actors at health institutions They provided database model and procedures for health business rules a standard gateway for data maintenance between web views and database layer and a multifrontend framework based on web views and web commands configurations These resources were used by 2042 health actors in 261 health posts covering health demands from 118 municipalities at Bahia state They also managed approximately 24 million health service orders and approximately 135 million health exams for more than 13 million registered patients As a result a collection of health functionalities available in a cloud infrastructure was successfully developed deployed and validated in more than 28 of Bahia municipalities A viable eHealth Cloud solution that despite municipality limitations in remote regions decentralized and improved the access to health care services at Bahia state
1,0.88564,18335,This study was designed to develop a computeraided diagnosis CAD system based on a convolutional neural network CNN to diagnose patients with pituitary tumors We included adult patients clinically diagnosed with pituitary adenoma pituitary adenoma group or adult individuals without pituitary adenoma control group After preprocessing all the MRI data were randomly divided into training or testing datasets in a ratio of 82 to create or evaluate the CNN model Multiple CNNs with the same structure were applied for different types of MR images respectively and a comprehensive diagnosis was performed based on the classification results of different types of MR images using an equalweighted majority voting strategy Finally we assessed the diagnostic performance of the CAD system by accuracy sensitivity specificity positive predictive value and F1 score We enrolled 149 participants with 796 MR images and adopted the data augmentation technology to create 7960 new images The proposed CAD method showed remarkable diagnostic performance with an overall accuracy of 9102 sensitivity of 9227 specificity of 7570 positive predictive value of 9345 and F1score of 9267 in separate MRI type In the comprehensive diagnosis the CAD achieved better performance with accuracy sensitivity and specificity of 9697 9444 and 100 respectively The CAD system could accurately diagnose patients with pituitary tumors based on MR images Further we will improve this CAD system by augmenting the amount of dataset and evaluate its performance by external dataset
2,0.88479,2558,The impact of a decision support tool designed to embed contextual mission factors was investigated Contextual information may enable operators to infer the appropriateness of data underlying the automations algorithm Research has shown the costs of imperfect automation are more detrimental than perfectly reliable automation when operators are provided with decision support tools Operators may trust and rely on the automation more appropriately if they understand the automations algorithm The need to develop decision support tools that are understandable to the operator provides the rationale for the current experiment A total of 17 participants performed a simulated rapid retasking of intelligence surveillance and reconnaissance ISR assets task with manual decision automation or contextual decision automation differing in two levels of task demand low or high Automation reliability was set at 80 resulting in participants experiencing a mixture of reliable and automation failure trials Dependent variables included ISR coverage and response time of replanning routes Reliable automation significantly improved ISR coverage when compared with manual performance Although performance suffered under imperfect automation contextual decision automation helped to reduce some of the decrements in performance Contextual information helps overcome the costs of imperfect decision automation Designers may mitigate some of the performance decrements experienced with imperfect automation by providing operators with interfaces that display contextual information that is the state of factors that affect the reliability of the automations recommendation


Query: How is artificial intelligence used in the development of new pharmaceutical drugs?


Unnamed: 0,Similarity Score,Abstract Index,Abstract
0,0.9375,10483,Production and characterization of polymeric nanoparticles as colloidal dispersions are processes that require time and technical skills to make the results accurate Computational simulations in nanoscience have been used to help in these processes and provide agility and support to reach results stability and quality in dispersions MultiAgent System for Polymeric Nanoparticles MASPN is an innovative and original simulation environment with features to demonstrate interactions of particles from physicalchemical parameters ensuring Brownian motion of particles and attractive and repulsive behaviour The MASPN environment has been designed and has been built according to the featuredriven development FDD as software methodology and a multiagent systems approach In addition we have used the eventdriven simulation package algs4 the JASON agent building environment all integrated by Java language This paper aims to present the relation of the algs4 package and the JASON tool both integrated into the MASPN environment to generate Brownian motion with elastic and inelastic collisions The MASPN environment as a simulation tool emerges as a result including the following features graphical interface integrated physicalchemical parameters Brownian motion JASON and algs4 integration and distribution charts size zeta potential and pH
1,0.93485,14159,Differentiation of intelligence refers to changes in the structure of intelligence that depend on individuals level of general cognitive ability ability differentiation hypothesis or age developmental differentiation hypothesis The present article aimed to investigate ability differentiation developmental differentiation and their interaction with nonlinear factor analytic models in 2 studies Study 1 was comprised of a nationally representative sample of 7127 US students 494 female iMisubagesub 1451 iSDi 142 range 12081700 who completed the computerized adaptive version of the Armed Service Vocational Aptitude Battery Study 2 analyzed the norming sample of the Berlin Intelligence Structure Test with 1506 German students 44 female iMisubagesub 1454 iSDi 135 range 10001842 Results of Study 1 supported the ability differentiation hypothesis but not the developmental differentiation hypothesis Rather the findings pointed to agededifferentiation ie higher correlations between different abilities with increasing age There was evidence for an interaction between age and ability differentiation with greater ability differentiation found for older adolescents Study 2 provided little evidence for ability differentiation but largely replicated the findings for age dedifferentiation and the interaction between age and ability differentiation The present results provide insight into the complex dynamics underlying the development of intelligence structure during adolescence Implications for the assessment of intelligence are discussed PsycINFO Database Record c 2020 APA all rights reserved
2,0.93312,12488,At the dawn of the new century Robert Plomin was gloomy As he recounts in Blueprint How DNA Makes Us Who We Are attempts to find the DNA responsible for the heritability of behavior failed Month after month journals would report new findings of specific genes for behavioral phenotypes but they never replicated One amazing genomic methodology after another was developed in biological genetics and applied to medicine where it succeeded and then to human behavior where it failed This was the moment of Plomins despair He had with great intellectual courage staked his reputation on the existence of actionable scientific knowledge of the DNAbased genesis of twinbased heritability But Blueprint is hardly the product of a gloomy author Quite the opposite it is a declaration of victory of nature over nurture a celebration of the vindication of Plomin as a scientist and of behavior genetics as a field of study What happened between 2000 and 2019 to brighten Plomins outlook so radically Were the genes for schizophrenia and intelligence finally discovered Are we at last on our way to understanding why at a biological level all differences in human behavior are substantially heritable Alas no What happened is that Robert Plomin gave up on the search for individual genes that explain heritability and decided to be satisfied with much less
