In [None]:
# Mount Google Drive at /content/drive so the notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd 'drive'/'My Drive'/'Work'

/content/drive/My Drive/Work


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 6.6 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 66.5 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 64.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.24.0


In [None]:
# Install the scispaCy SciBERT pipeline (v0.5.1) that is later loaded via spacy.load("en_core_sci_scibert").
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz
  Downloading https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz (417.6 MB)
[K     |████████████████████████████████| 417.6 MB 31 kB/s 
Collecting spacy-transformers
  Downloading spacy_transformers-1.1.8-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.8 MB/s 
Collecting spacy-alignments<1.0.0,>=0.7.2
  Downloading spacy_alignments-0.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 12.5 MB/s 
Collecting transformers<4.22.0,>=3.4.0
  Downloading transformers-4.21.3-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 57.0 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp3

In [None]:
import torch 
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_cosine_schedule_with_warmup
from tqdm import tqdm
from typing import List
from sklearn.metrics import f1_score, precision_score, recall_score

In [None]:
class RationaleData(Dataset):
    """Dataset pairing one claim with each candidate sentence.

    Every item is a dict ``{'claim': <claim>, 'sentence': <sentence>}``; the
    same ``claim`` object is repeated for every entry of ``sentences``.
    """

    def __init__(self, claim, sentences):
        """Build one sample per element of ``sentences``, in the given order."""
        self.samples = [
            {'claim': claim, 'sentence': sentence}
            for sentence in sentences
        ]

    def __len__(self):
        """Return the number of (claim, sentence) samples."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return the sample dict at ``index``."""
        # Index the list; the previous ``self.samples(index)`` tried to call it
        # and raised TypeError on every access.
        return self.samples[index]

In [None]:
def encode(claims: List[str], sentences: List[str]):
    """Tokenize (sentence, claim) pairs into tensors placed on ``device``.

    Uses the module-level ``tokenizer`` and ``device``. Each pair is built as
    ``(sentence, claim)``, so the sentence is the first segment.

    Args:
        claims: Claim texts, aligned one-to-one with ``sentences``.
        sentences: Candidate sentence texts.

    Returns:
        Dict mapping each tokenizer output name (e.g. ``'input_ids'``) to a
        PyTorch tensor moved to ``device``.
    """
    pairs = list(zip(sentences, claims))

    # First pass: pad to the longest pair in the batch, no truncation.
    # ``padding='longest'`` is the documented replacement for the deprecated
    # ``pad_to_max_length=True`` when no ``max_length`` is given.
    encoded_dict = tokenizer.batch_encode_plus(
        pairs,
        padding='longest',
        return_tensors='pt')

    if encoded_dict['input_ids'].size(1) > 512:
        # Too long for the model: re-encode, truncating only the first segment
        # (the sentence) to fit 512 tokens. ``truncation=`` replaces the
        # deprecated ``truncation_strategy=`` keyword.
        encoded_dict = tokenizer.batch_encode_plus(
            pairs,
            max_length=512,
            truncation='only_first',
            padding='max_length',
            return_tensors='pt')

    return {key: tensor.to(device) for key, tensor in encoded_dict.items()}

In [None]:
def predict(model, dataset, batch_size: int = 1):
    """Return the argmax class index for every sample in ``dataset``.

    Args:
        model: Sequence-classification model; it is switched to eval mode.
        dataset: Dataset whose items are dicts with ``'claim'`` and
            ``'sentence'`` entries (e.g. ``RationaleData``).
        batch_size: Number of samples encoded and scored per forward pass.
            Defaults to 1, the previously hard-coded value.

    Returns:
        List of predicted class indices, in dataset order.
    """
    model.eval()
    outputs = []
    with torch.no_grad():  # inference only
        for batch in DataLoader(dataset, batch_size=batch_size):
            encoded_dict = encode(batch['claim'], batch['sentence'])
            logits = model(**encoded_dict)[0]
            outputs.extend(logits.argmax(dim=1).tolist())
    return outputs

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# claim = "ectopic and endogenous PINK1 have been reported to span the outer mitochondrial membrane with the C-terminal kinase region facing the cytosol poised to induce Parkin translocation (Zhou et al., 2008; Narendra et al., 2010)."

In [None]:
# Load the tokenizer and the sequence classifier from the local checkpoint
# folder, then move the model to `device` and put it in eval mode.
tokenizer = AutoTokenizer.from_pretrained("./rationale_biobert_large")
model = AutoModelForSequenceClassification.from_pretrained("./rationale_biobert_large")
model = model.to(device).eval()

In [None]:
import spacy
# import spacy_transformers
# Load the scispaCy SciBERT pipeline; `nlp` is used to split paper text into sentences.
nlp = spacy.load("en_core_sci_scibert")

# Disabled one-off segmentation of a single paper; get_relevant_sentences()
# performs the same read-and-split step for each paper it is given.
# with open('PMC2848616.txt') as f:
#     text = f.read()

# doc = nlp(text)
# sentences = [str(s) for s in list(doc.sents)]

In [None]:
# !unzip rationale_biobert_large-20221031T002534Z-001.zip

In [None]:
# Maps a PMC id to a list holding, per claim, the selected sentence indices;
# filled in by get_relevant_sentences(). Re-running this cell clears it.
results = {}

In [None]:
def get_relevant_sentences(pmcid, claims, top_k=10):
    """Rank the sentences of one paper against each claim and store the top hits.

    Reads ``{pmcid}.txt`` from the working directory, splits it into sentences
    with the module-level ``nlp`` pipeline, and scores every sentence against
    every claim with the module-level ``model`` (via ``encode``). The score is
    the logit at class index 1 -- the class the other cells of this notebook
    filter on with ``pred == 1``.

    Args:
        pmcid: Paper identifier; also the stem of the text file to read.
        claims: Iterable of claim strings.
        top_k: How many of the best-scoring sentence indices to keep per claim.
            Defaults to 10, the previously hard-coded value.

    Returns:
        None. ``results[pmcid]`` is replaced by a list with one entry per
        claim, each a list of sentence indices sorted by descending score.
    """
    # The papers contain non-ASCII characters, so do not rely on the locale default.
    with open(f'{pmcid}.txt', encoding='utf-8') as f:
        text = f.read()

    sentences = [str(s) for s in nlp(text).sents]
    # Paragraph-break pseudo-sentences are never reported, so drop them before
    # inference; indices still refer to positions in the full sentence list.
    candidates = [(i, s) for i, s in enumerate(sentences) if s != '\n\n']

    results[pmcid] = []

    # Inference only: without no_grad every forward pass would build and keep
    # an autograd graph.
    with torch.no_grad():
        for claim in claims:
            scored = []
            for i, sentence in candidates:
                logits = model(**encode([claim], [sentence]))[0]
                scored.append((i, logits[0][1].cpu().item()))

            # Stable sort, highest score first (ties keep document order).
            scored.sort(key=lambda pair: -pair[1])
            results[pmcid].append([i for i, _ in scored[:top_k]])


# PMC2811155

In [None]:
claims = [
    "which is dependent on PINK1 kinase activity (Geisler et al., 2010; Matsuda et al., 2010; Narendra et al., 2010; Vives-Bauza et al., 2010).",
    "Although endogenous PINK1 protein expression is constitutively low owing to rapid turnover, PINK1 proteolysis is inhibited by mitochondrial uncoupling, allowing a robust increase in expression when a mitochondrion is damaged and loses membrane potential (Lin and Kang, 2008; Matsuda et al., 2010; Narendra et al., 2010).",
    "ectopic and endogenous PINK1 have been reported to span the outer mitochondrial membrane with the C-terminal kinase region facing the cytosol poised to induce Parkin translocation (Zhou et al., 2008; Narendra et al., 2010).",
    "owing to insufficient knockdown of endogenous PARL in HeLa cells and to insufficient expression of ectopic PINK1 in PARL knockout (KO) mouse embryonic fibroblasts (MEFs; Narendra et al., 2010).",
    "proteosome-independent proteolysis yields a 52-kD form of PINK1 that is subsequently degraded by the proteosome (Narendra et al., 2010)."
]

In [None]:
get_relevant_sentences('PMC2811155', claims)



In [None]:
results

{'PMC2811155': [[144, 238, 176, 89, 127, 143, 84, 98, 217, 48],
  [87, 379, 35, 215, 85, 380, 221, 3, 89, 83],
  [159, 36, 144, 163, 333, 141, 151, 176, 217, 393],
  [97, 98, 91, 255, 84, 238, 100, 120, 144, 125],
  [379, 86, 74, 35, 221, 238, 95, 78, 85, 48]]}

# PMC2848616

In [None]:
claims = [
    "Autophagy has been shown to protect neurons from Aβ induced cytotoxicity",
    "pharmacological stimulation of autophagy can be beneficial and reduce Aβ mediated toxicity",
    "mTOR signaling was shown to be up-regulated in 7PA2 cells over-expressing mutant APP and in brains of another AD transgenic mouse, with rapamycin treatment reported as protective against behavioral decline",
    "chronic treatment with rapamycin improved behavior of AD transgenic mice although effects were mild",
    "The 3xTg-AD15-18 mice were treated for 3 months based on previous studies showing that in young mice 10-12 weeks of rapamycin administration is sufficient to reduce soluble Aβ and tau [29], [32]."
]

In [None]:
get_relevant_sentences('PMC2848616', claims)

In [None]:
results

{'PMC2811155': [[144, 238, 176, 89, 127, 143, 84, 98, 217, 48],
  [87, 379, 35, 215, 85, 380, 221, 3, 89, 83],
  [159, 36, 144, 163, 333, 141, 151, 176, 217, 393],
  [97, 98, 91, 255, 84, 238, 100, 120, 144, 125],
  [379, 86, 74, 35, 221, 238, 95, 78, 85, 48]],
 'PMC2848616': [[30, 6, 162, 84, 31, 91, 86, 82, 163, 193],
  [86, 6, 91, 85, 30, 42, 84, 31, 8, 37],
  [6, 37, 92, 162, 84, 38, 59, 174, 91, 42],
  [51, 6, 37, 92, 164, 84, 38, 167, 162, 0],
  [6, 38, 179, 91, 37, 7, 48, 62, 60, 42]]}

# PMC2958248

In [None]:
claims = [
    "The cellular exonuclease TREX1 was recently shown to bind and digest excess cytosolic HIV-1 DNA that would otherwise activate type I IFN expression and trigger an innate immune response",
    "Interestingly, similar to SAMHD1, TREX1 mutations in humans are associated with autoimmune and inflammatory diseases, including AGS",
    "TREX1 has previously been implicated in the HIV-1 life cycle",
    "TREX1 suppressed a type I IFN response that otherwise would be induced by HIV-1 infection",
    "TREX1 was proposed to degrade cytosolic DNA resulting from HIV-1 reverse transcription, which would otherwise be capable of inducing an IFN response"
]

In [None]:
get_relevant_sentences('PMC2958248', claims)

# PMC3090745

In [None]:
claims = [
    "These studies have identified several new genes that show significant association after multiple test correction in multiple datasets: CLU, PICALM, BIN1, CR1, ABCA7, MS4A6A, CD33 and CD2AP [31]–[35].",
    "All variants classified as “likely pathogenic” or with an “unknown pathogenicity”, and some “non-pathogenic” variants were genotyped in 961 sporadic AD cases, 1,346 unrelated elderly non-demented controls",
    "Since 2009, four GWAS and a three-stage analysis of the GWAS resulted in the identification of nine novel loci associated with late-onset AD: CLU, PICALM, CR1, BIN1, ABCA7, MS4A cluster (MS4A6A/MS4A4E), CD2AP, CD33, and EPHA1 (Harold et al., 2009; Lambert et al., 2009; Seshadri et al., 2010; Hollingworth et al., 2011a,b; Naj et al., 2011).",
    "Examining the amount of genetic risk effect attributable to these genes (other than APOE), the most strongly associated single-nucleotide polymorphisms at each locus have population attributable fractions between 2.72 and 5.97%, with a cumulative population attributable fraction for non-APOE loci estimated to be as much as 35% (Naj et al., 2011).",
    "To investigate whether any of the significant brain cisSNPs may influence risk of AD, we compared our eGWAS results to the AD risk associations from the large AD GWAS conducted by ADGC [28]."
]

In [None]:
get_relevant_sentences('PMC3090745', claims)

In [None]:
results

{'PMC2811155': [[144, 238, 176, 89, 127, 143, 84, 98, 217, 48],
  [87, 379, 35, 215, 85, 380, 221, 3, 89, 83],
  [159, 36, 144, 163, 333, 141, 151, 176, 217, 393],
  [97, 98, 91, 255, 84, 238, 100, 120, 144, 125],
  [379, 86, 74, 35, 221, 238, 95, 78, 85, 48]],
 'PMC2848616': [[30, 6, 162, 84, 31, 91, 86, 82, 163, 193],
  [86, 6, 91, 85, 30, 42, 84, 31, 8, 37],
  [6, 37, 92, 162, 84, 38, 59, 174, 91, 42],
  [51, 6, 37, 92, 164, 84, 38, 167, 162, 0],
  [6, 38, 179, 91, 37, 7, 48, 62, 60, 42]],
 'PMC2958248': [[131, 5, 152, 193, 0, 4, 333, 132, 31, 119],
  [205, 16, 31, 97, 33, 160, 4, 126, 152, 131],
  [80, 91, 97, 0, 126, 131, 160, 157, 330, 40],
  [132, 0, 105, 345, 152, 131, 126, 24, 157, 4],
  [131, 31, 5, 127, 193, 33, 81, 181, 0, 157]],
 'PMC3090745': [[61, 3, 4, 27, 71, 30, 49, 62, 86, 65],
  [4, 84, 3, 9, 59, 68, 27, 43, 81, 86],
  [32, 30, 42, 3, 27, 7, 34, 79, 31, 4],
  [81, 4, 66, 52, 93, 71, 62, 3, 54, 86],
  [4, 43, 81, 3, 59, 36, 86, 35, 58, 73]]}

# PMC3096954

In [None]:
claims = [
    "Recently, a subgroup of Basal-like tumors associated with poor prognosis has also been reported [4,5].",
    "Context 130, a Basal-like context has under-expression of GATA3 which is in concordance with previous studies of Basal-like subgroup, ’claudin-low’ with poor prognosis and more refractory to chemotherapy [5] .",
    "Molecular profiling has identified 5 distinct subtypes of human breast tumors, luminal A, luminal B, HER2-enriched, basal-like and claudin-low [1-7].",
    "Claudin-low tumors express mesenchymal genes such as Twist1, Twist2, Zeb1, Zeb2, Slug and Snail and low levels of E-cadherin and claudins 3, 4 and 7 [6,7].",
    "Since claudin-low tumors possess features of stem cells [7], it is possible that the tumors that regress and recur or only partially regress are those that contain claudin-low cells.",
]

In [None]:
get_relevant_sentences('PMC3096954', claims)

# PMC3179858

In [None]:
claims = [
    "By contrast, Hrecka and colleagues identified SAMHD1 from HEK 293T cells expressing tagged Vpx in a proteomic screen using multidimensional protein identification technology [10].",
    "They demonstrated that Vpx relieves the inhibition of HIV-1 infection in monocyte-derived macrophages by mediating proteasome-dependent degradation of SAMHD1 through the CUL4A/DCAF1 E3 ubiquitin ligase [10].",
    "Both studies confirmed that Vpx interacts with SAMHD1 and induces proteasomal degradation of SAMHD1 in THP-1 cells or macrophages, which can be restored by treatment with a proteasome inhibitor [9,10].",
    "Further analysis revealed that SAMHD1 blocks HIV-1 reverse transcription, as silencing SAMHD1 in THP-1 cells [9] and macrophages [10] increases the levels of viral DNA.",
    "Together, these studies suggested that SAMHD1 is the myeloid-cell specific HIV-1 restriction factor counteracted by Vpx [9,10] (Figure 1)."
]

In [None]:
get_relevant_sentences('PMC3179858', claims)

# PMC3442244

In [None]:
claims = [
    "Currently, over 60 common risk variants have been identified [30–34], with a combined disease risk of 5–10% [34, 35], suggesting the existence of many more as yet undiscovered loci [34, 36, 37].",
    "A fundamental challenge facing those wishing to determine which of the genes in a particular locus is responsible for affecting disease risk, and dissect how this/these act, is the very scale of the problem (currently more than 500 genes in total to interrogate, with others emerging) [35]",
    "According to the most recent meta-analysis of genome-wide association studies, 63 individual SNPs have now been linked with diabetes risk [1].",
    "However, these variants explain only ∼5.7% of variance in disease susceptibility [1].",
    "Of these SNPs, 66,000 were selected for efficient replication of top signals across multiple traits, with the hope that when tested in sufficiently large samples they might exceed genome-wide statistical significance (9,10)"
]

In [None]:
get_relevant_sentences('PMC3442244', claims)

# PMC3522098

In [None]:
claims = [
    "Interestingly, we found that Fndc5, the gene that codes for the recently discovered antidiabetic hormone irisin [39] was also significantly increased in Cox6a2 −/− diaphragm (data not shown).",
    "This suggests the existence of a crosstalk between skeletal muscle and adipose tissue, as was recently shown with the identification of a new hormone, irisin, which stimulates browning and UCP1 expression in WAT [39].",
    "Irisin, encoded by the Fndc5 gene, is a newly identified hormone from the muscle of PGC-1α transgenic mice.67 Irisin can be induced by exercise in both mice and humans",
    "Overexpression of irisin in the liver of mice, via adenoviral delivery, increases energy expenditure and reduces high fat diet-induced obesity.67",
    "This question is especially pertinent because interest in thermogenic drugs and drug targets has been rekindled by new evidence that brown adipose tissue can be active in adult humans and the discovery of new targets for drugs that might augment and activate brown adipose tissue (Fruhbeck et al., 2009; Wu et al., 2011; Bostrom et al., 2012; Fournier et al., 2012; Ye et al., 2012)."
]

In [None]:
get_relevant_sentences('PMC3522098', claims)

# PMC3607626

In [None]:
# Five citing claims for PMC3607626. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated into a single claim.
claims = [
    "To date, a number of studies have investigated gene expression in ASD (for review see [29]), with three examining ASD brain tissue on a genome-wide scale [30], [31], [32].",
    "Lastly, we were interested in considering our findings in the context of the major three published transcriptomics studies on ASD brain tissue [30], [31], [32].",
    "The most comprehensive transcriptomics study of ASD post-mortem brain to date (Voineagu et al) concludes that one of two significant co-expression networks is involved in immune function [32].",
    "This network analysis led to the identification of specific co-expression modules that are differentially expressed in ASD and controls [20].",
    "These included a neuronal module that was enriched for genes with low GWAS P-values, suggesting that the differential expression of this module between cases and controls reflects a causal relationship [20]."
]

In [None]:
get_relevant_sentences('PMC3607626', claims)

# PMC3650111

In [None]:
claims = [
    "Interestingly, Prevotella-dominated healthy omnivore individuals were recently reported to have increased basal levels of serum TMAO (trimethylamine N-oxide), a product of inflammation linked to atherogenesis, compared to Bacteroides-dominated healthy individuals (Koeth et al., 2013).",
    "While TMAO could be derived from increased consumption of meat (Koeth et al., 2013), Prevotella has been previously associated with a dearth of meat in the diet (Wu et al., 2011).",
    "Additional, recent mouse studies [145] investigating how consumption of red meat may accelerate cardiovascular disease and inflammation in humans [146,147] suggest an additional and potentially serious limitation on probiotic supplementation.",
    "Dietary L-carnitine and choline, compounds abundant in red meat, are metabolized into trimethylamine-N-oxide (TMAO) by way of some normal gut commensals; in mice TMAO enhances atherosclerosis through disrupting cholesterol metabolism and foam-cell macrophage activity [145].",
    "In both humans and mice, TMAO concentrations correlate positively with CVD risk, and dietary supplementation with either TMAO or compounds that can be metabolized to TMAO increases atherosclerosis in mice12"
]

In [None]:
get_relevant_sentences('PMC3650111', claims)

In [None]:
# import json

# with open('results_rerrfact.json', 'w') as f:
#     json.dump(results, f)

In [None]:
import json

# Serialize the accumulated `results` dict to disk as JSON.
with open('results_rerrfact_1.json', 'w') as out_file:
    out_file.write(json.dumps(results))

In [None]:
results

{'PMC2811155': [[144, 238, 176, 89, 127],
  [87, 379, 35, 215, 85, 380, 221, 3, 89, 83],
  [159, 36, 144, 163, 333, 141, 151, 176, 217, 393],
  [97, 98, 91, 255, 84, 238, 100],
  [379, 86, 74, 35]],
 'PMC2848616': [[30, 6, 162, 84, 31, 91, 86],
  [86, 6, 91, 85, 30, 42, 84, 31, 8, 37],
  [6, 37, 92, 162, 84, 38],
  [51, 6, 37, 92, 164, 84, 38, 167, 162],
  [6, 38, 179, 91]],
 'PMC2958248': [[131, 5, 152, 193, 0, 4, 333, 132, 31, 119],
  [205, 16],
  [80, 91, 97, 0, 126, 131, 160, 157, 330, 40],
  [132, 0, 105, 345, 152, 131, 126, 24, 157, 4],
  [131, 31, 5, 127, 193, 33, 81, 181, 0, 157]],
 'PMC3090745': [[61, 3, 4, 27], [], [32, 30, 42, 3, 27, 7], [81, 4, 66], []],
 'PMC3096954': [[216, 238, 186, 161, 14],
  [216, 238, 106],
  [106, 22, 114, 156, 14, 338, 172, 209, 6, 147],
  [212, 131, 209, 172],
  [172, 209, 224, 213, 194, 236, 222, 238, 6, 248]],
 'PMC3179858': [[],
  [6, 16, 46, 53, 69, 0, 168, 52, 81, 173],
  [6, 43, 52, 53, 69, 168, 16, 44, 51, 46],
  [68, 64, 180, 5, 62],
  [78

In [None]:
# Load a previously saved results file for comparison with `results`.
with open('results.json') as in_file:
    old = json.loads(in_file.read())

In [None]:
old

{'PMC2811155': [[33, 127, 112, 257, 101, 269, 105, 95, 147, 106],
  [215, 73, 3, 42, 380, 89, 217, 333, 61, 35],
  [157, 168, 141, 163, 161, 333, 217, 176, 94, 299],
  [98, 103, 399, 97, 238, 353, 99, 96, 91, 170],
  [86, 74, 379, 81, 220, 315, 3, 95, 152, 35]],
 'PMC2848616': [[30, 8, 40, 31, 86, 82, 89, 42, 91, 85],
  [42, 91, 86, 8, 82, 89, 92, 40, 38, 85],
  [37, 6, 84, 58, 9, 42, 38, 92, 96, 59],
  [164, 6, 56, 51, 58, 8, 37, 92, 167, 38],
  [38, 85, 179, 60, 86, 92, 61, 42, 91, 8]],
 'PMC2958248': [[5, 131, 0, 3, 132, 81, 152, 70, 21, 17],
  [16, 205, 207, 208, 188, 0, 211, 337, 41, 105],
  [0, 12, 59, 188, 82, 97, 131, 105, 86, 80],
  [0, 3, 131, 105, 21, 4, 132, 97, 70, 51],
  [5, 131, 70, 0, 51, 3, 81, 21, 100, 160]],
 'PMC3090745': [[79, 3, 80, 52, 61, 32, 27, 90, 42, 58],
  [9, 84, 5, 11, 55, 1, 6, 15, 10, 23],
  [1, 26, 80, 34, 7, 30, 42, 79, 9, 32],
  [81, 73, 52, 72, 65, 82, 26, 29, 80, 85],
  [9, 1, 92, 63, 38, 55, 52, 58, 3, 22]],
 'PMC3096954': [[158, 216, 186, 123, 25

In [None]:
import numpy as np

# Agreement between `results` and `old`: for each claim, the number of shared
# sentence indices divided by the shorter list's length; summed per paper and
# divided by 5 (the number of claims entered per paper), then averaged.
# NOTE: `all` shadows the builtin; the name is kept because a later cell displays it.
all = []
for pmcid in results:
    paper_score = 0
    for new_ids, old_ids in zip(results[pmcid], old[pmcid]):
        # Claims with an empty list on either side contribute nothing.
        if new_ids and old_ids:
            shared = np.intersect1d(new_ids, old_ids)
            paper_score += len(shared) / min(len(new_ids), len(old_ids))
    all.append(paper_score / 5)
sum(all) / len(all)


0.536579365079365

In [None]:
import numpy as np

# Agreement between `results` and `old`: for each claim, the number of shared
# sentence indices divided by a fixed 10; summed per paper and divided by 5
# (the number of claims entered per paper), then averaged over papers.
# NOTE: `all` shadows the builtin; the name is kept because a later cell displays it.
all = []
for pmcid in results:
    paper_score = 0
    for new_ids, old_ids in zip(results[pmcid], old[pmcid]):
        # Claims with an empty list on either side contribute nothing.
        if new_ids and old_ids:
            shared = np.intersect1d(new_ids, old_ids)
            paper_score += len(shared) / 10
    all.append(paper_score / 5)
sum(all) / len(all)

0.418

In [None]:
import numpy as np

# Share of the first three `old` sentence indices of each claim that also
# appear in the new list: shared count divided by 3, summed per paper, divided
# by 5 (the number of claims entered per paper), then averaged over papers.
# NOTE: `all` shadows the builtin; the name is kept because a later cell displays it.
all = []
for pmcid in results:
    paper_score = 0
    for new_ids, old_ids in zip(results[pmcid], old[pmcid]):
        old_top = old_ids[:3]
        # Claims with an empty list on either side contribute nothing.
        if new_ids and old_top:
            shared = np.intersect1d(new_ids, old_top)
            paper_score += len(shared) / 3
    all.append(paper_score / 5)
sum(all) / len(all)

0.5733333333333335

In [None]:
all

[0.4,
 0.58,
 0.4,
 0.3,
 0.56,
 0.48,
 0.45999999999999996,
 0.45999999999999996,
 0.24,
 0.3]

# Claim 1

In [None]:
claim = "Autophagy has been shown to protect neurons from Aβ induced cytotoxicity"

In [None]:
# Score every sentence in `sentences` against `claim` (both set by previously
# executed cells) and print the ten highest-scoring sentences predicted as class 1.
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        if sentence == '\n\n':  # paragraph break; never reported, so skip the forward pass
            continue
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

# Highest class-1 logit first; ties keep document order.
sents.sort(key=lambda pair: -pair[1])

for i, _ in sents[:10]:
    print(sentences[i])

While excessive autophagic activity can lead to cell death, increased autophagy has been shown to facilitate the clearance of aggregation-prone proteins such as Aβ[20], [21], [22], pathological prion protein[23], [24], and α-synuclein[25], and to promote neuronal survival in a variety of neurodegenerative disease models.
Here we show that long-term inhibition of mTOR by rapamycin prevented AD-like cognitive deficits and lowered levels of Aβ , a major toxic species in AD[7], in the PDAPP transgenic mouse model.
 
The data presented here are, to our knowledge, the first to show that inhibition of mTOR by rapamycin decreased Aβ  levels ( Fig. 2 ) and rescued cognitive function ( Fig. 1 ) in a mouse model of AD.
Supporting the notion that autophagy may have a protective role in AD, deletion of the beclin 1 gene in PDAPP mice impaired autophagy and resulted in large increases in Aβ levels and accelerated Aβ deposition[26].
Prolonged rapamycin treatment may thus release mTOR-mediated inhibit

# Claim 2

In [None]:
claim = "pharmacological stimulation of autophagy can be beneficial and reduce Aβ mediated toxicity"

In [None]:
# Score every sentence in `sentences` against `claim` (both set by previously
# executed cells) and print the ten highest-scoring sentences predicted as class 1.
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        if sentence == '\n\n':  # paragraph break; never reported, so skip the forward pass
            continue
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

# Highest class-1 logit first; ties keep document order.
sents.sort(key=lambda pair: -pair[1])

for i, _ in sents[:10]:
    print(sentences[i])

In addition, rapamycin treatment did not induce autophagy and did not affect levels of endogenous Aβ in non-transgenic mice, suggesting that autophagy may have a key role in reducing Aβ  in transgenic PDAPP brains.
Here we show that long-term inhibition of mTOR by rapamycin prevented AD-like cognitive deficits and lowered levels of Aβ , a major toxic species in AD[7], in the PDAPP transgenic mouse model.
Prolonged rapamycin treatment may thus release mTOR-mediated inhibition of autophagy and allow for the reduction of Aβ levels through this clearance mechanism in transgenic PDAPP brains.
Our data suggest that the reduction in Aβ  levels and the improvement in cognitive function in rapamycin-treated PDAPP mice may be a consequence of the induction of autophagy in hippocampus ( Fig. 3 ) by high levels of Aβ in PDAPP transgenic brains. Consistent with a key role for high levels of Aβ in the activation of autophagy when mTOR activity is reduced, rapamycin did not induce autophagy in brains

# Claim 3

In [None]:
claim = "mTOR signaling was shown to be up-regulated in 7PA2 cells over-expressing mutant APP and in brains of another AD transgenic mouse, with rapamycin treatment reported as protective against behavioral decline"

In [None]:
# Score every sentence in `sentences` against `claim` (both set by previously
# executed cells) and print the ten highest-scoring sentences predicted as class 1.
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        if sentence == '\n\n':  # paragraph break; never reported, so skip the forward pass
            continue
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

# Highest class-1 logit first; ties keep document order.
sents.sort(key=lambda pair: -pair[1])

for i, _ in sents[:10]:
    print(sentences[i])

Here we show that long-term inhibition of mTOR by rapamycin prevented AD-like cognitive deficits and lowered levels of Aβ , a major toxic species in AD[7], in the PDAPP transgenic mouse model.
Here we show that long-term mTOR inhibition by rapamycin inhibited mTOR in brain, prevented AD-like cognitive deficits ( Fig. 1 ) and lowered levels of Aβ  ( Fig. 2 ) in the PDAPP transgenic mouse model.
Although rapamycin treatment did not activate autophagy nor reduce endogenous mouse Aβ levels, it inhibited mTOR function in non-transgenic littermate brains, and this group showed trends to improved learning and retention.
 
The data presented here are, to our knowledge, the first to show that inhibition of mTOR by rapamycin decreased Aβ  levels ( Fig. 2 ) and rescued cognitive function ( Fig. 1 ) in a mouse model of AD.
These data indicate that inhibition of the mTOR pathway by long-term rapamycin treatment can reduce Aβ  levels in vivo and block or delay AD in mice.


# Claim 4

In [None]:
claim = "chronic treatment with rapamycin improved behavior of AD transgenic mice although effects were mild"

In [None]:
# Score every sentence in `sentences` against `claim` (both set by previously
# executed cells) and print the ten highest-scoring sentences predicted as class 1.
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        if sentence == '\n\n':  # paragraph break; never reported, so skip the forward pass
            continue
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

# Highest class-1 logit first; ties keep document order.
sents.sort(key=lambda pair: -pair[1])

for i, _ in sents[:10]:
    print(sentences[i])

Rapamycin-fed transgenic PDAPP mice, however, showed improved learning ( Figure 1a ) and memory ( Figure 1b ), with improved performances on the last day of training and retention of the former location of the escape platform restored to levels indistinguishable from those of non-transgenic littermates ( Figure 1b ).
Here we show that long-term inhibition of mTOR by rapamycin prevented AD-like cognitive deficits and lowered levels of Aβ , a major toxic species in AD[7], in the PDAPP transgenic mouse model.
Here we show that long-term mTOR inhibition by rapamycin inhibited mTOR in brain, prevented AD-like cognitive deficits ( Fig. 1 ) and lowered levels of Aβ  ( Fig. 2 ) in the PDAPP transgenic mouse model.
Although rapamycin treatment did not activate autophagy nor reduce endogenous mouse Aβ levels, it inhibited mTOR function in non-transgenic littermate brains, and this group showed trends to improved learning and retention.
While learning in both transgenic groups was impaired with r

# Claim 5

In [None]:
claim = "The 3xTg-AD15-18 mice were treated for 3 months based on previous studies showing that in young mice 10-12 weeks of rapamycin administration is sufficient to reduce soluble Aβ and tau [29], [32]."

In [None]:
# Score every sentence in `sentences` against `claim` (both set by previously
# executed cells) and print the ten highest-scoring sentences predicted as class 1.
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        if sentence == '\n\n':  # paragraph break; never reported, so skip the forward pass
            continue
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

# Highest class-1 logit first; ties keep document order.
sents.sort(key=lambda pair: -pair[1])

for i, _ in sents[:10]:
    print(sentences[i])

Here we show that long-term inhibition of mTOR by rapamycin prevented AD-like cognitive deficits and lowered levels of Aβ , a major toxic species in AD[7], in the PDAPP transgenic mouse model.
These data indicate that inhibition of the mTOR pathway by long-term rapamycin treatment can reduce Aβ  levels in vivo and block or delay AD in mice.
 , rapamycin did not alter Aβ  levels but significantly decreased soluble Aβ  levels in the brains of transgenic PDAPP mice *, P = 0.02.
Prolonged rapamycin treatment may thus release mTOR-mediated inhibition of autophagy and allow for the reduction of Aβ levels through this clearance mechanism in transgenic PDAPP brains.


In [None]:
# Read the whole paper text into `content` for sentence splitting.
with open("PMC2995166_3.txt") as paper_file:
    content = paper_file.read()

In [None]:
!pip install scispacy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scispacy
  Downloading scispacy-0.5.1-py3-none-any.whl (44 kB)
[K     |████████████████████████████████| 44 kB 3.2 MB/s 
Collecting nmslib>=1.7.3.6
  Downloading nmslib-2.1.1-cp37-cp37m-manylinux2010_x86_64.whl (13.5 MB)
[K     |████████████████████████████████| 13.5 MB 43.6 MB/s 
[?25hCollecting pysbd
  Downloading pysbd-0.3.4-py3-none-any.whl (71 kB)
[K     |████████████████████████████████| 71 kB 10.9 MB/s 
[?25hCollecting conllu
  Downloading conllu-4.5.2-py2.py3-none-any.whl (16 kB)
Collecting pybind11<2.6.2
  Downloading pybind11-2.6.1-py2.py3-none-any.whl (188 kB)
[K     |████████████████████████████████| 188 kB 75.3 MB/s 
Installing collected packages: pybind11, pysbd, nmslib, conllu, scispacy
Successfully installed conllu-4.5.2 nmslib-2.1.1 pybind11-2.6.1 pysbd-0.3.4 scispacy-0.5.1


In [None]:
# Same install command as the en_core_sci_scibert cell near the top of the notebook.
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz
  Downloading https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_scibert-0.5.1.tar.gz (417.6 MB)
[K     |████████████████████████████████| 417.6 MB 29 kB/s 
Collecting spacy-transformers
  Downloading spacy_transformers-1.1.8-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.8 MB/s 
Collecting spacy-alignments<1.0.0,>=0.7.2
  Downloading spacy_alignments-0.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 24.9 MB/s 
[?25hCollecting transformers<4.22.0,>=3.4.0
  Downloading transformers-4.21.3-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 60.3 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12

In [None]:
import spacy
# Loads the same "en_core_sci_scibert" pipeline as the earlier spaCy cell and rebinds `nlp`.
nlp = spacy.load("en_core_sci_scibert")

In [None]:
# Run the pipeline over the paper text and keep each detected sentence as a plain string.
doc = nlp(content)
sentences = [str(sent) for sent in doc.sents]

In [None]:
!unzip rationale_biobert_large-20221031T002534Z-001.zip

Archive:  rationale_biobert_large-20221031T002534Z-001.zip
  inflating: rationale_biobert_large/pytorch_model.bin  
  inflating: rationale_biobert_large/tokenizer_config.json  
  inflating: rationale_biobert_large/config.json  
  inflating: rationale_biobert_large/vocab.txt  
  inflating: rationale_biobert_large/special_tokens_map.json  


In [None]:
# Load the tokenizer and classifier from the unzipped checkpoint folder.
tokenizer = AutoTokenizer.from_pretrained("./rationale_biobert_large")
model = AutoModelForSequenceClassification.from_pretrained("./rationale_biobert_large").to(device).eval()

# Score every sentence of the loaded paper against `claim`, which must already
# be set by a previously executed cell, keeping those predicted as class 1.
# claim = ""
evidence = []
sents = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i, sentence in enumerate(sentences):
        logits = model(**encode([claim], [sentence]))[0]
        if logits.argmax(dim=1).item() == 1:
            # (sentence index, class-1 logit)
            sents.append((i, logits[0][1].cpu().item()))
evidence.append(sents)

In [None]:
# Order the (index, score) pairs in place by descending score; ties keep their existing order.
sents.sort(key = lambda x : -x[1])

In [None]:
# Print the text of the five best-scoring sentences.
for sentence_index, _score in sents[:5]:
    print(sentences[sentence_index])

Thus, FRB-PINK1Δ1-110-YFP should be recruited from the cytosol to mitochondria if the FKBP domain of TOM20-FKBP faces the cytosol but not if it faces the inter membrane space or the matrix.
Selective accumulation of PINK1 on the impaired mitochondria recruits Parkin, and Parkin, in turn, induces the degradation of the damaged mitochondria.
Consistent with necessity of PINK1's mitochondrial localization and kinase activity, exogenous expression of PINK1 KD or PINK1 ΔN fails to accelerate the kinetics of Parkin recruitment (Figure 5B).
Thus, increased expression of PINK1 on the outer mitochondrial membrane is sufficient to recruit Parkin to mitochondria.

Increased expression of PINK1 on the outer mitochondrial membrane is sufficient for Parkin recruitment.


In [None]:
sents

In [None]:
len(sentences)

521