# Identifying key paragraphs in papers downloaded from the RSC
---

In [1]:
import pandas as pd
from chemdataextractor.doc import Document
from chemdataextractor.doc.text import Paragraph, Title
from chemdataextractor.doc.table import Table
from chemdataextractor.model.model import Compound, ModelType, StringType
import os, errno
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from itertools import tee
from tqdm.notebook import tqdm, trange
#from synparagraph import count_all_quantities
from pprint import pprint

import sys
# Import ExperimentalPaper from the `synoracle` package. If the package is not
# importable as-is, add the parent of the current working directory to
# sys.path and retry (presumably so the notebook also runs from inside a
# repository checkout where `synoracle` is not installed -- confirm).
try:
    from synoracle.xptlpaper import ExperimentalPaper
except ModuleNotFoundError:
    # '..' is resolved against the current working directory, so this fallback
    # depends on where the kernel was started.
    module_path = os.path.abspath(os.path.join('..'))
    # Guard against appending the same entry again when the cell is re-run.
    if module_path not in sys.path:
        sys.path.append(module_path)
    from synoracle.xptlpaper import ExperimentalPaper

def li_iterate(li):
    """Yield the items of ``li`` in order while showing a notebook progress bar.

    The iterable is wrapped directly in ``tqdm`` instead of being stepped
    manually alongside ``trange(len(li))``. When ``li`` has a length the bar
    total is taken from it, as before; iterables without ``__len__`` (for
    example generators) are now accepted too and get a bar without a total.

    Parameters
    ----------
    li : iterable
        The items to iterate over.

    Yields
    ------
    object
        Each element of ``li``, unchanged and in its original order.
    """
    # Delegating to tqdm also avoids a bare ``next()`` inside a generator,
    # which would surface as RuntimeError if ``li`` ran out early.
    yield from tqdm(li)


# The processing loop

In [2]:
# Build the paper wrapper for a single identifier, looking for its files
# relative to the current directory ('./').
test_paper: ExperimentalPaper = ExperimentalPaper(paper_identifier='S1385894723007039', source_directory='./')
# Populate `test_paper.cde_doc`, which is read on the next line
# (presumably a ChemDataExtractor Document -- confirm in synoracle.xptlpaper).
test_paper.create_cde_doc()
# Sanity check: display the parsed title(s) to confirm the right paper loaded.
test_paper.cde_doc.titles

[Title(id=None, references=[], text='Facile synthesis of ZIF-8 incorporated electrospun PAN/PEI nanofibrous composite membrane for efficient Cr(VI) adsorption from water ')]

In [3]:
# Print every paragraph that mentions more than QUANTITY_THRESHOLD quantities,
# together with its index in the document and the raw scan result.
# In the recorded output, count_all_quantities returns a
# (matched_patterns, count) tuple; element [1] is the count used for filtering.
QUANTITY_THRESHOLD = 3  # a paragraph is shown only when its count is strictly greater

for c, paragraph in enumerate(test_paper.cde_doc.paragraphs):
    # Scan each paragraph once and reuse the result for both the filter and
    # the printout, rather than calling count_all_quantities twice.
    quantities = test_paper.count_all_quantities(paragraph)
    if quantities[1] > QUANTITY_THRESHOLD:
        print(f'\n\n{c}\n--------')
        print(paragraph.text)
        print(quantities)



90
--------
0.730 g of Zn(NO3)2·6H2O was dissolved in 40 mL methanol and sonicated for 10 min to form solution A. Similarly, 3.285 g of 2-methylimidazole was dissolved in 40 mL methanol and sonicated for 10 min to form solution B. The two solutions were then mixed and stirred vigorously for 3 h at 25 ± 2 °C. Subsequently, the turbid mixture was separated by centrifugation (10000 rpm) yielding white precipitates that were washed with methanol three times and once with ethanol before drying at 60 °C under vacuum.
([re.compile('0\\.730.g'), re.compile('40.mL'), re.compile('3\\.285.g'), re.compile('40.mL')], 4)


102
--------
The effects of ZIF-8 loading (0–10%) in PAN/PEI NCM and of the initial pH of aqueous solution over a pH range of 2.0–7.0 on the removal efficiency of Cr(VI) were investigated. Briefly, 10 ± 0.1 mg of the adsorbent was introduced to a 50 mL solution with initial Cr(VI) concentrations of 50 and 100 mg·L−1 for pH and ZIF-8 loading effect studies, respectively. The init

In [4]:
# Run the paper's own key-paragraph detection. The recorded output shows an
# AllenNLP model being initialised here, so expect this call to be slow on
# first use.
test_paper.identify_key_paragraphs()
# Display the result. In the recorded output this is a dict keyed by
# paragraph index (e.g. 90, 102) whose values are Paragraph objects.
test_paper.candidate_paragraphs

Initialising AllenNLP model ✔   


{90: Paragraph(id='p0060', references=[], text='0.730\xa0g of Zn(NO3)2·6H2O was dissolved in 40\xa0mL methanol and sonicated for 10\xa0min to form solution A. Similarly, 3.285\xa0g of 2-methylimidazole was dissolved in 40\xa0mL methanol and sonicated for 10\xa0min to form solution B. The two solutions were then mixed and stirred vigorously for 3\xa0h at 25\xa0±\xa02\xa0°C. Subsequently, the turbid mixture was separated by centrifugation (10000\xa0rpm) yielding white precipitates that were washed with methanol three times and once with ethanol before drying at 60\xa0°C under vacuum.'),
 102: Paragraph(id='p0080', references=[], text='The effects of ZIF-8 loading (0–10%) in PAN/PEI NCM and of the initial pH of aqueous solution over a pH range of 2.0–7.0 on the removal efficiency of Cr(VI) were investigated. Briefly, 10\xa0±\xa00.1\xa0mg of the adsorbent was introduced to a 50\xa0mL solution with initial Cr(VI) concentrations of 50 and 100\xa0mg·L−1 for pH and ZIF-8 loading effect studies

In [6]:
# Output only paragraph 90, the ZIF-8 synthesis procedure listed among the
# candidate paragraphs above.
# NOTE(review): where and in what format output_paragraphs writes is not
# visible from this notebook -- confirm in synoracle.xptlpaper.
test_paper.output_paragraphs(paragraph_keys=[90])