Skip to content

Commit

Permalink
Add Extract4LLM
Browse files Browse the repository at this point in the history
  • Loading branch information
EhsanBitaraf committed Jul 3, 2023
1 parent 9e9cee1 commit 37c4307
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ All notable changes to this project will be documented in this file.

## v0.0.2 - 2023-03-25
### Improvements
- Add Extract4LLM in the_private_backyard2.py (not complete) 2023-07-03
- Add selection-sampling
- Complete `go_affiliation_mining()` & `go_extract_topic()`
- Add Country Based Co Authorship in Jupyter Lab
Expand Down
81 changes: 81 additions & 0 deletions triplea/the_private_backyard2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@

import click
import time
import sys
import json
import re
import networkx as nx
from pymongo import MongoClient
from triplea.config.settings import SETTINGS,ROOT
from triplea.service.click_logger import logger
from triplea.schemas.article import Article
from triplea.schemas.node import Node
from triplea.service.graph.analysis.info import info
import triplea.service.repository.persist as persist
import triplea.service.graph.export.export as gexport
import triplea.service.graph.analysis.ganalysis as ganaliz
import traceback
import os

if __name__ == "__main__":
    # Export the abstract of every selected article to its own UTF-8 text
    # file at ROOT/<path>/<PMID>.txt (presumably as input for an LLM, judging
    # by the directory name -- confirm with the author).
    state = 2  # article state to export; None selects every stored article
    proccess_bar = True  # show a click progress bar while exporting
    limit_node = 100  # intended cap on processed articles; 0 means unlimited
    path = "export4llm"  # output directory, relative to ROOT

    if state is None:
        l_pmid = persist.get_all_article_pmid_list()
        logger.INFO(str(len(l_pmid)) + " Article(s) ")
    else:
        l_pmid = persist.get_article_pmid_list_by_state(state)
        logger.INFO(str(len(l_pmid)) + " Article(s) is in state " + str(state))

    if proccess_bar:
        bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    export_dir = ROOT / path
    os.makedirs(export_dir, exist_ok=True)

    for n, pmid in enumerate(l_pmid, start=1):
        if proccess_bar:
            bar.update(1)

        a = persist.get_article_by_pmid(pmid)
        try:
            article = Article(**a.copy())
        except Exception as exc:
            # Best effort: a record that cannot be turned into an Article is
            # reported and skipped instead of aborting the whole export.
            print()
            logger.ERROR(f"Error {type(exc)}")
            logger.ERROR(f"Error {exc}")
            article = None

        # NOTE(review): the limit is detected here but not enforced yet; the
        # early exit was disabled in the original ("for temp"), so reaching
        # limit_node currently changes nothing.
        if limit_node != 0 and n == limit_node:
            pass

        if article is None:
            print()
            print("Article is None!")
            continue

        if article.Abstract is not None:
            # The context manager closes the file even if write() raises.
            out_file = export_dir / f"{article.PMID}.txt"
            with open(out_file, "w", encoding="utf-8") as f:
                f.write(article.Abstract)

        if proccess_bar:
            bar.label = f"Article ({n}) (PMID : {article.PMID}): Save Abstract)"

0 comments on commit 37c4307

Please sign in to comment.