# Indexing Data into the Open Research Knowledge Graph

This notebook shows how to index new resources into the Open Research Knowledge Graph (ORKG) of TIB Hannover. The papers we index in this notebook are retrieved from PubMed.

In [None]:
import pandas as pd
from Bio import Entrez, Medline
from orkg import ORKG
from orkg.utils import NamespacedClient, query_params, dict_to_url_params
from orkg.out import OrkgResponse
from orkg.client.resources import ResourcesClient
from orkg import client
from orkg.client.classes import ClassesClient

# Retrieving Papers

In [None]:
# Credentials for the NCBI Entrez services: fill in your e-mail address, and
# additionally your API key if you have one (otherwise the e-mail alone is used).
Entrez.email = ""
Entrez.api_key = ""

In [None]:
# Ask interactively for the search term; it is the query for which papers
# are retrieved in the cells below.
query = input("Enter a query: ")

In [None]:
# Search PubMed for the query and keep the PubMed IDs of the top 10 results.
handle = Entrez.esearch(db="pubmed", term=query, retmax="10")
try:
    record = Entrez.read(handle)
finally:
    # Release the underlying response even if parsing the result fails.
    handle.close()

# Nested one-element list, kept under its original name for later cells.
pubmed_idlist = [record["IdList"]]
# Flat list of PubMed IDs; this is what the fetch step below consumes.
flat_pubmed_idlist = list(record["IdList"])

In [None]:
# Display the collected PubMed IDs as a quick sanity check.
flat_pubmed_idlist

In [None]:
# Retrieve the content of the papers as MEDLINE records.
# MEDLINE is a plain-text format that Medline.parse reads line by line, so it
# is requested with retmode="text" (the documented mode for rettype="medline")
# rather than "json".
handle = Entrez.efetch(
    db="pubmed", id=flat_pubmed_idlist, rettype="medline", retmode="text"
)
try:
    records = Medline.parse(handle)
    # Consume the parser completely while the handle is still open; each
    # element of data_list is one parsed record.
    data_list = list(records)
finally:
    handle.close()

In [None]:
# Build a table from the fetched records: one row per record in data_list.
df = pd.DataFrame(data_list)

In [None]:
# Keep only the four MEDLINE fields used for the ORKG import.
# reindex() is used instead of df[[...]] so that a field missing from every
# fetched record becomes an empty (NaN) column instead of raising a KeyError.
# NOTE(review): "AUID" is later renamed to "paper:authors"; in MEDLINE, AUID
# looks like the author *identifier* field (author names are AU/FAU) — confirm
# that this is the intended source column.
df = df.reindex(columns=["TI", "AID", "AUID", "JT"])

In [None]:
# Rename the MEDLINE field codes to the column headers used in the CSV file
# that is uploaded to the ORKG at the end of this notebook.
df = df.rename(columns = {"TI" : "paper:title", "AID" : "paper:DOI", "AUID" : "paper:authors", "JT" : "HAS_VENUE"})

# Adding research_field and research_problem manually

In [None]:
# Append two placeholder columns (empty strings) that are filled in by hand
# in the following cells: first the research field, then the research problem.
df = df.assign(
    **{
        "paper:research_field": "",
        "contribution:research_problem": "",
    }
)

In [None]:
# Manually chosen research-field identifiers, one per paper, in row order.
research_fields = ["R84", "R84", "R84", "R84", "R52", "R52", "R56", "R84", "R84", "R52"]

# Each cell stores a Python list, so the column has to be of object dtype.
df["paper:research_field"] = df["paper:research_field"].astype(object)

# Write through df.at instead of chained indexing (df[col][i] = ...): chained
# assignment may modify a temporary copy and leave df unchanged.
# zip() stops at the shorter sequence, so a search that returned fewer than
# ten papers only fills the rows that exist.
for row_label, field_id in zip(df.index, research_fields):
    df.at[row_label, "paper:research_field"] = [field_id]

In [None]:
# Manually written research problems, one per paper, in row order.
research_problems = [
    "Effect of low dietry fiber on Crohn´s disease",
    "Short fatty acids as a therapeutical target in management of liver diseases",
    "Effects of obesity-induced insulin resistance",
    "Gut microbiome difference of breast-fed versus formula-fed infants",
    "Effect of limonite on diabetes mellitus",
    "Connection between gut microbiota and brain disease",
    "Connection between microbiota and, the immune system and metabolic disorders",
    "Effect of Ligilactobacillus salivarius on obesity",
    "Effect of Butyrat on obestiy related anxiety-like disorders",
    "Effect of gut microbiota on Idiopathic intracranial hypertension syndrome",
]

# Each cell stores a Python list, so the column has to be of object dtype.
df["contribution:research_problem"] = df["contribution:research_problem"].astype(object)

# Write through df.at instead of chained indexing (df[col][i] = ...): chained
# assignment may modify a temporary copy and leave df unchanged.
# zip() stops at the shorter sequence, so a search that returned fewer than
# ten papers only fills the rows that exist.
for row_label, problem in zip(df.index, research_problems):
    df.at[row_label, "contribution:research_problem"] = [problem]

# Adding triples manually
The ORKG works with triples based on the Human Disease Ontology. We have to add them manually, since there is no Python module or NLP software that does this for us.

In [None]:
# Add an empty "keywords" column; the next cell fills it with one manually
# written triple per paper.
df["keywords"] = ""

In [None]:
# Manually written triples, one three-element list per paper, in row order.
keyword_triples = [
    ["dietry fiber", "promotes", "Crohn's disease"],
    ["obesity", "promotes", "liver diseases"],
    ["high fat-diet", "causes", "obesity"],
    ["gut microbiota", "influences", "diabetes"],
    ["limonite intake", "decreases", "obesity"],
    ["gut", "influences", "brain"],
    ["NOD-1", "promotes", "obesity"],
    ["LCK11", "prevents", "obesity"],
    ["obesity", "influences", "anxiety disorder"],
    ["obesity", "causes", "pseudotumor cerebri"],
]

# Each cell stores a Python list, so the column has to be of object dtype.
df["keywords"] = df["keywords"].astype(object)

# Write through df.at instead of chained indexing (df[col][i] = ...): chained
# assignment may modify a temporary copy and leave df unchanged.
# zip() stops at the shorter sequence, so a search that returned fewer than
# ten papers only fills the rows that exist.
for row_label, triple in zip(df.index, keyword_triples):
    df.at[row_label, "keywords"] = triple

In [72]:
# Export the table to the CSV file that is uploaded to the ORKG below.
# index=False keeps the DataFrame's row index out of the file; otherwise it is
# written as an extra, unnamed first column next to the paper data.
# NOTE(review): presumably the ORKG importer treats every CSV column as paper
# data, so the stray index column would be imported too — confirm.
df.to_csv("papers_about_obesity.csv", index=False)

# Adding the file as a resource to the ORKG

In [None]:
# Create the ORKG client for the given host. The two credential strings are
# placeholders: replace them with your own ORKG e-mail address and password.
orkg = ORKG(host="https://www.orkg.org/orkg", creds=('orkg-email-address', 'orkg-password'))

In [None]:
# Hand the CSV file exported earlier in this notebook to the ORKG client's
# CSV import for papers.
orkg.papers.add_csv(file = "papers_about_obesity.csv")