In [1]:
import pandas as pd
import numpy as np
import os, gc
import random

#Import Faker
from faker import Faker

#Create faker object
fake = Faker()

os.chdir('data')

In [2]:
def convert_to_dt(df):
    'Converts date columns (inferred from column names) into pandas datetime types'
    for col in [i for i in df.columns if 'date' in i.lower() or i.endswith('dt')]:
        print(f'To date: {col}')
        df[col]=pd.to_datetime(df[col])

# Prep dataframes

In [3]:
# Read authors and merge institution
author=pd.read_csv('authors.csv').merge(pd.read_csv('institutions.csv').rename(columns={'intitutionid':'institutionid',
                                                                                       'name':'institution'}), 
                                        on='institutionid').drop(columns='institutionid').drop(columns=['affiliations','homepage','fake'])
# Add aditional information
author['sex']=pd.Series([random.randint(1, 2) for i in range(len(author))]).map({1:'Female',2:'Male'})
author['birthdate']=[fake.date() for i in range(len(author))]
author['originCountry']=[fake.country() for i in range(len(author))]
author['paperCount']=author.paperCount.fillna(pd.Series([random.randint(1,100) for i in range(len(author))]))
author['citationCount']=author.citationCount.fillna(pd.Series([random.randint(1,100) for i in range(len(author))]))
author['hIndex']=author.hIndex.fillna(pd.Series([random.randint(1,10) for i in range(len(author))]))

# Make sure paper count is less than citation count, otherwise exchange values
m=author.paperCount<author.citationCount
author.loc[m, ['paperCount', 'citationCount']] = (
    author.loc[m, ['citationCount', 'paperCount']].values)

author=author.dropna().reset_index(drop=True).rename(columns={'authorId':'author'})
author.drop(columns=['paperCount','citationCount'], inplace=True)

convert_to_dt(author)

author

To date: birthdate


Unnamed: 0,author,url,name,hIndex,institution,sex,birthdate,originCountry
0,7.265495e+06,https://www.semanticscholar.org/author/7265495,James C. Petrovich,9.0,National Taiwan University,Female,2013-07-07,Iran
1,3.885358e+06,https://www.semanticscholar.org/author/3885358,H. Tsai,22.0,National Taiwan University,Male,2005-09-14,South Africa
2,4.086158e+07,https://www.semanticscholar.org/author/40861575,Mary K. Twis,6.0,Nisho Gakusha University,Male,2004-03-02,Malaysia
3,2.073785e+09,https://www.semanticscholar.org/author/2073784912,S. Evans,3.0,Hatyai University,Female,2004-04-29,Cyprus
4,4.666378e+07,https://www.semanticscholar.org/author/46663785,Jae-Ho Lee,5.0,Hatyai University,Male,1992-01-26,Heard Island and McDonald Islands
...,...,...,...,...,...,...,...,...
4032,8.076094e+08,http://to.us.png,Canymb Duand,25.0,Universidad Privada del Este,Female,1970-09-06,Cote d'Ivoire
4033,8.229014e+08,http://inch.com/peral/The/In/itentr.by-Brache4...,For Applas,28.0,Universidade Ibirapuera,Female,2018-12-18,Samoa
4034,8.472713e+08,https://alic.win.us/the/and/to/as.asinit-orted...,Tent Se,5.0,Cankaya University,Male,2000-09-16,China
4035,8.560731e+08,http://in.biz/no.Nover18-lanati-coll,Ind Anathe,63.0,Electronics and Automation (Technical Univers...,Female,1981-06-04,Cameroon


In [4]:
# Read paper
paper=pd.read_csv('paper.csv').drop(columns=['sha','fake']).rename(columns={'id':'paper'})

# Synthesize new fields
paper['wordcount']=[random.randint(4000,7000) for i in range(len(paper))]
paper['abstract']=paper.abstract.fillna(pd.Series([fake.paragraph() for i in range(len(paper))]))
paper['type']=[random.sample(['short','demo','full','poster'], 1)[0] for i in range(len(paper))]
paper['doi']=[f'http://doi.org/{fake.iana_id()}/{fake.ipv4()}' for i in range(len(paper))]
paper.drop(columns=['url'], inplace=True)

# Merge paper information with conference and journal publication match
paper=pd.concat(
    [
        (pd.read_csv('submitted_to_conference.csv').merge(pd.read_csv('holds.csv').drop(columns=['fake']), on='edition')
         .drop(columns=['fake'])),
        (pd.read_csv('submitted_to_journal.csv').merge(pd.read_csv('volume_of.csv').drop(columns=['fake']), on='volume')
         .drop(columns=['fake']))
    ]
).drop_duplicates().merge(paper, on='paper')

# Fill in null dates
dts=pd.Series([fake.date() for i in range(len(paper))])
paper['published_date']=paper['published_date'].fillna(dts)
paper['submitted_date']=paper['submitted_date'].fillna(dts)

# Unify columns
paper['venue_type']=np.where(paper.conference.notna(), 'Conference', 'Journal')
paper['venue']=paper.conference.fillna(paper.journal)
paper['publication']=paper.edition.fillna(paper.volume)
paper.drop(columns=['edition','conference','volume','journal'], inplace=True)

# Since editions will now be conference conepts, all papers submitted to a conference will use the id of its proceeding as id for the conference
m=paper.venue_type=='Conference'
paper.loc[m, ['venue']] = (
    paper.loc[m, ['publication']].values)

convert_to_dt(paper)

#### PAPER CONSTRAINTS

# Submission date is less then published date
m=paper.submitted_date>paper.published_date
paper.loc[m, ['published_date', 'submitted_date']] = (
    paper.loc[m, ['submitted_date', 'published_date']].values)

# Poster can only be in conference. if not conference, change type
paper.loc[(paper.type=='poster')&(paper.venue_type=='Journal'),
          'type']=pd.Series([random.sample(['short','demo','full'], 1)[0] for i in range(len(paper))])

# Infer publication date from paper published dates
published=paper.groupby(['venue_type','venue','publication']).agg({'published_date':max,'submitted_date':min}).reset_index()
published['published_date']=published[['published_date','submitted_date']].max(axis=1)
published.drop(columns=['submitted_date'], inplace=True)
submitted=paper[['paper','submitted_date','venue_type','venue','publication']].copy()

# Get decision per paper
decision=pd.read_csv('reviews.csv').groupby('paper').agg({'decision':['sum','count']})
decision=((decision.iloc[:,0]/decision.iloc[:,1])>0.5).to_dict()
paper['decision']=paper.paper.map(decision)

# Delete values for non-approved papers based on review decisions
for col in ['published_date','publication','doi']:
    print(col)
    paper.loc[(paper.decision==False)&(paper[col].notna()),[col]]=np.nan

paper.drop(columns=['published_date'], inplace=True)

# Create submission id -- note: submission and paper has a one to one relationship, as stated in the assumptions
paper['submission']='sub-'+paper.paper.astype(int).astype(str)

paper

To date: published_date
To date: submitted_date
published_date
publication
doi


Unnamed: 0,paper,submitted_date,title,abstract,wordcount,type,doi,venue_type,venue,publication,decision,submission
0,2178047,2002-10-07,Applying External Solutions to Organizational ...,Often stop energy table single appear another....,6506,poster,http://doi.org/7697915/136.62.81.205,Conference,976927f9-0db0-4946-925d-d113880b67d9-2002,976927f9-0db0-4946-925d-d113880b67d9-2002,True,sub-2178047
1,46711191,2013-09-01,A loop based approach to analytical multi-core...,This paper presents a loop based formulation f...,6381,full,http://doi.org/3118498/30.190.92.150,Conference,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,True,sub-46711191
2,14154659,2012-03-25,Relay selection in multi-user amplify-forward ...,For multi-user (MU) amplify-and-forward (AF) c...,6782,short,http://doi.org/4319625/210.34.169.97,Conference,0d6f7fba-7092-46b3-8039-93458dba736b-2012,0d6f7fba-7092-46b3-8039-93458dba736b-2012,True,sub-14154659
3,14119063,2011-06-10,ID-based proxy re-signcryption scheme,"Combining the idea of signcryption, a proxy re...",6742,demo,http://doi.org/8814591/105.143.59.8,Conference,047958df-6384-459e-9864-63f946419551-2011,047958df-6384-459e-9864-63f946419551-2011,True,sub-14119063
4,45069334,2004-10-22,Autonomous control of running takeoff and land...,Full they deal. Instead body agreement a admit...,6496,poster,http://doi.org/6340417/120.176.29.163,Conference,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,True,sub-45069334
...,...,...,...,...,...,...,...,...,...,...,...,...
4325,257395327,2002-01-01,Institutions. The important forecasts because ...,"Calls this a shower, being heavier than hydrog...",6958,full,http://doi.org/1519041/90.134.1.218,Journal,ffaa4409-29fb-4245-a496-c51b151f9f5f,ffaa4409-29fb-4245-a496-c51b151f9f5f-2002,True,sub-257395327
4326,257395328,2002-01-01,Parent plant and prompted the revolution that ...,Naval dockyards röntgen discovered. With prey ...,5594,full,http://doi.org/2498358/211.29.89.225,Journal,ffaa4409-29fb-4245-a496-c51b151f9f5f,ffaa4409-29fb-4245-a496-c51b151f9f5f-2002,True,sub-257395328
4327,257395329,2020-01-01,Many intersections appellation eclipsed and ev...,Mariners and limited-convective patches. Parts...,6333,demo,,Journal,fff3549c-df24-4aef-accb-a33ae442a828,,False,sub-257395329
4328,257395330,2020-01-01,"And downtown, to Aswan and is now landfilled, ...","Don; williams, hills, mountains.. Learning dis...",5873,demo,,Journal,fff3549c-df24-4aef-accb-a33ae442a828,,False,sub-257395330


In [5]:
# Read reviews
review=pd.read_csv('reviews.csv').rename(columns={'reviewerid':'reviewer'})

# Get dates
review=(submitted
 .merge(published, on=['venue_type','venue','publication'], how='outer')
 .merge(review, on=['paper'], how='right')
)
review['reviewDate']=[fake.date_between_dates(j['submitted_date'], j['published_date']) for i,j in review.iterrows()]
review.drop(columns=['venue_type','venue','publication','submitted_date','published_date'], inplace=True)
review['review']=review['paper'].astype(str)+'-'+review['reviewer'].astype(str)

#create submission id instead of paper id 
review['submission']='sub-'+review.paper.astype(int).astype(str)
review.drop(columns=['paper'], inplace=True)

review

Unnamed: 0,reviewer,decision,content,reviewDate,review,submission
0,52331035,1,"Ed. 2009), entire landmass of some kind of mat...",2020-07-01,225065627-52331035,sub-225065627
1,73771487,0,"Fir trees contracting party), traveling in any...",2020-07-01,225065627-73771487,sub-225065627
2,34202459,1,"Louis xiv, russia also concluded alliances tha...",2020-07-01,225065627-34202459,sub-225065627
3,15729050,0,Subsets r. journals request. Mainland north an...,2021-03-28,232355224-15729050,sub-232355224
4,15674973,1,Eastern egyptian are heard. Polar origin monso...,2021-03-28,232355224-15674973,sub-232355224
...,...,...,...,...,...,...
12985,49184152,1,"Masaryk, memorials in elevation.. First countr...",2020-05-31,257395330-49184152,sub-257395330
12986,120634484,0,"City"", the cloud. youtube. Predictions (reason...",2020-07-03,257395330-120634484,sub-257395330
12987,1799398854,1,Commonwealth since government freed. Rail netw...,2020-06-28,257395331-1799398854,sub-257395331
12988,1398510823,0,And sidewalks. mestizo (mixed) of south centra...,2020-05-25,257395331-1398510823,sub-257395331


In [6]:
# Note: using edition as conference title
conference=(pd.read_csv('conference.csv').rename(columns={'id':'conference'})
            .merge(pd.read_csv('holds.csv').drop(columns=['fake']), on=['conference'])
           .merge(pd.read_csv('edition.csv').rename(columns={'id':'edition'}).drop(columns=['fake','conference']), on='edition')
           .rename(columns={'venue':'location'})
            .drop(columns=['url'])
            .rename(columns={'edition':'title','name':'conferenceSeries'})
           .drop_duplicates()
           )
conference['conference']=conference['proceeding'].copy()
conference['title']=conference['year'].astype(str) + ' ' + conference['conferenceSeries']
conference['type']=[random.sample(['workshop', 'symposium', 'expert group','regular'], 1)[0] for i in range(len(conference))]

# SENSE CHECK: Check for conference series with more than one conference -- there is one series with 2 conferences
conference[conference.duplicated(subset=['conferenceSeries'], keep=False)]

# generate more fake fields
conference['issn']=conference.issn.fillna(pd.Series([fake.ssn() for i in range(len(conference))]))
conference['publisher']=[fake.company() for i in range(len(conference))]

# Get published date
conference=(conference.merge(published[published.venue_type=='Conference']
                  .rename(columns={'venue':'conference','publication':'proceeding'}))
            .drop(columns=['venue_type'])
)

# Separate conference and proceeding: note that there is a one to one correspondence for them
cols=['title','chairperson','location','Start','End','year','conferenceSeries','type']
proceeding=conference.copy()
conference=conference[['conference']+cols].drop_duplicates().reset_index(drop=True).rename(columns={'chairperson':'organizer'})
proceeding.drop(columns=cols, inplace=True)

display(conference, proceeding)

Unnamed: 0,conference,title,organizer,location,Start,End,year,conferenceSeries,type
0,976927f9-0db0-4946-925d-d113880b67d9-2002,2002 IFIP International Conference on e-Busine...,35463674,"Dayton, Ohio, United States",2002-10-07,2002-10-07,2002,"IFIP International Conference on e-Business, e...",regular
1,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,2013 Australasian Universities Power Engineeri...,2132914,"Goiânia, Goiás, Brazil",2013-09-01,2013-09-01,2013,Australasian Universities Power Engineering Co...,workshop
2,0d6f7fba-7092-46b3-8039-93458dba736b-2012,2012 IEEE International Conference on Acoustic...,1410752027,"Goianésia, Goiás, Brazil",2012-03-25,2012-03-25,2012,"IEEE International Conference on Acoustics, Sp...",symposium
3,047958df-6384-459e-9864-63f946419551-2011,2011 International Conference on Computer Scie...,119585726,"Federal, Entre Rios, Argentina",2011-06-10,2011-06-10,2011,International Conference on Computer Science a...,regular
4,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,"2012 International Conference on Control, Auto...",2108384213,"Avignon, Provence-Alpes-Côte d'Azur, France",2012-01-01,2012-12-31,2012,"International Conference on Control, Automatio...",expert group
...,...,...,...,...,...,...,...,...,...
61,df9f7819-abf5-46f8-b6a8-6bd3261a21a5-2011,2011 Australasian Telecommunication Networks a...,2072520307,"Zavolzh’ye, Nizjnij Novgorod, Russia",2011-12-08,2011-12-08,2011,Australasian Telecommunication Networks and Ap...,regular
62,0e129215-7c25-46c9-b04b-a0e9faabf021-2011,2011 International Conference on Information N...,99921433,"Mekla, Tizi Ouzou, Algeria",2011-03-03,2011-03-03,2011,International Conference on Information Networ...,regular
63,b189dec0-41d0-4cea-a906-7c5186895904-2022,2022 Global Communications Conference,144009212,"Letňany, Praha, Czech Republic",2022-12-04,2022-12-04,2022,Global Communications Conference,symposium
64,7431ff67-91dc-41fa-b322-1b1ca657025f-2022,2022 International Conference on Information a...,2107796763,"Montijo, Setúbal, Portugal",2022-01-27,2022-01-27,2022,International Conference on Information and Kn...,symposium


Unnamed: 0,conference,proceeding,issn,publisher,published_date
0,976927f9-0db0-4946-925d-d113880b67d9-2002,976927f9-0db0-4946-925d-d113880b67d9-2002,576-01-1748,"Carpenter, Garza and Thompson",2002-10-07
1,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,561-12-3082,Martin Ltd,2013-09-01
2,0d6f7fba-7092-46b3-8039-93458dba736b-2012,0d6f7fba-7092-46b3-8039-93458dba736b-2012,325-73-8837,Richard Inc,2012-03-25
3,047958df-6384-459e-9864-63f946419551-2011,047958df-6384-459e-9864-63f946419551-2011,647-54-6787,"Rollins, Mcmillan and Doyle",2011-06-10
4,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,756-97-6613,Haley Ltd,2004-10-22
...,...,...,...,...,...
61,df9f7819-abf5-46f8-b6a8-6bd3261a21a5-2011,df9f7819-abf5-46f8-b6a8-6bd3261a21a5-2011,,"Guerra, Hayes and Khan",2011-12-08
62,0e129215-7c25-46c9-b04b-a0e9faabf021-2011,0e129215-7c25-46c9-b04b-a0e9faabf021-2011,,Cisneros and Sons,2011-03-03
63,b189dec0-41d0-4cea-a906-7c5186895904-2022,b189dec0-41d0-4cea-a906-7c5186895904-2022,,Gordon Ltd,2022-12-04
64,7431ff67-91dc-41fa-b322-1b1ca657025f-2022,7431ff67-91dc-41fa-b322-1b1ca657025f-2022,,Scott-Greene,2022-01-27


In [7]:
# Note: using volume id as volume as proceeding name
journal=(pd.read_csv('journal.csv').rename(columns={'id':'journal'})
            .merge(pd.read_csv('volume_of.csv').drop(columns=['fake']), on=['journal'])
           .merge(pd.read_csv('volume.csv').drop(columns=['volume']).rename(columns={'id':'volume'}).drop(columns=['fake']), on='volume')
           .drop_duplicates()
         .drop(columns=['url'])
         .rename(columns={'name':'title'})
        )
# Get published date
journal=(journal.merge(published[published.venue_type=='Journal']
                  .rename(columns={'venue':'journal','publication':'volume'}))
            .drop(columns=['venue_type'])
)

# generate more fake fields
journal['issn']=journal.issn.fillna(pd.Series([fake.ssn() for i in range(len(journal))]))
journal['publisher']=[fake.company() for i in range(len(journal))]
journal.drop(columns=['year'], inplace=True)

# Separate journal and volume
cols=['title','editor']
volume=journal.copy()
journal=journal[['journal']+cols].groupby(['journal','title']).head(1).reset_index(drop=True).rename(columns={'editor':'organizer'})
volume.drop(columns=cols, inplace=True)

display(journal, volume)

Unnamed: 0,journal,title,organizer
0,52df9a54-6cc3-4685-9826-f6ba927def1a,Nepalese journal of ophthalmology : a biannual...,6.748076e+06
1,9f840236-aa46-478e-98fe-68a1fe8b823e,Regenerative medicine,4.983612e+07
2,b6b26b43-2fef-41ad-98b9-af7ba33afa6b,Journal of the Medical Association of Thailand...,2.556517e+07
3,910f05b9-f423-44fc-9fc1-c6b3d2481fe0,Journal of Foot and Ankle Research,2.176073e+09
4,137df871-0be4-4ea4-9f85-52b2b36070a3,Journal of the American Dental Association,1.457783e+08
...,...,...,...
599,a00fde74-d8df-4613-b825-0fff9f531d3f,ArXiv,2.542754e+06
600,cbbe61ca-ba31-4f0a-b64c-49da7a69f20c,Hematology,2.911675e+08
601,b07ad337-ba9f-48ca-a0ff-59d1643601ac,Life sciences,7.456097e+08
602,e693dc83-bbed-4844-bfa8-a14861fdf715,Gene,4.225051e+08


Unnamed: 0,journal,issn,volume,published_date,publisher
0,52df9a54-6cc3-4685-9826-f6ba927def1a,2072-6805,52df9a54-6cc3-4685-9826-f6ba927def1a-12 24,2020-07-01,Hatfield Ltd
1,52df9a54-6cc3-4685-9826-f6ba927def1a,2072-6805,52df9a54-6cc3-4685-9826-f6ba927def1a-2020,2020-12-31,Strong Ltd
2,9f840236-aa46-478e-98fe-68a1fe8b823e,1524-0142,9f840236-aa46-478e-98fe-68a1fe8b823e-nan,2021-03-30,Brooks LLC
3,9f840236-aa46-478e-98fe-68a1fe8b823e,1524-0142,9f840236-aa46-478e-98fe-68a1fe8b823e-2021,2021-12-31,Ramirez Ltd
4,b6b26b43-2fef-41ad-98b9-af7ba33afa6b,0125-2208,b6b26b43-2fef-41ad-98b9-af7ba33afa6b-97 Suppl 6,2014-06-01,Miles and Sons
...,...,...,...,...,...
1449,b07ad337-ba9f-48ca-a0ff-59d1643601ac,2252-6277,b07ad337-ba9f-48ca-a0ff-59d1643601ac-2022,2022-12-31,"Lee, Holden and Flores"
1450,e693dc83-bbed-4844-bfa8-a14861fdf715,0378-1119,e693dc83-bbed-4844-bfa8-a14861fdf715-574 2,2015-12-15,Gilmore PLC
1451,e693dc83-bbed-4844-bfa8-a14861fdf715,0378-1119,e693dc83-bbed-4844-bfa8-a14861fdf715-2015,2015-12-31,Daniel Ltd
1452,21b36238-c30e-4cf2-aaff-afc89e023d0e,1742-2051,21b36238-c30e-4cf2-aaff-afc89e023d0e-5 4,2009-02-12,Adams and Sons


In [8]:
# Edit Paper/submission
# Get information about the chair/editor that assigned reviewers for that submission
org=pd.concat([conference[['conference','organizer']].assign(venue_type='Conference').rename(columns={'conference':'venue'}),
           journal[['journal','organizer']].assign(venue_type='Journal').rename(columns={'journal':'venue'})]).drop_duplicates()

paper=paper.merge(org, on=['venue_type','venue'], how='left')
paper

Unnamed: 0,paper,submitted_date,title,abstract,wordcount,type,doi,venue_type,venue,publication,decision,submission,organizer
0,2178047,2002-10-07,Applying External Solutions to Organizational ...,Often stop energy table single appear another....,6506,poster,http://doi.org/7697915/136.62.81.205,Conference,976927f9-0db0-4946-925d-d113880b67d9-2002,976927f9-0db0-4946-925d-d113880b67d9-2002,True,sub-2178047,3.546367e+07
1,46711191,2013-09-01,A loop based approach to analytical multi-core...,This paper presents a loop based formulation f...,6381,full,http://doi.org/3118498/30.190.92.150,Conference,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,e2716c6a-50f4-4a85-be07-a76de507f09a-2013,True,sub-46711191,2.132914e+06
2,14154659,2012-03-25,Relay selection in multi-user amplify-forward ...,For multi-user (MU) amplify-and-forward (AF) c...,6782,short,http://doi.org/4319625/210.34.169.97,Conference,0d6f7fba-7092-46b3-8039-93458dba736b-2012,0d6f7fba-7092-46b3-8039-93458dba736b-2012,True,sub-14154659,1.410752e+09
3,14119063,2011-06-10,ID-based proxy re-signcryption scheme,"Combining the idea of signcryption, a proxy re...",6742,demo,http://doi.org/8814591/105.143.59.8,Conference,047958df-6384-459e-9864-63f946419551-2011,047958df-6384-459e-9864-63f946419551-2011,True,sub-14119063,1.195857e+08
4,45069334,2004-10-22,Autonomous control of running takeoff and land...,Full they deal. Instead body agreement a admit...,6496,poster,http://doi.org/6340417/120.176.29.163,Conference,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,413493e7-4bb6-4c68-a57a-e21b1b3ca448-2012,True,sub-45069334,2.108384e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4325,257395327,2002-01-01,Institutions. The important forecasts because ...,"Calls this a shower, being heavier than hydrog...",6958,full,http://doi.org/1519041/90.134.1.218,Journal,ffaa4409-29fb-4245-a496-c51b151f9f5f,ffaa4409-29fb-4245-a496-c51b151f9f5f-2002,True,sub-257395327,2.926423e+07
4326,257395328,2002-01-01,Parent plant and prompted the revolution that ...,Naval dockyards röntgen discovered. With prey ...,5594,full,http://doi.org/2498358/211.29.89.225,Journal,ffaa4409-29fb-4245-a496-c51b151f9f5f,ffaa4409-29fb-4245-a496-c51b151f9f5f-2002,True,sub-257395328,2.926423e+07
4327,257395329,2020-01-01,Many intersections appellation eclipsed and ev...,Mariners and limited-convective patches. Parts...,6333,demo,,Journal,fff3549c-df24-4aef-accb-a33ae442a828,,False,sub-257395329,6.520401e+08
4328,257395330,2020-01-01,"And downtown, to Aswan and is now landfilled, ...","Don; williams, hills, mountains.. Learning dis...",5873,demo,,Journal,fff3549c-df24-4aef-accb-a33ae442a828,,False,sub-257395330,6.520401e+08


In [9]:
hasAuthor=pd.read_csv('writes.csv').drop(columns=['fake'])
hasAuthor

Unnamed: 0,author,paper
0,7.265495e+06,219410769
1,4.086158e+07,219410769
2,2.073785e+09,219410769
3,6.880566e+06,225065627
4,5.113959e+07,209854486
...,...,...
13584,2.151717e+06,257395330
13585,4.990294e+07,257395331
13586,2.470346e+06,257395331
13587,4.627105e+07,257395331


In [10]:
area=pd.read_csv('topic.csv', usecols=['community']).rename(columns={'community':'topicName'}).drop_duplicates().reset_index(drop=True)
area['area']='area-'+area.index.astype(str)
area

Unnamed: 0,topicName,area
0,Pure Science,area-0
1,Applied Science,area-1
2,Social Science,area-2
3,Database,area-3


In [11]:
hasTopic=[]
for df in ['paper','journal','volume','conference','proceeding']:
    hasTopic.append(globals()[df][[df]].rename(columns={df:'id'}).assign(typ=df))
hasTopic=pd.concat(hasTopic, ignore_index=True)
hasTopic['area']=[random.sample(list(area.area.unique()), 1)[0] for i in range(len(hasTopic))]
hasTopic=hasTopic.merge(area, on=['area'])
hasTopic

Unnamed: 0,id,typ,area,topicName
0,2178047,paper,area-2,Social Science
1,46711191,paper,area-2,Social Science
2,14154659,paper,area-2,Social Science
3,222125022,paper,area-2,Social Science
4,15230445,paper,area-2,Social Science
...,...,...,...,...
6515,764e3630-ddac-4c21-af4b-9d32ffef082e-2013,proceeding,area-0,Pure Science
6516,3dbf084c-ef47-4b74-9919-047b40704538-2021,proceeding,area-0,Pure Science
6517,b9411e3b-428a-4b51-b8a5-32698548f583-2009,proceeding,area-0,Pure Science
6518,c6ced65d-fcf7-4e5f-bedd-01360247ebbe-2013,proceeding,area-0,Pure Science


In [12]:
del org, published
gc.collect()

0

# Define ABOX

In [86]:
import re
from datetime import datetime
from rdflib import Graph, Namespace, URIRef, Literal, RDF, XSD, FOAF, RDFS

g = Graph()
sdm = Namespace('http://example.org/sdm#')

g.bind("sdm", sdm)
g.bind("rdfs", RDFS)
g.bind("xsd", XSD)
g.bind("rdf", RDF)

NS = {
    'sdm': sdm,
    'rdf': RDF,
    'rdfs': RDFS,
    'xsd':XSD,
}

# Helper functions
def prepareValue(row, uri=sdm):
    """
    Function that prepares the values to be added to the graph as a URI or Literal
    source: https://wiki.uib.no/info216/index.php/Python_Examples#RDF_programming_with_RDFlib_.28Lab_2.29
    Input: row value 
    Output: Converted URI or literal
    """
    if row == None:  # none type
        value = Literal(row)
    elif (isinstance(row, str) and re.match(r'\d{4}-\d{2}-\d{2}', row)) or isinstance(row, datetime):  # date
        value = Literal(row, datatype=XSD.date)
    elif isinstance(row, bool):  # boolean value (true / false)
        value = Literal(row, datatype=XSD.boolean)
    elif isinstance(row, int):  # integer
        value = Literal(row, datatype=XSD.integer)
    elif isinstance(row, str):  # string
#         value = URIRef(uri + row.replace('"', '').replace(" ",
#                        "_").replace(",", "").replace("-", "_"))
        pattern='^((http|https)://)[-a-zA-Z0-9@:%._\\+~#?&//=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%._\\+~#?&//=]*)$'
        value=URIRef(uri+re.sub(pattern,'_',row.replace('\n','_').replace('\t','_').replace(",", "").replace("-", "_").replace('"', '').replace('\\', '')).replace(" ",'_') )
    elif isinstance(row, float):  # float
        value = Literal(row, datatype=XSD.float)
    return value

def to_camel_case(text):
    """
    Convert string to camel case (no spaces)
    """
    x=[i for i in text]
    return ''.join(sum([],[x[0].upper()]+x[1:]))

In [66]:
# General cLean up of all dfs
df_list= %who_ls DataFrame
for df in df_list:
    print(df)
    # replace nulls with None
    globals()[df]=globals()[df].replace(np.nan, None)
    
    # Make all date columns into datetime
    dcols=[i for i in globals()[df].columns if 'date' in i.lower() or i.endswith('_dt')]
    for col in dcols:
        globals()[df][col]=pd.to_datetime(globals()[df][col])

area
author
conference
hasAuthor
hasTopic
journal
paper
proceeding
review
submitted
volume


In [71]:
# Convert the non-semantic CSV dataset into a semantic RDF
def area_to_rdf(df):
    """
    Concepts: Area
    """
    for index, row in df.iterrows():
        id = URIRef(sdm + "Area_" + str(row['area']))
        name = prepareValue(row["topicName"])
        
        # Adds the triples
        g.add((id, RDF.type, sdm.Area))
        g.add((id, sdm.hasTopicName, name))
        
def author_to_rdf(df):
    """
    Concepts: Person, Author
    QUESTION: Can we just connect all attributes to author instead of having a separate person?
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Person_" + str(row['author']))
        name = prepareValue(row["name"])
        birthdate = prepareValue(row["birthdate"])
        sex = prepareValue(row["sex"])
        country = prepareValue(row["originCountry"])
        
        # Adds the triples
        g.add((id, RDF.type, sdm.Person))
        g.add((id, sdm.hasPersonName, name))
        g.add((id, sdm.hasBirthDate, birthdate))
        g.add((id, sdm.hasSex, sex))
        g.add((id, sdm.originCountry, country))
        
        # Author
        id = URIRef(sdm + "Author_" + str(row['author']))
        url = prepareValue(row["url"])
        hindex = prepareValue(row["hIndex"])
        institution = prepareValue(row["institution"])
                
        # Adds the triples
        g.add((id, RDF.type, sdm.Author))
        g.add((id, sdm.url, url))
        g.add((id, sdm.hasHIndex, hindex))
        g.add((id, sdm.affiliatedWithInstitution, institution))

def conference_to_rdf(df):
    """
    Concepts: Conference
    Relationships: hasOrganizer
    QUESTION: title is saved in conference is this ok? instead of venue.
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Conference_" + str(row['conference']))
        conf_type={'expert group':sdm.ExpertGroup, 'symposium':sdm.Symposium, 
                   'workshop':sdm.Workshop, 'regular':sdm.RegularConference}[row['type']]
        title = prepareValue(row["title"])
        location = prepareValue(row["location"])
        start = prepareValue(row["Start"])
        end = prepareValue(row["End"])
        year = prepareValue(row["year"])
        conferenceSeries = prepareValue(row["conferenceSeries"])
        
        # Adds the triples 
        g.add((id, RDF.type, conf_type))
        g.add((id, sdm.hasVenueTitle, title))
        g.add((id, sdm.heldIn, location))
        g.add((id, sdm.startDate, start))
        g.add((id, sdm.endDate, end))
        g.add((id, sdm.heldInYear, year))
        g.add((id, sdm.conferenceSeries, conferenceSeries))
        
        # Relationships
        author_org=URIRef(sdm + "Author_" + str(row['organizer']))
        
        # Adds the triples 
        g.add((id, sdm.hasOrganizer, author_org))
        
def journal_to_rdf(df):
    """
    Concepts: Journal
    Relationships: hasOrganizer
    QUESTION: title is saved in journal is this ok? instead of venue.
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Journal_" + str(row['journal']))
        title = prepareValue(row["title"])
        
        # Adds the triples 
        g.add((id, RDF.type, sdm.Journal))
        g.add((id, sdm.hasVenueTitle, title))
        
        # Relationships
        author_org=URIRef(sdm + "Author_" + str(row['organizer']))
        
        # Adds the triples 
        g.add((id, sdm.hasOrganizer, author_org))

def volume_to_rdf(df):
    """
    Concepts: Volume
    Relationships: hasPublished
    QUESTION: Used Venue > Publication relationship. note that Volume URIs replace spaces with _
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Volume_" + str(row['volume']).replace(' ','_'))
        jid= URIRef(sdm + "Journal_" + str(row['journal']))
        issn = prepareValue(row["issn"])
        published_date = prepareValue(row["published_date"])
        publisher = prepareValue(row["publisher"])
        
        # Adds the triples
        g.add((id, RDF.type, sdm.Volume))
        g.add((id, sdm.hasPersonName, issn))
        g.add((id, sdm.hasBirthDate, published_date))
        g.add((id, sdm.hasSex, publisher))
        
        # Relationship
        g.add((jid, sdm.hasPublished, id))

def proceeding_to_rdf(df):
    """
    Concepts: proceeding
    Relationships: hasPublished
    QUESTION: Used Venue > Publication relationship
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Proceeding_" + str(row['proceeding']))
        cid= URIRef(sdm + "Conference_" + str(row['conference']))
        issn = prepareValue(row["issn"])
        published_date = prepareValue(row["published_date"])
        publisher = prepareValue(row["publisher"])
        
        # Adds the triples
        g.add((id, RDF.type, sdm.Proceeding))
        g.add((id, sdm.hasPersonName, issn))
        g.add((id, sdm.hasBirthDate, published_date))
        g.add((id, sdm.hasSex, publisher))
        
        # Relationship
        g.add((cid, sdm.hasPublished, id))

def paper_to_rdf(df):
    """
    Concepts: Paper, Submission
    Relationships: includedIn, publishedIn, assignedBy
    QUESTION: No DOI in TBOX. But is added for ABOX --> property name: paperDOI
    ERROR: KEEPS GETTING ERROR FOR ABTRACT URIS
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Paper_" + str(row['paper']))
        sid = URIRef(sdm + "Submission_" + str(row['submission']))
        oid=URIRef(sdm + 'Author_' + str(row['organizer']))
        paper_type={'demo':sdm.DemoPaper, 'full':sdm.FullPaper, 'short':sdm.ShortPaper, 'poster':sdm.Poster}[row['type']]
        
        for col in df.columns:
            locals()[col]=prepareValue(row[col])
            
        # Paper properties
        g.add((id,RDF.type, paper_type))
        g.add((id,sdm.paperAbstract, locals()['abstract']))
        g.add((id,sdm.paperTitle, locals()['title']))
        g.add((id,sdm.paperWordCount, locals()['wordcount']))

        # Submission properties
        g.add((sid,sdm.submissionDate, locals()['submitted_date']))

        # Relationships
        g.add((id,sdm.includedIn,sid))
        g.add((sid, sdm.assignedBy, oid))
        
        # Conditional property and relationship, only add if paper decision is true (published)
        if row['decision']:
            pid=URIRef(sdm + {'Conference':'Proceeding_','Journal':'Volume_'}[row['venue_type']] + str(row['publication']).replace(' ','_'))
            g.add((id,sdm.paperDOI, locals()['doi']))
            g.add((id,sdm.publishedIn,pid))

def review_to_rdf(df):
    """
    Concepts: Review
    Relationships: hasReviewer, hasReview
    ERROR: Same with paper
    """
    for index, row in df.iterrows():
        # define values
        id = URIRef(sdm + "Review_" + str(row['review']))
        sid = URIRef(sdm + "Submission_" + str(row['submission']))
        rid=URIRef(sdm + 'Author_' + str(row['reviewer']))
        
        for col in df.columns:
            print(col)
            locals()[col]=prepareValue(row[col])
            
        # Paper properties
        g.add((id,sdm.decision, locals()['decision']))
        g.add((id,sdm.content, locals()['content']))
        g.add((id,sdm.reviewDate, locals()['reviewDate']))

        # Relationships
        g.add((id,sdm.hasReviewer,sid))
        g.add((sid, sdm.hasReview, id))
        
def hasauthor_to_rdf(df):
    """
    Relationships: hasAuthor
    """
    for index, row in df.iterrows():
        # define values
        pid = URIRef(sdm + "Paper_" + str(row['paper']))
        aid = URIRef(sdm + "Author_" + str(row['author']))

        # Relationships
        g.add((pid,sdm.hasAuthor,aid))

def hasauthor_to_rdf(df):
    """
    Relationships: hasAuthor
    """
    for index, row in df.iterrows():
        # define values
        pid = URIRef(sdm + "Paper_" + str(row['paper']))
        aid = URIRef(sdm + "Author_" + str(row['author']))

        # Relationships
        g.add((pid,sdm.hasAuthor,aid))
        
def hastopic_to_rdf(df):
    """
    Relationships: paperRelatedTo, venueRelatedTo, publicationRelatedTo
    """
    for index, row in df.iterrows():
        # define values
        pid = URIRef(sdm + to_camel_case(row['typ'])+'_' + str(row['id']).replace(' ','_'))
        aid = URIRef(sdm + "Area_" + str(row['area']))
        rel={'paper':sdm.paperRelatedTo, 'journal':sdm.venueRelatedTo, 'volume':sdm.publicationRelatedTo, 
             'conference':sdm.venueRelatedTo, 'proceeding':sdm.publicationRelatedTo}[row['typ']]

        # Relationships
        g.add((pid,rel,aid))

In [54]:
area_to_rdf(area)
author_to_rdf(author)
conference_to_rdf(conference)
journal_to_rdf(journal)
volume_to_rdf(volume)
proceeding_to_rdf(proceeding)
paper_to_rdf(paper) # ERROR!
review_to_rdf(review) # ERROR!
hasauthor_to_rdf(hasAuthor)
hastopic_to_rdf(hasTopic)

http://example.org/sdm#BACKGROUND_Central_venous_catheterization_(CVC)_is_an_indispensable_route_of_venous_access_in_management_of_critically_ill_patients._Potential_CVC_related_complications_include_mechanical_and_infectious_complications.___OBJECTIVE_To_determine_type_incidence_and_risk_factor_of_CVC_related_complications_in_pediatric_patients.___MATERIAL_AND_METHOD_Prospective_observational_study_of_all_patients_who_underwent_CVC_in_pediatric_intensive_care_unit_(PICU)_at_Queen_Sirikit_National_Institute_of_Child_Health_over_a_1_year_period.___RESULTS_The_study_included_137_patients_of_whom_63.5%_were_males._The_mean_age_was_36.7_±_4.4_months._There_were_204_CVC_attempts_with_total_indwell_time_of_2002_days._The_rate_of_mechanical_complication_was_19%_including_failure_to_place_catheter_(9.3%)_hematoma_(4.9%)_arterial_puncture_(2%)_and_pneumothorax_(1.5%)._Patient_body_mass_index_(BMI)_>_30_kg/m2_internal_jugular_venous_catheterization_and_longer_insertion_time_(>_30_minutes)_were_a

http://example.org/sdm#It_was_recently_claimed_by_Zagarella_and_colleagues_that_there_was_‘no_convincing_evidence_that_sentinel_node_(SN)_status_is_a_stronger_predictor_of_survival_outcome_when_compared_to_a_combination_of_Breslow_thickness_ulceration_mitotic_rate_age_and_other_factors’._However_they_offered_no_new_primary_data_to_support_their_view_existing_data_were_misinterpreted_the_overwhelming_body_of_evidence_that_SN_status_provides_important_staging_information_in_addition_to_that_provided_by_patient_factors_and_primary_tumour_thickness_was_overlooked_and_the_recommendations_of_the_American_Society_of_Clinical_Oncology_and_the_Society_of_Surgical_Oncology_were_ignored._The_results_of_a_large_randomised_controlled_trial_the_first_Multicenter_Selective_Lymphadenectomy_Trial_(MSLT_I)_provided_high_level_evidence_in_support_of_the_prognostic_value_of_SNB_and_cemented_its_role_as_a_high_yield_lowmorbidity_staging_procedure_for_patients_at_significant_risk_of_occult_lymph_node_metast

http://example.org/sdm#_Purpose_Stunting_and_being_underweight_in_children_are_major_nutritional_problems_especially_in_developing_countries._The_purpose_of_this_study_is_to_evaluate_the_effect_of_egg_supplementation_on_a_vitamin_and_mineral_fortification_program_for_growth_cognitive_development_and_hemoglobin_in_underweight_and_stunted_children.___Design/methodology/approach_This_was_an_experimental_study_using_a_crossover_design_conducted_in_Yogyakarta_province_Indonesia._A_total_of_39_subjects_were_randomly_selected_from_two_community_health_centers_and_provided_with_two_types_of_intervention:_vitamin_and_mineral_fortification_sprinkle_(Taburia)_and_Taburia_with_egg_supplementation_(Taburia_PLUS)._Each_intervention_was_conducted_for_three_months_with_one_month_of_washout_period_in_between_interventions.___Findings_There_were_no_changes_in_weight_for_age_Z_score_in_Taburia_and_Taburia_PLUS_(all_p_>_0.05)._The_height_for_age_Z_score_was_increased_in_Taburia_PLUS_(p_=_0.022)_but_not_in

http://example.org/sdm#Background_The_performance_in_master_marathoners_has_been_investigated_in_flat_city_marathons_but_not_in_mountain_marathons._This_study_examined_changes_in_the_sex_differences_in_performance_across_time_in_female_and_male_master_runners_competing_in_a_mountain_marathon_compared_to_a_flat_city_marathon._Methods_The_association_between_age_and_performance_of_finishers_in_the_Jungfrau_Marathon_Switzerland_with_1830_meter_changes_in_altitude_and_a_flat_city_marathon_(Lausanne_Marathon)_Switzerland_were_analyzed_from_2000_to_2011._Results_In_both_events_athletes_in_the_35–44_years_age_group_showed_the_highest_number_of_finishers._In_the_mountain_marathon_the_number_of_female_master_runners_aged_>_35_years_increased_in_contrast_to_female_finishers_aged_<_35_years_while_the_number_of_male_finishers_was_unchanged_in_all_age_groups._In_the_city_marathon_the_number_of_female_finishers_was_unchanged_while_the_number_of_male_finishers_in_the_age_groups_for_25–34_year_olds_an

http://example.org/sdm#Abstract_Alkylphenols_a_nonionic_surface_active_agent_group_such_as_nonylphenol_(NP)_and_octylphenol_(OP)_are_important_endocrine_disrupting_chemicals_(EDC)._In_this_study_the_dose__and_time_dependent_effects_of_NP_and_OP_were_investigated_in_the_primary_hepatocyte_culture_of_Van_Fish._In_this_study_samples_were_taken_at_different_times_and_biochemical_parameters_were_studied_separately._The_effects_of_the_chemicals_used_on_SOD_CAT_GSH_Px_MDA_and_8_OHdG_were_investigated_in_hepatocyte_culture._The_antioxidants_SOD_and_CAT_were_observed_to_increase_in_all_groups_in_the_primary_hepatocyte_cultures_at_the_24th_hour_after_NP_and_OP_administration_whereas_the_GSH_Px_level_was_observed_to_increase_with_OP_at_the_24th_hour_and_with_NP_at_the_48th_hour._The_MDA_level_was_observed_to_reach_its_highest_value_for_both_chemicals_in_the_24th_hour_and_the_8_OHdG_level_was_observed_to_increase_toward_the_end_of_the_follow_up_time_compared_to_the_control_group_(p < 0.05)._In_con

http://example.org/sdm#A_laboratory_study_is_presented_where_a_technique_of_band_retention_testing_is_characterized._A_glass_polyalkenoate_and_a_zinc_phosphate_cement_were_evaluated_for_band_retention_under_simulated_conditions_of_mechanical_stress._In_the_absence_of_mechanical_and_other_stresses_there_was_no_difference_in_the_two_cements_for_band_retention._However_after_mechanical_stress_was_applied_the_glass_polyalkenoate_cement_proved_superior_to_the_zinc_polyalkenoate_cement_for_band_retention_(P<0·05)._Weibull_analysis_is_presented_and_described_as_a_useful_adjunct_to_statistical_handling_of_band_retention_data. does not look like a valid URI, trying to serialize this will break.
http://example.org/sdm#Objective:_Pharmacological_management_of_heart_failure_and_comorbidities_may_result_in_polypharmacy_but_there_are_few_population_based_studies_that_portray_the_use_of_medications_over_time._We_aimed_to_describe_the_trends_in_polypharmacy_and_medication_use_in_older_adults_with_hear

http://example.org/sdm#ObjectiveTest_the_efficacy_of_a_mixture_of_six_NEF_(N1_N2_N3)_GAG_(G1_G2)_and_ENV_(E)_lipopeptides_in_the_induction_of_B__and_T_cell_anti_HIV_responses._DesignA_randomized_phase_I_open_label_dose_finding_trial._Twenty_eight_healthy_seronegative_volunteers_received_the_lipopeptides_with_or_without_the_adjuvant_QS21._MethodsAnti_HIV_peptide_antibodies_were_detected_by_enzyme_linked_immunosorbent_assay_and_Western_blotting._Induction_of_cellulary_responses_was_assessed_by_proliferative_test_and_51Cr_release_assay._ResultsLocal_and_systemic_adverse_reactions_were_always_mild_or_moderate._After_three_injections_an_antibody_response_was_detected_in_25_out_of_28_volunteers_(89%)._T_cells_from_19_(79%)_of_the_24_volunteers_proliferated_in_response_to_at_least_one_peptide._The_majority_of_the_volunteers_had_induced_a_multispecific_proliferative_response;_that_is_cells_from_volunteers_proliferated_to_two_(five_of_19)_three_(five_of_19)_four_(three_of_19)_or_five_peptides_(

http://example.org/sdm#Based_on_the_situation_of_physician_trust_in_the_patient_(PTP)_we_explored_the_differences_in_perceived_behaviors_of_physician_trust_in_the_patient_(PBPTP)._We_used_the_PTP_scale_as_a_research_tool_taking_physicians_of_the_hospitals_in_Anhui_region_as_the_research_object_to_carry_out_the_investigation_of_PTP_Python_software_was_applied_to_explore_the_status_of_PTP_and_the_differences_of_PBPTP_distribution_rate_with_different_demographic_characteristic_variables_were_compared_by_testing_based_on_theory_of_planned_behavior._We_get_six_results_as_follows:_(1)_the_overall_PTP_level_was_low_and_nearly_50%_of_doctors_doubt_the_integrity_of_patients._“Patients_will_not_be_driven_by_improper_interests”_becomes_the_most_reluctant_problem_or_the_most_distrustful_option_for_doctors._(2)_In_terms_of_patients’_participation_in_disease_management_and_regular_follow_up_visits_PTP_rate_in_male_was_higher_than_that_in_female_(Ps < 0.018)._(3)_PBPTP_was_affected_by_age_(Ps < 0.017

http://example.org/sdm#Indigestible_components_including_indigestible_dry_matter_(iDM)_and_indigestible_neutral_detergent_fiber_(iNDF)_play_an_integral_role_as_internal_markers_for_determining_ruminal_kinetics_and_digestibility_estimations._However_the_accuracy_of_internal_markers_is_dependent_upon_the_incubation_technique_utilized_as_bag_type_(BT)_and_incubation_length_(IL)_can_be_significant_sources_of_error._Previous_studies_have_primarily_focused_on_iDM_and_iNDF_as_digestibility_markers_but_few_studies_have_compared_digestibility_estimates_to_those_of_acid_detergent_insoluble_ash_(ADIA)._Therefore_our_objective_was_to_investigate_the_effect_of_BT_(F57_F58_and_Dacron)_and_IL_(288_and_576_h)_on_iDM_and_iNDF_residues_DM_and_NDF_digestibilities_and_fecal_recoveries_when_using_in_situ_incubations._Additionally_we_evaluated_the_accuracy_of_digestibility_estimates_when_using_iDM_iNDF_and_ADIA._For_iDM_and_iNDF_feed_residues_demonstrated_a_BT_×_IL_interaction_(P_<_0.01)._However_fecal_resi

http://example.org/sdm#The_purpose_of_the_present_study_was_to_evaluate_the_effect_of_plasma_from_eclamptic_and_preeclamptic_patients_on_cultured_sympathetic_nerve._Sympathetic_neurons_from_12__to_14_day_old_chick_embryos_were_cultured;_the_neurons_were_then_stimulated_with_50%_plasma_from_eclamptic_preeclamptic_hypertensive_normotensive_pregnant_hypertensive_and_normotensive_nonpregnant_women_(n=7)._Similarly_neurons_were_individually_incubated_with_mixtures_of_50%_corresponding_plasma_with_0.25%_bupivacaine_or_bupivacaine_only_(n=7)._Furthermore_the_effects_of_1%_10%_and_50%_plasma_from_eclamptic_preeclamptic_and_normotensive_pregnant_patients_(n=7)_were_also_evaluated._Norepinephrine_concentrations_were_measured_by_high_performance_liquid_chromatography._Electron_microscopic_studies_of_nerve_cells_were_also_performed._Stimulation_with_plasma_from_eclamptic_and_preeclamptic_women_significantly_increased_norepinephrine_concentration_(P<0.0001)_compared_with_control._The_release_of_nor

http://example.org/sdm#To_earth_reporter_storm_field_was_not_until_after_passage_of_civil_society_inside_and._Never_able_the_renowned._Other_things)_protocol_(bgp)_forming._Buccaneers_began_hot_or_cold_semiarid.._Ego_transcendence_objectivity_dynamically_typed._________{displaystyle_e_{k.._Post_one_drivers'_championships_more_than_50_million_years_ago.._Media._internet_media_account_numerous._The_forecast)_keeping_pace_this_has_altered_youth's._Of_bedrock_that_46.6_percent_of_the_canada_pension_plan_and._By_hong_aging_with._Americana._1920._seattle_and_london:_university_of_munich?._Vessel_for_organizations._journalism_is_nonfiction.._Approach_open_mindedness_model)_each_protocol_leverages. does not look like a valid URI, trying to serialize this will break.
http://example.org/sdm#Walk_by_taught_since._Tribes_are_platform_twitter_to_connect_a_number_of_staff_mohamed._Large_media_landmass._it_has._From_socialist_distribution._the_atlanta_journal_constitution_is._(130_mph)_{d}_mathbf_{s}

http://example.org/sdm#This_83_°f_(20_°c)_in_fort_lauderdale_florida._day_rooms_are_booked_in_a._Vial_is_the_accession_of_the_32nd_president_of_prussia_was_offered_the_resignation_of._Abreu_andreas_and_aalborg._Subsequent_researchers_kipling_d?._Question_answering_system._of_escher.._Religious_sites_young;_hospices_for_the_advancement_of_science)_doi:_10.1126/science.aaa7864_2015._Be_encrypted_gold_dust._Atlanta's_cost_:__g1_…_gn_|_b1_…_bn._where._He_placed_several_language._(who_founded_concerts_are_given_the_etymology_of_this. does not look like a valid URI, trying to serialize this will break.
http://example.org/sdm#Laurentia_block)_initial_non_indigenous_settlement_occurred_in_1741.._International_holocaust_originator._in._Theory_planetesimals_play_it_pretty_for_atlanta._during_the_cold_war_era_montana._Talkers_but_line_defined_as_the_church_was_responsible_for_the_duration._Achieving_the_{displaystyle_e=mc^{2}}__where_the_probability_of_being._Guardsman_in_safety_issues_they_are_n

http://example.org/sdm#Tgeq_{frac_Transmission_speed_ranges_from_200_to_170_Ma._The does not look like a valid URI, trying to serialize this will break.
http://example.org/sdm#Michigan_are_on_appendix_i_of_cites_thus_prohibiting_commercial_international._Thought_known_appreciated_until_now.._Mammalian_life_the_text_was_originally_only_in_french.._Towns_engaging_lacquerware_swords_and_dolls;_performances_of_bunraku_kabuki.._1200_miles_{displaystyle_c^{2.. does not look like a valid URI, trying to serialize this will break.
http://example.org/sdm#Labour_force_and_<_are_constraint_predicates_to_be_retired._Simonsohn_did_indeed does not look like a valid URI, trying to serialize this will break.


# Export

In [58]:
print(g.serialize())

Exception: "http://example.org/sdm#Trace_elements_might_play_a_role_in_the_complex_multifactorial_pathogenesis_of_age_related_macular_degeneration_(AMD)._The_aim_of_this_study_was_to_measure_alterations_of_trace_elements_levels_in_aqueous_humor_of_patients_with_non_exsudative_(dry)_AMD._For_this_pilot_study_aqueous_humor_samples_were_collected_from_patients_undergoing_cataract_surgery._12_patients_with_dry_AMD_(age_77.9±6.62_female_8_male_4)_and_11_patients_without_AMD_(age_66.6±16.7_female_7_male_4)_were_included._Aqueous_levels_of_cadmium_cobalt_copper_iron_manganese_selenium_and_zinc_were_measured_by_use_of_Flow_Injection_Inductively_Coupled_Plasma_Mass_Spectrometry_(FI_ICP_MS)_quality_controlled_with_certified_standards._Patients_with_AMD_had_significantly_higher_aqueous_humor_levels_of_cadmium_(median:_0.70_µmol/L_IQR:_0.40–0.84_vs._0.06_µmol/L;_IQR:_0.01–.018;_p = 0.002)_cobalt_(median:_3.1_µmol/L_IQR:_2.62–3.15_vs._1.17_µmol/L;_IQR:_0.95–1.27;_p<0.001)_iron_(median:_311_µmol/L_IQR:_289–329_vs._129_µmol/L;_IQR:_111–145;_p<0.001)_and_zinc_(median:_23.1_µmol/L_IQR:_12.9–32.6_vs._5.1_µmol/L;_IQR:_4.4–9.4;_p = 0.020)_when_compared_with_patients_without_AMD._Copper_levels_were_significantly_reduced_in_patients_with_AMD_(median:_16.2_µmol/L_IQR:_11.4–31.3_vs._49.9_µmol/L;_IQR:_32.0–.142.0;_p = 0.022)_when_compared_to_those_without._No_significant_differences_were_observed_in_aqueous_humor_levels_of_manganese_and_selenium_between_patients_with_and_without_AMD._After_an_adjustment_for_multiple_testing_cadmium_cobalt_copper_and_iron_remained_a_significant_factor_in_GLM_models_(adjusted_for_age_and_gender_of_the_patients)_for_AMD._Alterations_of_trace_element_levels_support_the_hypothesis_that_cadmium_cobalt_iron_and_copper_are_involved_in_the_pathogenesis_of_AMD." does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?