In [None]:
pip install -r requirements.txt

In [2]:
import json
import re
import pandas as pd
import requests

In [3]:
#query from the curl command of the search request 
query = {"query" : {
  "bool" : {
    "must" : [ {
      "term" : {
        "publicState" : {
          "value" : "RELEASED"
        }
      }
    }, {
      "term" : {
        "versionState" : {
          "value" : "RELEASED"
        }
      }
    }, {
      "bool" : {
        "must" : [ {
          "bool" : {
            "should" : [ {
              "term" : {
                "metadata.creators.person.organizations.identifierPath" : {
                  "value" : "ou_2421692"
                }
              }
            }, {
              "term" : {
                "metadata.creators.organization.identifierPath" : {
                  "value" : "ou_2421692"
                }
              }
            } ]
          }
        }, {
          "bool" : {
            "should" : [ {
              "range" : {
                "metadata.datePublishedInPrint" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            }, {
              "range" : {
                "metadata.datePublishedOnline" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            }, {
              "range" : {
                "metadata.dateAccepted" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            }, {
              "range" : {
                "metadata.dateSubmitted" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            }, {
              "range" : {
                "metadata.dateModified" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            }, {
              "range" : {
                "metadata.dateCreated" : {
                  "gte" : "2018||/y",
                  "lte" : "2023||/y"
                }
              }
            } ]
          }
        } ]
      }
    } ]
  }
},"sort" : [{"metadata.event.startDate":{"order":"asc"}}],"size" : "5000","from" : "0"}

In [4]:
#https://requests.readthedocs.io/en/latest/user/quickstart/#passing-parameters-in-urls
#Sends search request to PuRe REST API with the parameter defined above 

r = requests.post("https://pure.mpg.de/rest/items/search?format=json", json=query)

data = r.json()                                                    #Output is the same as the JSON-Export file

In [5]:
#Create empty DataFrame where Metadata columns will be added
df = pd.DataFrame() 

In [6]:
#Extract PuRe_ID from JSON and add to DataFrame

n = 0
ID = []

for i in data['records']:                                   #Goes through individual record items
    ID.append(data['records'][n]['persistenceId'])
    n += 1
    
df['ID'] = ID

In [7]:
#Extract Creators from JSON and add to DataFrame

n = 0 
creators = []

for i in data['records']:
    c_in = []
    m = 0
    for p in data['records'][n]['data']['metadata']['creators']:           #To capture all persons listed under creators
        if 'person' in data['records'][n]['data']['metadata']['creators'][m]:
            if 'givenName' in data['records'][n]['data']['metadata']['creators'][m]['person']:
                name = ""
                name = data['records'][n]['data']['metadata']['creators'][m]['person']['givenName'] + " " + data['records'][n]['data']['metadata']['creators'][m]['person']['familyName']
                                                                                                                                                    #string of first name + space + last name
                c_in.append(name)
            else:
                name = ""
                name = data['records'][n]['data']['metadata']['creators'][m]['person']['familyName']
                c_in.append(name)
        else:
            name = ""
            name = data['records'][n]['data']['metadata']['creators'][m]['organization']['name']
            c_in.append(name)
        m +=1 
    creators.append("; ".join(c_in))      #combines the names of all creators in this item with the seperator "; " 
    n +=1
    
df['Creators'] = creators

In [8]:
#Extract Title from JSON and add to DataFrame

n = 0
title = []

for i in data['records']:
    title.append(data['records'][n]['data']['metadata']['title'])
    n += 1
    
df['Title'] = title

In [9]:
#Extract Genre from JSON and add to DataFrame

n = 0
genre = []

for i in data['records']:
    genre.append(data['records'][n]['data']['metadata']['genre'])
    n += 1
    
df['Genre'] = genre

In [10]:
#Extract Date from JSON and add to DataFrame

n = 0
date = []
status = []

for i in data['records']:
    if 'datePublishedInPrint' in data['records'][n]['data']['metadata']:                  #Only ever one Date selected by priorities 
        date.append(data['records'][n]['data']['metadata']['datePublishedInPrint'])       #Published before Online before Accepten before Submitted 
        status.append("Published in Print")
    elif 'datePublishedOnline' in data['records'][n]['data']['metadata']:
        date.append(data['records'][n]['data']['metadata']['datePublishedOnline'])
        status.append("Published Online")
    elif 'dateAccepted'in data['records'][n]['data']['metadata']:
        date.append(data['records'][n]['data']['metadata']['dateAccepted'])
        status.append("Accepted")
    elif "dateSubmitted" in data['records'][n]['data']['metadata']:
        date.append(data['records'][n]['data']['metadata']['dateSubmitted'])
        status.append("Submitted")
    elif "dateCreated" in data['records'][n]['data']['metadata']:
        date.append(data['records'][n]['data']['metadata']['dateCreated'])
        status.append("Created")
    else:
        date.append('NA')
    n += 1

df['Status'] = status
df['Date'] = date

In [11]:
#Extract OA Status from JSON and add to DataFrame

n = 0 
OA = []

for i in data['records']:
    if "files" in data['records'][n]['data']:                   #Only if there are files
        oa_in = []
        m = 0
        for p in data['records'][n]['data']['files']:
            if 'oaStatus' in data['records'][n]['data']['files'][m]['metadata']:
                oa_in.append(data['records'][n]['data']['files'][m]['metadata']['oaStatus'])
            else:
                oa_in.append("NA")
            m +=1 
        OA.append("; ".join(oa_in))                           #combines OA status for all files attached separated by "; "
        n +=1
    else:
        OA.append("NA")                                       #Needed so index of existing df and column match 
    
df['OAStatus'] = OA

In [12]:
#Check Dataframe
df

Unnamed: 0,ID,Creators,Title,Genre,Status,Date,OAStatus
0,item_3207274_1,Vanessa P. Bailey; Michael Bottom; Eric Cady; ...,Lessons for WFIRST CGI from ground-based high-...,PROCEEDINGS,Published in Print,2018,
1,item_3207064_1,K. Colon; G. Zhou; A. Shporer; K. A. Collins; ...,Ongoing Variability in Transits of the Disinte...,CONFERENCE_PAPER,Published in Print,2018,
2,item_3207002_1,R. Decarli; C. Carilli; C. Casey; B. Emonts; J...,Cold Gas in High-z Galaxies: The Dense ISM,PROCEEDINGS,Published in Print,2018,
3,item_3206198_1,M. Steffen; A. J. Gallagher; E. Caffau; P. Bon...,Carbon-enhanced metal-poor 3D model atmospheres,PROCEEDINGS,Published in Print,2018,
4,item_3206610_1,Hendrik Linz; Luisa Buinhas; Roger Förstner; M...,Far-infrared space interferometer study IRASSI...,PROCEEDINGS,Published in Print,2018,
...,...,...,...,...,...,...,...
1307,item_3206056_1,Peng Wang; Xi Kang,The build up of the correlation between halo s...,ARTICLE,Published in Print,2018,
1308,item_3206074_1,Mark Vogelsberger; Federico Marinacci; Paul To...,The uniformity and time-invariance of the intr...,ARTICLE,Published in Print,2018,
1309,item_3206094_1,L. Vanzi; A. Zapata; M. Flores; R. Brahm; M. T...,Precision stellar radial velocity measurements...,ARTICLE,Published in Print,2018,
1310,item_3211220_1,Ken-ichi Tadaki; Daisuke Iono; Bunyo Hatsukade...,CNO Emission of an Unlensed Submillimeter Gala...,ARTICLE,Published in Print,2019,


In [15]:
#Write Dataframe as csv into file 
filepath = str(C:\Users\leiminger\Seafile\Meine Bibliothek\Support\CSVScript\)
institute = "MPIAstro"
df.to_csv(filepath + institute + '.csv')

SyntaxError: invalid syntax (3010563868.py, line 2)