In [1]:
import os
import json
from containers import Paper, Author

In [2]:
from tqdm import tqdm
import requests
import urllib.parse

def fillCrossRefMetaData(paper):
    """Populate ``paper`` in place with its DOI and reference list from CrossRef.

    Searches the CrossRef ``/works`` endpoint by ``paper.title``, takes the
    first hit as the match, then requests that DOI's record to read its
    references.

    Args:
        paper: object exposing a ``title`` string; ``doi`` and
            ``reference_list`` attributes are assigned on it.

    Attributes written on ``paper``:
        doi: DOI of the first search hit, or ``False`` when the search request
            does not return HTTP 200, returns no items, or the first item
            carries no DOI.
        reference_list: the raw ``reference`` entries of the DOI record
            (``[]`` when the record lists none), or ``False`` when the DOI
            lookup does not return HTTP 200. Left untouched when no DOI was
            found.

    Returns:
        None. Exceptions raised by ``requests`` are not caught here.
    """
    encoded_title = urllib.parse.quote(paper.title)
    api_url = f"https://api.crossref.org/works?query.title={encoded_title}"
    response = requests.get(api_url)
    if response.status_code != 200:
        paper.doi = False
        return

    items = response.json()['message'].get('items', [])
    # Assuming the first result is the correct one.
    doi = items[0].get('DOI') if items else None
    if not doi:
        paper.doi = False
        return
    paper.doi = doi

    # The DOI must be bound locally before building the URL (the previous
    # version referenced an undefined name here). quote() keeps '/' literal
    # but escapes characters that are not valid inside a URL path.
    doi_api_url = f"https://api.crossref.org/works/{urllib.parse.quote(doi)}"
    metadata_response = requests.get(doi_api_url)
    if metadata_response.status_code == 200:
        record = metadata_response.json()
        paper.reference_list = record['message'].get('reference', [])
    else:
        paper.reference_list = False


import requests
import urllib.parse

class CrossRefFetcher :
    '''
    Best-effort lookup of paper metadata through the CrossRef REST API.

    Every failure (network error, malformed payload, no search hit) is
    recorded on the paper as ``False`` values instead of being raised, so a
    long batch loop is never aborted by a single paper.
    '''

    def __init__(self, timeout=30) :
        '''
        args :
            timeout : float
                seconds to wait for CrossRef before treating the request as failed
        '''
        self.timeout = timeout

    @staticmethod
    def _markNotFound(paper) :
        # Give every "no usable result" path the same, fully populated outcome.
        paper.DOI = False
        paper.crossref_json = False
        paper.reference_list = False

    def fetchMetaDatafromTitle(self, paper) :
        '''
        Query CrossRef by title and store the first hit on the paper.

        args :
            paper : Paper
                expect paper.title

        sets on paper :
            DOI : str, or False when no hit / no DOI / request failed
            crossref_json : dict of the first hit, or False on failure
            reference_list : list of the DOIs cited by the hit (references
                without a DOI are skipped), or False on failure

        returns :
            None
        '''
        title = urllib.parse.quote(paper.title)
        url = f'https://api.crossref.org/works?query.bibliographic={title}&rows=1'

        try :
            r = requests.get(url, timeout=self.timeout)
            items = r.json()['message'].get('items', [])
        except Exception :
            # Deliberately broad: connection errors, timeouts, non-JSON bodies
            # and unexpected payload shapes all mean "no metadata available".
            self._markNotFound(paper)
            return None

        # An empty result list used to surface as an IndexError and left
        # reference_list unset; handle it explicitly.
        if not items or not items[0] :
            self._markNotFound(paper)
            return None

        metadata = items[0]
        paper.DOI = metadata.get('DOI', False)
        paper.reference_list = [
            reference['DOI']
            for reference in metadata.get('reference', [])
            if 'DOI' in reference
        ]
        paper.crossref_json = metadata
        return None

In [24]:
# Load the cached authors and papers from disk. Each container starts empty so
# that every name below exists even when its cache file has not been written yet.
whole_author_list = []
whole_paper_dict = {}
# Without this default, checkAlreadyProcessed raised NameError on a first run
# (no processed-paper cache on disk yet).
processed_paper_dict = {}


AUTHOR_FILE_PATH = "./author_list.json"
if os.path.exists(AUTHOR_FILE_PATH) :
    with open(AUTHOR_FILE_PATH, "r") as f :
        author_list_raw = json.load(f)
    # Each JSON entry holds the keyword arguments of one Author.
    whole_author_list.extend(Author(**author) for author in author_list_raw)


# title -> Paper for every known paper.
WHOLE_PAPER_FILE_PATH = "./whole_paper_dict.json"
if os.path.exists(WHOLE_PAPER_FILE_PATH) :
    with open(WHOLE_PAPER_FILE_PATH, "r") as f :
        whole_paper_dict = {k : Paper(**v) for k, v in json.load(f).items()}

# title -> Paper for papers saved by a previous CrossRef run.
PROCESSED_PAPER_FILE_PATH = "./processed_paper_dict.json"
if os.path.exists(PROCESSED_PAPER_FILE_PATH) :
    with open(PROCESSED_PAPER_FILE_PATH, "r") as f :
        processed_paper_dict = {k : Paper(**v) for k, v in json.load(f).items()}

def checkAlreadyProcessed(key) :
    '''
    Tell whether `key` already has a usable DOI in processed_paper_dict.

    Returns True only when the key is present and its stored DOI is neither
    False nor None; otherwise returns False.
    '''
    if key not in processed_paper_dict :
        return False
    stored_doi = processed_paper_dict[key].DOI
    return not (stored_doi is False or stored_doi is None)
        
# Keep only the titles that checkAlreadyProcessed does not report as done.
paper_to_process_keys_list = [
    key for key in whole_paper_dict.keys() if not checkAlreadyProcessed(key)
]

print(f"whole paper dict size : {len(whole_paper_dict)}, paper to process : {len(paper_to_process_keys_list)}")

# One CrossRef request per pending paper; results are written onto the Paper
# objects held in whole_paper_dict.
crossref_fetcher = CrossRefFetcher()
for key in tqdm(paper_to_process_keys_list) :
    paper = whole_paper_dict[key]
    crossref_fetcher.fetchMetaDatafromTitle(paper)

whole paper dict size : 3245, paper to process : 2605


 96%|█████████▌| 2497/2605 [51:51<02:05,  1.16s/it] 

In [18]:
# Scratch inspection of the cached keys. Only the last expression of a cell is
# displayed, so the result of the first call is discarded.
whole_paper_dict.keys()
processed_paper_dict.keys()



In [19]:
# Scratch check: is this exact title string a key of processed_paper_dict?
"Deep residual learning for image recognition" in processed_paper_dict

True

In [22]:
# Scratch check mirroring the `DOI is not False` test used by checkAlreadyProcessed.
processed_paper_dict["Deep residual learning for image recognition"].DOI is not False

True

In [12]:
# Spot-check the reference list of one paper from the pending list.
IDX = 0
# Bare expression in the middle of a cell: evaluated, but not displayed.
paper_to_process_keys_list[IDX]

paper = whole_paper_dict[paper_to_process_keys_list[IDX]]
print(paper.reference_list)

['10.1007/3-540-49430-8_11', '10.1017/CBO9780511812651', '10.1109/CVPR.2007.383266', '10.1109/CVPR.2015.7298965', '10.1109/72.279181', '10.1162/neco.1989.1.4.541', '10.1007/3-540-49430-8_2', '10.1109/CVPR.2015.7299173', '10.1109/ICCV.2015.123', '10.1162/neco.1997.9.8.1735', '10.1109/TPAMI.2010.57', '10.1109/TPAMI.2011.235', '10.5244/C.25.76', '10.1137/1.9780898719505', '10.1109/ICCV.2015.169', '10.1007/s11263-009-0275-4', '10.1109/CVPR.2014.81', '10.1007/978-3-642-42054-2_55', '10.1145/1179352.1142005', '10.1007/978-1-4757-3121-7', '10.1109/34.56188', '10.1109/CVPR.2015.7298594']


In [14]:
# Persist the papers as JSON. The serialisable copy is built separately so that
# whole_paper_dict keeps holding Paper objects: the previous version replaced
# them with plain dicts in place, which made this cell fail on a second run
# (dict has no toDict) and broke later attribute access such as
# paper.reference_list.
# NOTE(review): ensure_ascii=False writes non-ASCII text verbatim and the file
# is opened with the platform default encoding — confirm that matches the loader.
serializable_paper_dict = {}
for key, paper in whole_paper_dict.items() :
    serializable_paper_dict[key] = paper.toDict()
with open(PROCESSED_PAPER_FILE_PATH, "w") as f :
    json.dump(serializable_paper_dict, f, indent=4, ensure_ascii=False)


In [10]:
# Scratch cell that evaluates the literal 1.
# NOTE(review): the saved output below shows a dict of Paper objects, which this
# source cannot produce — presumably stale output from an earlier edit; confirm.
1

{'Deep Residual Learning for Image Recognition': Paper(DOI=None, crossref_json=None, google_schorlar_metadata={'게시 날짜': '2016', '설명': 'Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of

In [None]:
from tqdm import tqdm
import requests
import urllib.parse

def get_doi_from_title(title):
    """Return the DOI of the first CrossRef title-search hit, or None.

    None is returned when the request does not answer with HTTP 200 or when
    the result list is empty. The first hit is assumed to be the right paper.
    """
    query = urllib.parse.quote_plus(title)
    response = requests.get(f"https://api.crossref.org/works?query.title={query}")
    if response.status_code != 200:
        return None

    candidates = response.json()['message'].get('items', [])
    return candidates[0].get('DOI') if candidates else None

def get_references_from_doi(doi):
    """Return the raw ``reference`` entries CrossRef lists for ``doi``.

    An empty list is returned when the request does not answer with HTTP 200
    or when the record has no ``reference`` field.
    """
    response = requests.get(f"https://api.crossref.org/works/{doi}")
    if response.status_code != 200:
        return []
    return response.json()['message'].get('reference', [])


# Disabled usage example for get_doi_from_title / get_references_from_doi.
# It is kept inside a string literal so that it is never executed.
'''
# Example usage:
paper_title = "Example Title of Your Academic Paper"
paper_title = "3D Gaussian Splatting for Real-Time Radiance Field Rendering"
paper_title = "Sound-Guided Semantic Image Manipulation"

paper_doi = get_doi_from_title(paper_title)

if paper_doi:
    print(f"DOI for '{paper_title}': {paper_doi}")
    references = get_references_from_doi(paper_doi)
    if references:
        print("References:")
        for ref in references:
            print(ref.get('DOI', 'No DOI provided'))
else:
    print(f"No DOI found for the paper titled '{paper_title}'")
'''



def getMetaData(title) :
    """Return the raw CrossRef title-search response for ``title``.

    Args:
        title: paper title to search for; it is URL-encoded before use.

    Returns:
        The parsed JSON response (a dict) when CrossRef answers with HTTP 200,
        otherwise None. The previous version fell through to ``return data``
        with ``data`` unbound and raised UnboundLocalError on any non-200 reply.

    Prints a diagnostic line when the request fails or when the response
    message has no ``items`` field.
    """
    # URL encode the title to ensure it's in the correct format for a URL
    encoded_title = urllib.parse.quote_plus(title)

    # The Crossref API endpoint for works
    api_url = f"https://api.crossref.org/works?query.title={encoded_title}"

    response = requests.get(api_url)
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    data = response.json()
    if 'items' not in data['message']:
        print("No results found for this title.")

    return data


class CrossRefHandler :
    '''Placeholder class: it defines no behaviour and holds no state.'''

    def __init__(self) :
        '''Take no arguments and set nothing on the instance.'''

In [1]:
import requests
import urllib.parse

# Author to look up on CrossRef; change this value to search for someone else.
author_name = "Sangpil Kim"
encoded_author_name = urllib.parse.quote_plus(author_name)

# Works endpoint, filtered by author name.
api_url = f"https://api.crossref.org/works?query.author={encoded_author_name}"
response = requests.get(api_url)

if response.status_code != 200:
    # Anything other than HTTP 200 is reported and nothing else is printed.
    print(f"Error: {response.status_code}")
else:
    data = response.json()

    # One "Title / DOI / blank line" group per returned work.
    for item in data['message']['items']:
        # A missing or empty title list falls back to the placeholder text.
        title = (item.get('title') or ['No title provided'])[0]
        doi = item.get('DOI', 'No DOI provided')
        print(f"Title: {title}")
        print(f"DOI: {doi}")
        print()

Title: Time series causal analyses of the logistics industry market size and its growth in Korea
DOI: 10.15735/kls.2008.16.1.005

Title: Effects of low intensity pulsed ultrasound stimulation on metabolic lipolysis of adipocytes
DOI: 10.1101/2022.03.29.486238

Title: Low-intensity pulsed ultrasound induces metabolic lipolysis of adipocytes
DOI: 10.1121/10.0007709

Title: Effects of Facebook Fan Page Users’ Social Capital Needs, Innovativeness and Self-Presentation Needs on Interactive Behavior: Brand Identification as a Moderator Variable
DOI: 10.17485/ijst/2016/v9i26/97277

Title: The effect of youtube Pre-Roll advertising on VTR (View through rate) and CTR (Click through rate)
DOI: 10.5958/0976-5506.2018.01036.7

Title: An Analysis on the Degree of Importance of Multifactorial Value of Agriculture-Rural Village and Policies' Public Relations
DOI: 10.21331/jprapr.2014.7.3.001

Title: Which Company is the Best for Green Advertising?: The Effects of Green Advertising on Cosumer Response

In [9]:
# Scratch demo of dict.update: keys present in both dicts take dic2's value,
# and keys only in dic2 are added. dic1 is modified in place.
dic1 = {"1": 1, "2":2, "3":3, "4":4}

dic2 = {"1": 2, "a": 3, "b": 4}

dic1.update(dic2)

# Last expression of the cell: shows the merged dict.
dic1

{'1': 2, '2': 2, '3': 3, '4': 4, 'a': 3, 'b': 4}

In [8]:
# Scratch check of the tqdm progress bar with a fixed 0.1 s pause per item.
# NOTE(review): title_list is not defined in any visible cell — presumably
# left over in the kernel from a removed cell; confirm before re-running.
import time
for title in tqdm(title_list) :
    time.sleep(0.1)

100%|██████████| 112/112 [00:11<00:00,  9.50it/s]


'Darwin'

In [10]:
import os