In [13]:
import ast

import numpy as np
import pandas as pd
from tqdm import tqdm

# Read in the data
# Both workbooks are loaded with their first column as the DataFrame index.
# Authors is later queried through its 'AuthorID' and 'Field' columns.
Authors = pd.read_excel('AuthorDatawithField.xlsx', index_col=0)
# Papers is later read through 'AuthorIDs', 'Category', 'year', 'externalIds',
# 'citationCount' and 'title'.
Papers = pd.read_excel('PaperDatawithAuthorID.xlsx', index_col=0)



In [2]:
# Function to get the Field from list of Author IDs

def get_fields(IDs: "list | str") -> list:
    """Return the 'Field' values of the authors whose 'AuthorID' is in ``IDs``.

    Parameters
    ----------
    IDs : list or str
        Author IDs, either as a list-like or as the string form of a Python
        list literal (e.g. ``"[1, 2]"``), which is how a list cell comes back
        after being stored in a spreadsheet.

    Returns
    -------
    list
        One entry per matching row of the module-level ``Authors`` table, in
        the table's row order (not the order of ``IDs``). IDs with no matching
        row contribute nothing.

    Raises
    ------
    ValueError, SyntaxError
        If ``IDs`` is a string that is not a valid Python literal.
    """
    # The string comes from a data file, so parse it with ast.literal_eval,
    # which only accepts literals, rather than eval, which would execute
    # whatever expression the cell happens to contain.
    if isinstance(IDs, str):
        IDs = ast.literal_eval(IDs)

    # Keep only the author rows whose ID is one of the requested IDs
    matching_authors = Authors[Authors['AuthorID'].isin(IDs)]

    return matching_authors['Field'].tolist()

In [3]:
# apply Function to each paper to get the list of fields from Author IDs
# Adds (or overwrites) the 'author_field' column on Papers: each cell holds the
# list returned by get_fields for that row's 'AuthorIDs' value.

Papers['author_field'] = Papers['AuthorIDs'].apply(get_fields)

In [4]:
# Field names used to classify papers: social science fields and quantitative fields

social_science_fields = ['Political Science', 'Sociology', 'Economics']
qunatitative_fields = {'Mathematics', 'Physics', 'Computer Science'}

In [5]:
# Define function to determine if a paper is applicable

def is_applicable(paper):
    """Return True if ``paper`` passes every inclusion filter, else False.

    ``paper`` is a mapping-like row (e.g. a row yielded by
    ``DataFrame.iterrows``) with these keys:

    - 'author_field': iterable of field names for the paper's authors
    - 'Category', 'AuthorIDs': strings holding Python list literals
    - 'externalIds': string holding a Python dict literal
    - 'year': value comparable with an int

    A paper is accepted only when all of the following hold:

    1. at least one category is in ``social_science_fields``;
    2. at least one category, or at least one author field, is in
       ``qunatitative_fields``;
    3. 'Biology' is not among the author fields;
    4. it has at most 10 author IDs;
    5. its year is greater than 2008;
    6. its external IDs contain a 'DOI' entry that is not None.

    Raises
    ------
    ValueError, SyntaxError
        If one of the stringified cells is not a valid Python literal.
    """
    fields = paper['author_field']
    # The stringified cells come from a data file, so they are parsed with
    # ast.literal_eval (literals only) rather than eval, which would execute
    # arbitrary expressions found in the spreadsheet.
    paper_categories = ast.literal_eval(paper['Category'])

    # Require at least one social science category
    if not any(category in social_science_fields for category in paper_categories):
        return False

    # Require a quantitative signal, either from the paper's categories or
    # from the field of at least one author
    is_quantitative = any(category in qunatitative_fields for category in paper_categories)
    is_top_quantitative = any(field in qunatitative_fields for field in fields)
    if not (is_quantitative or is_top_quantitative):
        return False

    # Exclude papers with an author whose field is Biology
    if 'Biology' in fields:
        return False

    # Exclude papers with more than 10 authors (exactly 10 is still accepted)
    if len(ast.literal_eval(paper['AuthorIDs'])) > 10:
        return False

    # Keep only papers published after 2008
    if paper['year'] <= 2008:
        return False

    # Require a DOI: the key must be present and its value must not be None
    externals = ast.literal_eval(paper['externalIds'])
    return externals.get('DOI') is not None


In [6]:
# Evaluate the inclusion filter once per paper, in row order
paper_is_applicable = [is_applicable(paper) for _, paper in Papers.iterrows()]

# Keep the accepted papers and renumber them 0..n-1
ccs_papers = Papers[paper_is_applicable].reset_index(drop=True)

In [7]:
# Collect every distinct author ID across the selected papers.
# Each 'AuthorIDs' cell is the string form of a list; it is parsed with
# ast.literal_eval (literals only) instead of eval, so spreadsheet content is
# never executed as code.
unique_ccs_papers_authors = {
    author_id
    for id_list in ccs_papers['AuthorIDs']
    for author_id in ast.literal_eval(id_list)
}
len(unique_ccs_papers_authors)

1214

In [24]:
# Print the titles of the most-cited papers: a paper is listed when no more
# than `top_n` papers (itself included) fail to have a strictly lower
# citation count.
elements = len(ccs_papers)
top_n = 10

for idx, paper in tqdm(ccs_papers.iterrows()):
    # Papers that this one strictly out-cites
    cited_less = (paper['citationCount'] > ccs_papers['citationCount']).sum()
    # Everything else, i.e. papers cited at least as often (including this one)
    not_cited_less = elements - cited_less

    if not_cited_less <= top_n:
        print(paper['title'])
        print('')


644it [00:00, 12284.85it/s]

Understanding images of groups of people

Quantifying Long-Term Scientific Impact

Demographics and Dynamics of Mechanical Turk Workers

The Leiden ranking 2011/2012: Data collection, indicators, and interpretation

Towards a new crown indicator: Some theoretical considerations

Cooperation and Contagion in Web-Based, Networked Public Goods Experiments

Fake news on Twitter during the 2016 U.S. presidential election

Economic Networks: The New Challenges

Complexity theory and financial regulation

Social Data: Biases, Methodological Pitfalls, and Ethical Boundaries






In [25]:
# Write the filtered papers to an Excel workbook in the working directory.
ccs_papers.to_excel('CCS_Papers.xlsx')