# Libraries

In [1]:
# Google Scholar
# https://scholar.google.com/citations?user=sKSTKAoAAAAJ&hl=en
# https://scholar.google.com/citations?view_op=view_org&hl=en&org=1896398670060433590

import pickle
import numpy as np
import pandas as pd

from scholarly import scholarly
# https://github.com/scholarly-python-package/scholarly
# https://www.scraperapi.com/blog/best-google-scholar-apis-proxies/
from scholarly import ProxyGenerator

from random import randint
from time import sleep

from collections import Counter
from itertools import combinations

import json


# Import Datasets

In [2]:
# Load the faculty roster; later cells read its 'Title', 'Area' and
# 'Scholar ID' columns.
df_listFaculty = pd.read_csv(filepath_or_buffer='df_listFaculty.csv')
df_listFaculty


Unnamed: 0,ID,Title,Area,Scholar ID
0,1762,Amir Yacoby,Applied Physics,slFLfLYAAAAJ
1,2626,Ann Pearson,Environmental Science & Engineering,Hwk4sP0AAAAJ
2,1795,Ariel Amir,Applied Mathematics,
3,13261,Ariel Procaccia,Computer Science,8ZpV-lkAAAAJ
4,1523,Barbara J. Grosz,Computer Science,
...,...,...,...,...
120,1713,Yaron Singer,Computer Science,j-MBXNMAAAAJ
121,1519,Yiling Chen,Computer Science,x_7xA0UAAAAJ
122,1651,Yue Lu,Electrical Engineering,84FUioAAAAAJ
123,1553,Zhigang Suo,Materials Science & Mechanical Engineering,GlBl-VUAAAAJ


In [3]:
# Load the previously saved scholarly results from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load this file if it comes from a trusted source.
# The handle is deliberately not named `input`, which would shadow the builtin
# input() for the rest of the kernel session.
with open('output_scholarly.pkl', 'rb') as pkl_file:
    list_scholarly = pickle.load(pkl_file)
    

# Generate Collaboration Data

### Create a dictionary mapping each Google Scholar ID to the faculty member's name

In [5]:
# Lookup table: Scholar ID -> faculty name.
# Rows that share the same 'Scholar ID' value collapse to a single key, and
# the last such row wins (standard dict behavior).
map_facultyID = {
    scholar_id: faculty_name
    for scholar_id, faculty_name in zip(
        df_listFaculty['Scholar ID'], df_listFaculty['Title']
    )
}
map_facultyID


{'slFLfLYAAAAJ': 'Amir Yacoby',
 'Hwk4sP0AAAAJ': 'Ann Pearson',
 'None': 'Zhiming Kuang',
 '8ZpV-lkAAAAJ': 'Ariel Procaccia',
 'I0fbJ6cAAAAJ': 'Boaz Barak',
 '9I4ltOIAAAAJ': 'Boris Kozinsky',
 'fABXPQQAAAAJ': 'Brian F. Farrell',
 'veDLTPEAAAAJ': 'Cengiz Pehlevan',
 '-XVcP5QAAAAJ': 'Christin Y. Sander',
 'IS_xUuIAAAAJ': 'Christopher Rycroft',
 'sKSTKAoAAAAJ': 'Conor J Walsh',
 'pxeyQ_QAAAAJ': 'Cynthia Dwork',
 'qmEw4LoAAAAJ': 'Cynthia Friend',
 '5C1bJKkAAAAJ': 'Daniel J. Jacob',
 'fKQ4QLYAAAAJ': 'Daniel P. Schrag',
 'R1cfTCoAAAAJ': 'David A. Weitz',
 'vXHA_XYAAAAJ': 'David Brooks',
 'tCCmmgQAAAAJ': 'David C. Bell',
 'wC0M5uoAAAAJ': 'David Clarke',
 'PfciJkgAAAAJ': 'David Keith',
 'ZflJqeUAAAAJ': 'David Mooney',
 'JUn8PgwAAAAJ': 'David Parkes',
 'jGboazkAAAAJ': 'David R. Nelson',
 'qHiACEgAAAAJ': 'Demba Ba',
 'UEgdNEoAAAAJ': 'Doeke Romke Hekstra',
 '3hzhsK4AAAAJ': 'Donald Elliot Ingber',
 'VMihW8oAAAAJ': 'Eddie Kohler',
 's9cO1fUAAAAJ': 'Efthimios Kaxiras',
 'C_r8d0AAAAAJ': 'Elena Leah G

### Create faculty links based on shared publications

In [6]:
# Map every publication title to the list of faculty whose profile lists it.
# Each faculty member appears at most once per title.
dict_pubs = {}

for profile in list_scholarly:
    # Entries equal to the string 'None' carry no profile data and are skipped
    # (presumably faculty without a Google Scholar profile -- TODO confirm).
    if profile == 'None':
        continue

    author = map_facultyID[profile['scholar_id']]

    for pub in profile['publications']:
        pub_title = pub['bib']['title']

        # setdefault creates the author list the first time a title is seen.
        # The previous if/else reset the list to [author] whenever the same
        # author listed a title twice, which discarded co-authors that had
        # already been recorded for that title.
        pub_authors = dict_pubs.setdefault(pub_title, [])
        if author not in pub_authors:
            pub_authors.append(author)
                

In [7]:
# Keep only the publications that are listed by two or more faculty members.
dict_pubs_filter1 = {
    title: coauthors
    for title, coauthors in dict_pubs.items()
    if len(coauthors) >= 2
}
# Number of shared publications, followed by a preview of the first 20.
print(len(dict_pubs_filter1))
print(list(dict_pubs_filter1.items())[0:20])


1141
[('A robust scanning diamond sensor for nanoscale imaging with single nitrogen-vacancy centres', ['Amir Yacoby', 'Marko Loncar']), ('Integrated diamond networks for quantum nanophotonics', ['Amir Yacoby', 'Marko Loncar']), ('Enhanced single-photon emission from a diamond–silver aperture', ['Amir Yacoby', 'Marko Loncar']), ('Coherent optical transitions in implanted nitrogen vacancy centers', ['Amir Yacoby', 'Marko Loncar']), ('Inducing superconducting correlation in quantum Hall edge states', ['Amir Yacoby', 'Philip Kim']), ('Single-color centers implanted in diamond nanostructures', ['Amir Yacoby', 'Marko Loncar']), ('Imaging viscous flow of the Dirac fluid in graphene', ['Amir Yacoby', 'Philip Kim']), ('Atomically precise, custom-design origami graphene nanostructures', ['Amir Yacoby', 'Philip Kim']), ('Synthetic diamond materials for quantum and optical applications and methods of making the same', ['Amir Yacoby', 'Marko Loncar']), ('A robust, scanning quantum system for nanosc

In [8]:
# Count, for every unordered pair of faculty, how many publications they share.
authors_2 = Counter()

for pub_authors in dict_pubs_filter1.values():
    # sorted() returns a new list, so the author lists stored in
    # dict_pubs_filter1 (and shared with dict_pubs) are not reordered in place.
    # Sorting gives every pair one canonical (alphabetical) orientation, so
    # (A, B) and (B, A) are counted under the same key.
    for comb in combinations(sorted(pub_authors), 2):
        authors_2[comb] += 1

# Preview the 20 most frequent collaborating pairs.
authors_2.most_common(20)


[(('David Brooks', 'Gu-Yeon Wei'), 153),
 (('Daniel J. Jacob', 'Steven C. Wofsy'), 60),
 (('Federico Capasso', 'Marko Loncar'), 55),
 (('Conor J Walsh', 'Robert J. Wood'), 50),
 (('Joost J. Vlassak', 'Zhigang Suo'), 46),
 (('Cynthia Friend', 'Efthimios Kaxiras'), 44),
 (('Eric Mazur', 'Marko Loncar'), 30),
 (('Daniel J. Jacob', 'Elsie M Sunderland'), 28),
 (('David Brooks', 'Vijay Janapa Reddi'), 24),
 (('Donald Elliot Ingber', 'Kit Parker'), 23),
 (('Jerry X. Mitrovica', 'Peter John Huybers'), 23),
 (('Gu-Yeon Wei', 'Vijay Janapa Reddi'), 21),
 (('Amir Yacoby', 'Philip Kim'), 19),
 (('Radhika Nagpal', 'Robert J. Wood'), 19),
 (('David Mooney', 'Donald Elliot Ingber'), 19),
 (('Conor J Walsh', 'David Mooney'), 18),
 (('Joanna Aizenberg', 'Marko Loncar'), 16),
 (('Gu-Yeon Wei', 'Robert J. Wood'), 14),
 (('David Clarke', 'Robert J. Wood'), 14),
 (('Joanna Aizenberg', 'Katia Bertoldi'), 14)]

In [9]:
# One link record per collaborating pair, in most_common() order
# (highest shared-publication count first).
links = [
    {"source": first_author, "target": second_author, "value": shared_count}
    for (first_author, second_author), shared_count in authors_2.most_common()
]

links
    

[{'source': 'David Brooks', 'target': 'Gu-Yeon Wei', 'value': 153},
 {'source': 'Daniel J. Jacob', 'target': 'Steven C. Wofsy', 'value': 60},
 {'source': 'Federico Capasso', 'target': 'Marko Loncar', 'value': 55},
 {'source': 'Conor J Walsh', 'target': 'Robert J. Wood', 'value': 50},
 {'source': 'Joost J. Vlassak', 'target': 'Zhigang Suo', 'value': 46},
 {'source': 'Cynthia Friend', 'target': 'Efthimios Kaxiras', 'value': 44},
 {'source': 'Eric Mazur', 'target': 'Marko Loncar', 'value': 30},
 {'source': 'Daniel J. Jacob', 'target': 'Elsie M Sunderland', 'value': 28},
 {'source': 'David Brooks', 'target': 'Vijay Janapa Reddi', 'value': 24},
 {'source': 'Donald Elliot Ingber', 'target': 'Kit Parker', 'value': 23},
 {'source': 'Jerry X. Mitrovica', 'target': 'Peter John Huybers', 'value': 23},
 {'source': 'Gu-Yeon Wei', 'target': 'Vijay Janapa Reddi', 'value': 21},
 {'source': 'Amir Yacoby', 'target': 'Philip Kim', 'value': 19},
 {'source': 'Radhika Nagpal', 'target': 'Robert J. Wood', 'v

# Create faculty nodes with name and academic area

In [10]:
# Show the faculty table again as a reference for the 'Title' and 'Area'
# columns that the node-building cells below read.
df_listFaculty


Unnamed: 0,ID,Title,Area,Scholar ID
0,1762,Amir Yacoby,Applied Physics,slFLfLYAAAAJ
1,2626,Ann Pearson,Environmental Science & Engineering,Hwk4sP0AAAAJ
2,1795,Ariel Amir,Applied Mathematics,
3,13261,Ariel Procaccia,Computer Science,8ZpV-lkAAAAJ
4,1523,Barbara J. Grosz,Computer Science,
...,...,...,...,...
120,1713,Yaron Singer,Computer Science,j-MBXNMAAAAJ
121,1519,Yiling Chen,Computer Science,x_7xA0UAAAAJ
122,1651,Yue Lu,Electrical Engineering,84FUioAAAAAJ
123,1553,Zhigang Suo,Materials Science & Mechanical Engineering,GlBl-VUAAAAJ


In [11]:
# Numeric group id for each academic area; ids are assigned in listed order,
# starting at 0.
areaNumbers = {
    area_name: group_id
    for group_id, area_name in enumerate([
        "Applied Mathematics",
        "Applied Physics",
        "Bioengineering",
        "Computer Science",
        "Electrical Engineering",
        "Environmental Science & Engineering",
        "Materials Science & Mechanical Engineering",
    ])
}


In [12]:
# One node record per faculty row: the faculty name as "id" and the numeric
# code of their academic area as "group".
# The two columns are iterated directly instead of looking up
# df_listFaculty[col][i] with a positional counter, which is a label lookup
# and only works while the frame has the default 0..n-1 index.
# An area missing from areaNumbers raises KeyError, as before.
nodes = [
    {"id": faculty, "group": areaNumbers[area]}
    for faculty, area in zip(df_listFaculty['Title'], df_listFaculty['Area'])
]

nodes
        

[{'id': 'Amir Yacoby', 'group': 1},
 {'id': 'Ann Pearson', 'group': 5},
 {'id': 'Ariel Amir', 'group': 0},
 {'id': 'Ariel Procaccia', 'group': 3},
 {'id': 'Barbara J. Grosz', 'group': 3},
 {'id': 'Boaz Barak', 'group': 3},
 {'id': 'Boris Kozinsky', 'group': 6},
 {'id': 'Brian F. Farrell', 'group': 5},
 {'id': 'Cengiz Pehlevan', 'group': 0},
 {'id': 'Cherry Murray', 'group': 1},
 {'id': 'Christin Y. Sander', 'group': 2},
 {'id': 'Christopher Joseph Lombardo', 'group': 6},
 {'id': 'Christopher Rycroft', 'group': 0},
 {'id': 'Conor J Walsh', 'group': 6},
 {'id': 'Cynthia Dwork', 'group': 3},
 {'id': 'Cynthia Friend', 'group': 6},
 {'id': 'Daniel J. Jacob', 'group': 5},
 {'id': 'Daniel Needleman', 'group': 1},
 {'id': 'Daniel P. Schrag', 'group': 5},
 {'id': 'David A. Weitz', 'group': 1},
 {'id': 'David Brooks', 'group': 3},
 {'id': 'David C. Bell', 'group': 1},
 {'id': 'David Clarke', 'group': 6},
 {'id': 'David J. Malan', 'group': 3},
 {'id': 'David Keith', 'group': 5},
 {'id': 'David Mo

# Export nodes and links as JSON

In [13]:
# Bundle the node and link lists into the single object that gets exported.
collabJson = dict(nodes=nodes, links=links)

collabJson


{'nodes': [{'id': 'Amir Yacoby', 'group': 1},
  {'id': 'Ann Pearson', 'group': 5},
  {'id': 'Ariel Amir', 'group': 0},
  {'id': 'Ariel Procaccia', 'group': 3},
  {'id': 'Barbara J. Grosz', 'group': 3},
  {'id': 'Boaz Barak', 'group': 3},
  {'id': 'Boris Kozinsky', 'group': 6},
  {'id': 'Brian F. Farrell', 'group': 5},
  {'id': 'Cengiz Pehlevan', 'group': 0},
  {'id': 'Cherry Murray', 'group': 1},
  {'id': 'Christin Y. Sander', 'group': 2},
  {'id': 'Christopher Joseph Lombardo', 'group': 6},
  {'id': 'Christopher Rycroft', 'group': 0},
  {'id': 'Conor J Walsh', 'group': 6},
  {'id': 'Cynthia Dwork', 'group': 3},
  {'id': 'Cynthia Friend', 'group': 6},
  {'id': 'Daniel J. Jacob', 'group': 5},
  {'id': 'Daniel Needleman', 'group': 1},
  {'id': 'Daniel P. Schrag', 'group': 5},
  {'id': 'David A. Weitz', 'group': 1},
  {'id': 'David Brooks', 'group': 3},
  {'id': 'David C. Bell', 'group': 1},
  {'id': 'David Clarke', 'group': 6},
  {'id': 'David J. Malan', 'group': 3},
  {'id': 'David Keit

In [14]:
# Write the collaboration graph as pretty-printed JSON.
# ensure_ascii=False emits non-ASCII characters verbatim instead of \u escapes,
# so the file is opened explicitly as UTF-8 rather than with the platform's
# default encoding (which may fail on, or mis-encode, such characters).
with open('faculty_collabs.json', 'w', encoding='utf-8') as f:
    json.dump(collabJson, f, ensure_ascii=False, indent=4)
