In [None]:
from scholarly import scholarly
import time
from typing import Dict, List, Optional
import json

class ScholarScraper:
    """Builds a Google Scholar summary for one author through ``scholarly``.

    Both settings are plain instance attributes, so they can also be changed
    after construction:

    search_delay -- seconds slept before each per-publication detail fetch.
    top_n        -- publications kept per category; also the early-stop
                    threshold of the publication loop.
    """

    def __init__(self, search_delay: float = 2, top_n: int = 3):
        self.search_delay = search_delay
        self.top_n = top_n

    @staticmethod
    def _split_authors(author_field) -> List[str]:
        """Turn an ``'A and B and C'`` author string into a list of names, dropping blanks."""
        if not author_field:
            return []
        if isinstance(author_field, (list, tuple)):
            raw_names = [str(name) for name in author_field]
        else:
            raw_names = str(author_field).split(' and ')
        return [name.strip() for name in raw_names if name.strip()]

    @staticmethod
    def _is_primary_author(authors_list: List[str], author_last_name: str) -> bool:
        """Return True for a sole-author paper or when the last name is in position one or two."""
        if not authors_list:
            # No author metadata: first authorship cannot be demonstrated.
            return False
        if len(authors_list) == 1:
            return True
        # The second position is deliberately accepted as "primary" as well,
        # matching the classification this scraper has always used.
        return any(author_last_name in name.lower() for name in authors_list[:2])

    def get_author_info(self, author_name: str, affiliation: str = "Syracuse University") -> Optional[Dict]:
        """Look up an author and summarise their profile and most-cited papers.

        Args:
            author_name: Full name; its last whitespace-separated token is used
                as the last name when classifying publications.
            affiliation: Appended to the search query to narrow the match.

        Returns:
            A dict with the keys ``name``, ``affiliation``, ``interests``,
            ``citedby``, ``h_index``, ``i10_index``,
            ``top_primary_author_publications`` and
            ``top_secondary_author_publications``; or ``None`` when the search
            has no result or any error occurs (the reason is printed).

        Notes:
            * The first search hit is used as-is; it is not checked against
              ``author_name`` or ``affiliation``.
            * Publications are fetched in the order the profile lists them and
              fetching stops once both categories hold ``top_n`` entries, so
              the "top" lists are the most cited among the fetched ones.
            * Publications without author metadata are filed as secondary.
        """
        try:
            name_parts = author_name.split()
            if not name_parts:
                # A blank name has no last name to match on; skip the network call.
                print(f"No author found with name: {author_name}")
                return None
            author_last_name = name_parts[-1].lower()

            search_query = scholarly.search_author(f"{author_name} {affiliation}")
            author = next(search_query)
            author = scholarly.fill(author)

            top_n = self.top_n
            primary_author_pubs = []
            secondary_author_pubs = []

            # Basic profile fields; defaults keep sparse profiles from being
            # discarded because of a single missing key.
            author_info = {
                "name": author.get('name', author_name),
                "affiliation": author.get('affiliation', ''),
                "interests": author.get('interests', []),
                "citedby": author.get('citedby', 0),
                "h_index": author.get('hindex', 0),
                "i10_index": author.get('i10index', 0),
                "top_primary_author_publications": [],
                "top_secondary_author_publications": []
            }

            for pub in author.get("publications", []):
                # Throttle: every fill() below is a separate request.
                time.sleep(self.search_delay)
                try:
                    pub_filled = scholarly.fill(pub)
                    bib = pub_filled["bib"]
                    authors_list = self._split_authors(bib.get('author', ''))

                    publication = {
                        "title": bib.get('title', ''),
                        "year": bib.get('pub_year', ''),
                        "abstract": bib.get('abstract', ''),
                        "venue": bib.get('journal', bib.get('conference', '')),
                        # ``or 0`` guards against an explicit None, which would break sorting.
                        "citations": pub_filled.get("num_citations", 0) or 0,
                        "authors": authors_list,
                        "url": bib.get('url', '')
                    }

                    if self._is_primary_author(authors_list, author_last_name):
                        primary_author_pubs.append(publication)
                    else:
                        secondary_author_pubs.append(publication)

                    # Stop early once both categories are full to limit requests.
                    if len(primary_author_pubs) >= top_n and len(secondary_author_pubs) >= top_n:
                        break

                except Exception as e:
                    # Best effort: one bad publication must not lose the author.
                    print(f"Error fetching publication details: {str(e)}")
                    continue

            # Most cited first, then keep the top_n of each category.
            by_citations = lambda entry: entry['citations']
            author_info["top_primary_author_publications"] = sorted(
                primary_author_pubs, key=by_citations, reverse=True)[:top_n]
            author_info["top_secondary_author_publications"] = sorted(
                secondary_author_pubs, key=by_citations, reverse=True)[:top_n]

            return author_info

        except StopIteration:
            print(f"No author found with name: {author_name}")
            return None
        except Exception as e:
            print(f"Error: {str(e)}")
            return None

def main():
    """Scrape one hard-coded faculty member and write the result to JSON.

    The output goes to ``faculty_scholarly/<name>_scholar_data.json``, where
    ``<name>`` is the faculty name lower-cased with spaces replaced by
    underscores. Nothing is written when the lookup returns no data.
    """
    # Local import: the notebook's import cell does not bring in ``os``.
    import os

    scraper = ScholarScraper()
    faculty_name = "Jeongmin Ahn"
    print(f"Searching for: {faculty_name}")

    author_info = scraper.get_author_info(faculty_name)

    if author_info:
        output_file = f"faculty_scholarly/{faculty_name.replace(' ', '_').lower()}_scholar_data.json"
        # Create the target directory first; open() does not create parents
        # and would raise FileNotFoundError on a fresh checkout.
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(author_info, f, indent=2, ensure_ascii=False)

# Run the single-author demo when this code is executed directly; inside a
# notebook cell __name__ is "__main__" too, so main() runs on cell execution.
if __name__ == "__main__":
    main()

Searching for: Jeongmin Ahn


In [2]:
import pandas as pd

# Load the scraped faculty/staff roster and preview its first rows.
faculty_data = pd.read_csv(filepath_or_buffer='ecs_faculty_staff.csv')
faculty_data.head(n=5)

Unnamed: 0,name,profile_link
0,Mo Abdallah,https://ecs.syracuse.edu/faculty-staff/mo-abda...
1,Riyad S. Aboutaha,https://ecs.syracuse.edu/faculty-staff/riyad-s...
2,Nicole Adkins,https://ecs.syracuse.edu/faculty-staff/nicole-...
3,Jeongmin Ahn,https://ecs.syracuse.edu/faculty-staff/jeongmi...
4,Ben Akih-Kumgeh,https://ecs.syracuse.edu/faculty-staff/ben-aki...


In [3]:
# Pull the roster's name column out as a plain Python list for the scraping loop.
faculty_names = faculty_data.loc[:, 'name'].tolist()
faculty_names

['Mo Abdallah',
 'Riyad S. Aboutaha',
 'Nicole Adkins',
 'Jeongmin Ahn',
 'Ben Akih-Kumgeh',
 'Alaa Alawaad',
 'Sharon Alestalo',
 'Sahar Almahmoud',
 'Jackie Anderson',
 'Michelle Anson-Silverstein',
 'Chris Barbera',
 'Marjory Baruch',
 'Hind BenGabr',
 'Shelby Bergen',
 'Gabriela Bermudez',
 'Shobha K. Bhatia',
 'Michael Blatchley',
 'Michelle M. Blum',
 'Ed Bogucz',
 'Jesse Q. Bond',
 'Kathryn Bradford',
 'Thomas E. Braga',
 'Tim Breen',
 'Cynthia M. Bromka-Skafidas',
 'Mary Beth Browning Monroe',
 'Katie Cadwell',
 'Yue Cao',
 'Heather Carroll',
 'Elizabeth Carter',
 'David Chandler',
 'C.Y. Roger Chen',
 'Ruth Chen',
 'Biao Chen',
 'Shiu-Kai Chin',
 'Nicholas Clarke',
 'Megan Claxton',
 'Sam Clemence',
 'Sarah Collins',
 'Andria Costello Staniec',
 'Don Crampton',
 'John F. Dannenhoffer III',
 'Cliff I. Davidson',
 'Gabriel Silva De Oliveira',
 'Alexander Deyhim',
 'Rick DiRubbo',
 'Karen Dixon-Cherebin',
 'Bing Dong',
 'Charles T. Driscoll',
 'Victor Duenas',
 'Alex Dunbar',
 'J

In [None]:
# Batch run: look up every faculty member on Google Scholar, write one JSON
# file per match, and remember the names that returned nothing.
import os

scraper = ScholarScraper()

# Make sure the output directory exists before the long loop starts; open()
# does not create parent directories, and a FileNotFoundError on the first
# successful lookup would abort the whole batch.
os.makedirs("faculty_scholarly", exist_ok=True)

parsing_failed_faculty_names = []

for faculty_name in faculty_names:
    print(f"Searching for: {faculty_name}")

    author_info = scraper.get_author_info(faculty_name)

    if author_info:
        # Save to JSON file
        output_file = f"faculty_scholarly/{faculty_name.replace(' ', '_').lower()}_scholar_data.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(author_info, f, indent=2, ensure_ascii=False)
    else:
        # get_author_info returns None both for "no match" and for errors.
        parsing_failed_faculty_names.append(faculty_name)

Searching for: Mo Abdallah
No author found with name: Mo Abdallah
Searching for: Riyad S. Aboutaha
No author found with name: Riyad S. Aboutaha
Searching for: Nicole Adkins
No author found with name: Nicole Adkins
Searching for: Jeongmin Ahn
Searching for: Ben Akih-Kumgeh
No author found with name: Ben Akih-Kumgeh
Searching for: Alaa Alawaad
No author found with name: Alaa Alawaad
Searching for: Sharon Alestalo
Searching for: Sahar Almahmoud
No author found with name: Sahar Almahmoud
Searching for: Jackie Anderson
No author found with name: Jackie Anderson
Searching for: Michelle Anson-Silverstein
No author found with name: Michelle Anson-Silverstein
Searching for: Chris Barbera
No author found with name: Chris Barbera
Searching for: Marjory Baruch
No author found with name: Marjory Baruch
Searching for: Hind BenGabr
No author found with name: Hind BenGabr
Searching for: Shelby Bergen
No author found with name: Shelby Bergen
Searching for: Gabriela Bermudez
No author found with name: G

In [6]:
# Captured log text kept as a literal: the lines follow the same
# "Searching for: ..." / "No author found with name: ..." format printed by the
# Google Scholar scraping loop. NOTE(review): presumably pasted from that cell's
# output so the failed names can be recovered without re-running the scrape.
txt = """Searching for: Mo Abdallah
No author found with name: Mo Abdallah
Searching for: Riyad S. Aboutaha
No author found with name: Riyad S. Aboutaha
Searching for: Nicole Adkins
No author found with name: Nicole Adkins
Searching for: Jeongmin Ahn
Searching for: Ben Akih-Kumgeh
No author found with name: Ben Akih-Kumgeh
Searching for: Alaa Alawaad
No author found with name: Alaa Alawaad
Searching for: Sharon Alestalo
Searching for: Sahar Almahmoud
No author found with name: Sahar Almahmoud
Searching for: Jackie Anderson
No author found with name: Jackie Anderson
Searching for: Michelle Anson-Silverstein
No author found with name: Michelle Anson-Silverstein
Searching for: Chris Barbera
No author found with name: Chris Barbera
Searching for: Marjory Baruch
No author found with name: Marjory Baruch
Searching for: Hind BenGabr
No author found with name: Hind BenGabr
Searching for: Shelby Bergen
No author found with name: Shelby Bergen
Searching for: Gabriela Bermudez
No author found with name: Gabriela Bermudez
Searching for: Shobha K. Bhatia
No author found with name: Shobha K. Bhatia
Searching for: Michael Blatchley
Searching for: Michelle M. Blum
Searching for: Ed Bogucz
Searching for: Jesse Q. Bond
Searching for: Kathryn Bradford
No author found with name: Kathryn Bradford
Searching for: Thomas E. Braga
No author found with name: Thomas E. Braga
Searching for: Tim Breen
No author found with name: Tim Breen
Searching for: Cynthia M. Bromka-Skafidas
No author found with name: Cynthia M. Bromka-Skafidas
Searching for: Mary Beth Browning Monroe
No author found with name: Mary Beth Browning Monroe
Searching for: Katie Cadwell
No author found with name: Katie Cadwell
Searching for: Yue Cao
Searching for: Heather Carroll
No author found with name: Heather Carroll
Searching for: Elizabeth Carter
Searching for: David Chandler
Searching for: C.Y. Roger Chen
No author found with name: C.Y. Roger Chen
Searching for: Ruth Chen
No author found with name: Ruth Chen
Searching for: Biao Chen
Searching for: Shiu-Kai Chin
No author found with name: Shiu-Kai Chin
Searching for: Nicholas Clarke
No author found with name: Nicholas Clarke
Searching for: Megan Claxton
No author found with name: Megan Claxton
Searching for: Sam Clemence
No author found with name: Sam Clemence
Searching for: Sarah Collins
No author found with name: Sarah Collins
Searching for: Andria Costello Staniec
No author found with name: Andria Costello Staniec
Searching for: Don Crampton
No author found with name: Don Crampton
Searching for: John F. Dannenhoffer III
No author found with name: John F. Dannenhoffer III
Searching for: Cliff I. Davidson
No author found with name: Cliff I. Davidson
Searching for: Gabriel Silva De Oliveira
No author found with name: Gabriel Silva De Oliveira
Searching for: Alexander Deyhim
Searching for: Rick DiRubbo
No author found with name: Rick DiRubbo
Searching for: Karen Dixon-Cherebin
No author found with name: Karen Dixon-Cherebin
Searching for: Bing Dong
Searching for: Charles T. Driscoll
Searching for: Victor Duenas
Searching for: Alex Dunbar
No author found with name: Alex Dunbar
Searching for: Jennifer Duque
No author found with name: Jennifer Duque
Searching for: Sara Eftekharnejad
Searching for: Ehat Ercanli
No author found with name: Ehat Ercanli
Searching for: Matthew Erdman
No author found with name: Matthew Erdman
Searching for: Makan Fardad
Searching for: Jennifer Fazio
No author found with name: Jennifer Fazio
Searching for: Yoanna Ferrara
No author found with name: Yoanna Ferrara
Searching for: Eric Finkelstein
Searching for: Abby Fite
No author found with name: Abby Fite
Searching for: Heather Flaherty
No author found with name: Heather Flaherty
Searching for: Amelia Forbes
No author found with name: Amelia Forbes
Searching for: Chelsey Franza
No author found with name: Chelsey Franza
Searching for: Mia Funderburg
No author found with name: Mia Funderburg
Searching for: Zhenyu Gan
Searching for: Venkata S.S. Gandikota
No author found with name: Venkata S.S. Gandikota
Searching for: Natarajan Gautam
No author found with name: Natarajan Gautam
Searching for: Nadeem Ghani
No author found with name: Nadeem Ghani
Searching for: Prasanta K. Ghosh
No author found with name: Prasanta K. Ghosh
Searching for: Michael Glinski
No author found with name: Michael Glinski
Searching for: Jennifer W. Graham
No author found with name: Jennifer W. Graham
Searching for: Amy Gullotta
No author found with name: Amy Gullotta
Searching for: M. Cenk Gursoy
Searching for: Matthew Hanley
No author found with name: Matthew Hanley
Searching for: Moamer (Mo) Hasanovic
No author found with name: Moamer (Mo) Hasanovic
Searching for: Julie M. Hasenwinkel
No author found with name: Julie M. Hasenwinkel
Searching for: James (Jay) Henderson
Searching for: Endadul Hoque
Searching for: Ian D. Hosein
Searching for: Jonathan Hoster
No author found with name: Jonathan Hoster
Searching for: Wei (Frank) Hu
No author found with name: Wei (Frank) Hu
Searching for: Kara Hughes
No author found with name: Kara Hughes
Searching for: Britton Inglehart
No author found with name: Britton Inglehart
Searching for: Can Isik
No author found with name: Can Isik
Searching for: Era Jain
Searching for: Pankaj K. Jha
Searching for: Chris E. Johnson
Searching for: Alex K. Jones
No author found with name: Alex K. Jones
Searching for: Andrea Joseph
No author found with name: Andrea Joseph
Searching for: Kathleen M. Joyce
No author found with name: Kathleen M. Joyce
Searching for: Michaela Karcher
No author found with name: Michaela Karcher
Searching for: Sue Karlik
No author found with name: Sue Karlik
Searching for: Garrett Ethan Katz
No author found with name: Garrett Ethan Katz
Searching for: Sean K. Kelly
No author found with name: Sean K. Kelly
Searching for: Lance S. Ketcham
No author found with name: Lance S. Ketcham
Searching for: Bryan S. Kim
Searching for: Shawn L. Knight
No author found with name: Shawn L. Knight
Searching for: Mary Margaret Koppers
No author found with name: Mary Margaret Koppers
Searching for: Rodrick Kuate Defo
No author found with name: Rodrick Kuate Defo
Searching for: Saman Priyantha Kumarawadu
No author found with name: Saman Priyantha Kumarawadu
Searching for: Lihong Lao
No author found with name: Lihong Lao
Searching for: Taylor Larsen
No author found with name: Taylor Larsen
Searching for: Kasey Laurent
No author found with name: Kasey Laurent
Searching for: Andrew C. Lee
No author found with name: Andrew C. Lee
Searching for: Andra Lee
No author found with name: Andra Lee
Searching for: Tong Lin
Searching for: Xiyuan Liu
Searching for: Weisi Liu
No author found with name: Weisi Liu
Searching for: Min Liu
Searching for: Yizhi Liu
Searching for: Linda Lowe
No author found with name: Linda Lowe
Searching for: Eric M. Lui
No author found with name: Eric M. Lui
Searching for: Aoyi Luo
Searching for: Zhen Ma
Searching for: Sinéad C. Mac Namara
No author found with name: Sinéad C. Mac Namara
Searching for: Sarah Mack
No author found with name: Sarah Mack
Searching for: Paul Mackanos
No author found with name: Paul Mackanos
Searching for: Melissa Maldanado
No author found with name: Melissa Maldanado
Searching for: Kwami Maranga
No author found with name: Kwami Maranga
Searching for: Duane L. Marcy
No author found with name: Duane L. Marcy
Searching for: Shalabh C. Maroo
Searching for: Hong Martel
No author found with name: Hong Martel
Searching for: Karen Martinez Soto
Searching for: Joao Paulo Marum
Searching for: Jean-Daniel Medjo
No author found with name: Jean-Daniel Medjo
Searching for: Kristopher Micinski
Searching for: Aaron Mohammed
Searching for: Chilukuri K. Mohan
No author found with name: Chilukuri K. Mohan
Searching for: Bruce Molino
No author found with name: Bruce Molino
Searching for: Mario Montesdeoca
No author found with name: Mario Montesdeoca
Searching for: Young B. Moon
Searching for: Mary Murphy
No author found with name: Mary Murphy
Searching for: Shikha Nangia
Searching for: Dawit Negussey
No author found with name: Dawit Negussey
Searching for: Rebecca Noble
No author found with name: Rebecca Noble
Searching for: Jae C. Oh
No author found with name: Jae C. Oh
Searching for: Susan Older
No author found with name: Susan Older
Searching for: Anupam Pandey
Searching for: Steven Penn
No author found with name: Steven Penn
Searching for: Malea Perkins
No author found with name: Malea Perkins
Searching for: Vir V. Phoha
Searching for: Jeremy Pierce
No author found with name: Jeremy Pierce
Searching for: Peter Plumley
No author found with name: Peter Plumley
Searching for: Jason Pollack
Searching for: Savonne Proctor
No author found with name: Savonne Proctor
Searching for: Kate Pynn
No author found with name: Kate Pynn
Searching for: Quinn Qiao
Searching for: Zhao Qin
Searching for: Qinru Qiu
Searching for: Younes Ra’di
Searching for: Farzana Rahman
Searching for: Sabina Redington
No author found with name: Sabina Redington
Searching for: Allen Reed
No author found with name: Allen Reed
Searching for: Dacheng Ren
Searching for: Jorge Romeu
No author found with name: Jorge Romeu
Searching for: Tamara Rosanio
No author found with name: Tamara Rosanio
Searching for: Utpal Roy
No author found with name: Utpal Roy
Searching for: Cynthia Salanger
No author found with name: Cynthia Salanger
Searching for: Baris Salman
Searching for: Sarah Samir
No author found with name: Sarah Samir
Searching for: Ashok S. Sangani
No author found with name: Ashok S. Sangani
Searching for: Amit K. Sanyal
No author found with name: Amit K. Sanyal
Searching for: Mehmet Sarimurat
No author found with name: Mehmet Sarimurat
Searching for: Wanliang Shan
Searching for: Ian M. Shapiro
No author found with name: Ian M. Shapiro
Searching for: Jim Sheedy
No author found with name: Jim Sheedy
Searching for: Andrea Shen
No author found with name: Andrea Shen
Searching for: Yilei Shi
No author found with name: Yilei Shi
Searching for: Cindy Smith
No author found with name: Cindy Smith
Searching for: J. Cole Smith
Searching for: Lilly Smolak
No author found with name: Lilly Smolak
Searching for: Pranav Soman
Searching for: Sucheta Soundarajan
Searching for: Gabrielle Spencer
No author found with name: Gabrielle Spencer
Searching for: James Spoelstra
No author found with name: James Spoelstra
Searching for: David Stablein
No author found with name: David Stablein
Searching for: Chris Stathatos
No author found with name: Chris Stathatos
Searching for: Emilia Stojanovski
No author found with name: Emilia Stojanovski
Searching for: Yiyang Sun
Searching for: Radhakrishna (Suresh) Sureshkumar
No author found with name: Radhakrishna (Suresh) Sureshkumar
Searching for: Yuzhe Tang
Searching for: Svetoslava Todorova
Searching for: John Trimmer
Searching for: Yoko Urao
No author found with name: Yoko Urao
Searching for: William E. VanNordstrand
No author found with name: William E. VanNordstrand
Searching for: Pramod K. Varshney
No author found with name: Pramod K. Varshney
Searching for: Gabriela Vásquez Jiménez
No author found with name: Gabriela Vásquez Jiménez
Searching for: Stephanie Vasta
No author found with name: Stephanie Vasta
Searching for: Senem Velipasalar
Searching for: Kelly Venturini
No author found with name: Kelly Venturini
Searching for: Joseph J. Waclawski
No author found with name: Joseph J. Waclawski
Searching for: Theodore Walker
Searching for: Li Wang
No author found with name: Li Wang
Searching for: Yeqing Wang
Searching for: Yaoying Wu
Searching for: Matt Yager
No author found with name: Matt Yager
Searching for: Melissa Young
No author found with name: Melissa Young
Searching for: Edmund S. Yu
No author found with name: Edmund S. Yu
Searching for: Pun To (Douglas) Yung
No author found with name: Pun To (Douglas) Yung
Searching for: Reza Zafarani
Searching for: Teng Zeng
Searching for: Jialiu Zeng
Searching for: Jianshun “Jensen” Zhang
Searching for: Teng Zhang
Searching for: Junzhe Zhang
Searching for: Yi Zheng
Searching for: Fernando Zigunov
"""

# Rebuild the list of failed names from the captured log: keep only the
# "No author found" lines and strip that prefix (plus surrounding whitespace).
parsing_failed_faculty_names = list(
    map(
        lambda entry: entry.replace('No author found with name:', '').strip(),
        filter(
            lambda entry: entry.startswith('No author found with name:'),
            txt.splitlines(),
        ),
    )
)

parsing_failed_faculty_names

['Mo Abdallah',
 'Riyad S. Aboutaha',
 'Nicole Adkins',
 'Ben Akih-Kumgeh',
 'Alaa Alawaad',
 'Sahar Almahmoud',
 'Jackie Anderson',
 'Michelle Anson-Silverstein',
 'Chris Barbera',
 'Marjory Baruch',
 'Hind BenGabr',
 'Shelby Bergen',
 'Gabriela Bermudez',
 'Shobha K. Bhatia',
 'Kathryn Bradford',
 'Thomas E. Braga',
 'Tim Breen',
 'Cynthia M. Bromka-Skafidas',
 'Mary Beth Browning Monroe',
 'Katie Cadwell',
 'Heather Carroll',
 'C.Y. Roger Chen',
 'Ruth Chen',
 'Shiu-Kai Chin',
 'Nicholas Clarke',
 'Megan Claxton',
 'Sam Clemence',
 'Sarah Collins',
 'Andria Costello Staniec',
 'Don Crampton',
 'John F. Dannenhoffer III',
 'Cliff I. Davidson',
 'Gabriel Silva De Oliveira',
 'Rick DiRubbo',
 'Karen Dixon-Cherebin',
 'Alex Dunbar',
 'Jennifer Duque',
 'Ehat Ercanli',
 'Matthew Erdman',
 'Jennifer Fazio',
 'Yoanna Ferrara',
 'Abby Fite',
 'Heather Flaherty',
 'Amelia Forbes',
 'Chelsey Franza',
 'Mia Funderburg',
 'Venkata S.S. Gandikota',
 'Natarajan Gautam',
 'Nadeem Ghani',
 'Prasant

In [7]:
# How many names are in parsing_failed_faculty_names.
len(parsing_failed_faculty_names)

137

In [8]:
from semantic_scholar_scraper import SemanticScholarScraper

# Second pass: try Semantic Scholar for every name in
# parsing_failed_faculty_names, saving one JSON file per hit.
scraper = SemanticScholarScraper()
parsing_failed_faculty_names_1 = []

# Seconds to wait between authors. Firing the requests back-to-back produced
# "429 Client Error" responses in the captured output of this cell, and those
# rate-limited names were then recorded as failures.
# NOTE(review): 3 seconds is a conservative guess; tune it against the current
# Semantic Scholar rate limits (or any throttling the scraper already does).
request_delay_seconds = 3

for author_name in parsing_failed_faculty_names:
    output_file = f"faculty_scholarly/{author_name.replace(' ', '_').lower()}_semanticscholar_data.json"
    author_data = scraper.get_author_data(author_name)

    if author_data:
        scraper.save_to_json(author_data, output_file)
    else:
        # A falsy result covers both "not found" and request errors; this cell
        # cannot tell them apart, so the list may still contain real authors.
        parsing_failed_faculty_names_1.append(author_name)

    # Pace the requests so the next lookup is less likely to be rate-limited.
    time.sleep(request_delay_seconds)

Searching for author: Mo Abdallah
Data successfully saved to faculty_scholarly/mo_abdallah_semanticscholar_data.json
Searching for author: Riyad S. Aboutaha
Data successfully saved to faculty_scholarly/riyad_s._aboutaha_semanticscholar_data.json
Searching for author: Nicole Adkins
Error making request: 429 Client Error:  for url: https://api.semanticscholar.org/graph/v1/author/153820847?fields=name%2Caffiliations%2CcitationCount%2ChIndex%2CpaperCount%2Cpapers.title%2Cpapers.year%2Cpapers.abstract%2Cpapers.venue%2Cpapers.citationCount%2Cpapers.authors%2Cpapers.url
No author found with name:  Nicole Adkins
Searching for author: Ben Akih-Kumgeh
Error making request: 429 Client Error:  for url: https://api.semanticscholar.org/graph/v1/author/1422512892?fields=name%2Caffiliations%2CcitationCount%2ChIndex%2CpaperCount%2Cpapers.title%2Cpapers.year%2Cpapers.abstract%2Cpapers.venue%2Cpapers.citationCount%2Cpapers.authors%2Cpapers.url
No author found with name:  Ben Akih-Kumgeh
Searching for aut

In [10]:
# How many names also returned no data from the Semantic Scholar pass.
len(parsing_failed_faculty_names_1)

108

In [13]:
# Read the roster CSV again and keep just the names as a list.
# NOTE(review): the displayed result differs from the earlier load of the same
# path, so the file appears to have been edited between the two reads.
updated_faculty_list = pd.read_csv("ecs_faculty_staff.csv").loc[:, 'name'].tolist()
updated_faculty_list

['Mo Abdallah',
 'Riyad S. Aboutaha',
 'Jeongmin Ahn',
 'Ben Akih-Kumgeh',
 'Jackie Anderson',
 'Michael Blatchley',
 'Michelle M. Blum',
 'Ed Bogucz',
 'Jesse Q. Bond',
 'Mary Beth Browning Monroe',
 'Katie Cadwell',
 'Yue Cao',
 'Elizabeth Carter',
 'C.Y. Roger Chen',
 'Ruth Chen',
 'Biao Chen',
 'Andria Costello Staniec',
 'John F. Dannenhoffer III',
 'Gabriel Silva De Oliveira',
 'Alexander Deyhim',
 'Bing Dong',
 'Charles T. Driscoll',
 'Victor Duenas',
 'Sara Eftekharnejad',
 'Ehat Ercanli',
 'Matthew Erdman',
 'Makan Fardad',
 'Zhenyu Gan',
 'Venkata S.S. Gandikota',
 'Natarajan Gautam',
 'Nadeem Ghani',
 'Prasanta K. Ghosh',
 'Jennifer W. Graham',
 'M. Cenk Gursoy',
 'Moamer (Mo) Hasanovic',
 'James (Jay) Henderson',
 'Endadul Hoque',
 'Ian D. Hosein',
 'Can Isik',
 'Era Jain',
 'Pankaj K. Jha',
 'Chris E. Johnson',
 'Alex K. Jones',
 'Andrea Joseph',
 'Garrett Ethan Katz',
 'Bryan S. Kim',
 'Rodrick Kuate Defo',
 'Saman Priyantha Kumarawadu',
 'Lihong Lao',
 'Kasey Laurent',
 

In [14]:
# Keep only the names that failed both scrapers (parsing_failed_faculty_names_1) and still appear in the updated faculty list
import os

# Names with no data from either source, restricted to people on the updated
# roster; the order of parsing_failed_faculty_names_1 is preserved.
failed_faculty_names = [
    candidate
    for candidate in parsing_failed_faculty_names_1
    if candidate in updated_faculty_list
]

failed_faculty_names, len(failed_faculty_names)

(['Ben Akih-Kumgeh',
  'Katie Cadwell',
  'C.Y. Roger Chen',
  'Andria Costello Staniec',
  'Gabriel Silva De Oliveira',
  'Matthew Erdman',
  'Venkata S.S. Gandikota',
  'Natarajan Gautam',
  'Prasanta K. Ghosh',
  'Jennifer W. Graham',
  'Moamer (Mo) Hasanovic',
  'Can Isik',
  'Alex K. Jones',
  'Andrea Joseph',
  'Garrett Ethan Katz',
  'Rodrick Kuate Defo',
  'Saman Priyantha Kumarawadu',
  'Lihong Lao',
  'Andrew C. Lee',
  'Eric M. Lui',
  'Sinéad C. Mac Namara',
  'Duane L. Marcy',
  'Jean-Daniel Medjo',
  'Dawit Negussey',
  'Peter Plumley',
  'Utpal Roy',
  'Ashok S. Sangani',
  'Amit K. Sanyal',
  'Mehmet Sarimurat',
  'Andrea Shen',
  'Yilei Shi',
  'Cindy Smith',
  'Radhakrishna (Suresh) Sureshkumar',
  'Pramod K. Varshney',
  'Joseph J. Waclawski',
  'Li Wang',
  'Pun To (Douglas) Yung'],
 37)