# Find shared co-authors between two scientists

This notebook shows an example of how to visualize the relationship between two scientists, using Google Scholar data obtained with [scholarly](https://pypi.org/project/scholarly/).

In [1]:
%pip install scholarly scholarly[tor] streamlit-agraph pyngrok  httpx==0.27.2

Collecting scholarly
  Downloading scholarly-1.7.11-py3-none-any.whl.metadata (7.4 kB)
Collecting streamlit-agraph
  Downloading streamlit_agraph-0.0.45-py3-none-any.whl.metadata (3.2 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Collecting httpx==0.27.2
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx==0.27.2)
  Downloading httpcore-1.0.7-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx==0.27.2)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Collecting bibtexparser (from scholarly)
  Downloading bibtexparser-1.4.3.tar.gz (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.6/55.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fake-useragent (from scholarly)
  Downloading fake_useragent-2.0.3-py3-none-any.whl.metadata (17 kB)
Collecting free-proxy (from sch

# Write the app code to a .py file
Streamlit needs a local .py file to run, so the next cell writes the app to `app.py`.

In [6]:
%%writefile app.py
import streamlit as st
from scholarly import scholarly, ProxyGenerator
from streamlit_agraph import agraph, Config, Node, Edge

# Optional: set up a proxy to avoid being banned by Google Scholar.
# Note: this slows down execution; uncomment the three lines below to enable it.
#pg = ProxyGenerator()
#pg.FreeProxies()
#scholarly.use_proxy(pg)

# Function to fetch co-authors for a researcher
def get_coauthors(name):
    """Return the names of the co-authors listed for a researcher.

    The researcher is looked up with ``scholarly.search_author`` and only the
    first search hit is used; its ``coauthors`` section is then filled in.

    Args:
        name: Researcher name to search for.

    Returns:
        A set with the ``"name"`` of every co-author entry. An empty set is
        returned when the search yields no author (a Streamlit warning is
        shown) or when any exception is raised along the way (a Streamlit
        error is shown instead of propagating the exception).
    """
    try:
        first_hit = next(scholarly.search_author(name), None)

        # Guard clause: nothing to fill in when the search came back empty.
        if not first_hit:
            st.warning(f"No author found for the name: {name}")
            return set()

        profile = scholarly.fill(first_hit, sections=["coauthors"])

        found_names = set()
        for entry in profile.get("coauthors", []):
            found_names.add(entry["name"])
        return found_names
    except Exception as err:
        # Best-effort lookup: report the failure in the UI and carry on.
        st.error(f"Error fetching co-authors: {err}")
        return set()

# Streamlit app
def _build_coauthor_graph(name1, name2, coauthors1, coauthors2):
    """Build the agraph nodes and edges for two researchers and their co-authors.

    Node ids are the names themselves, so every name may appear only once.
    If one researcher is listed among the other's co-authors, no second node
    is created for them; a direct edge between the two researchers is drawn
    instead.

    Args:
        name1: First researcher's name (blue node).
        name2: Second researcher's name (red node).
        coauthors1: Set of co-author names of ``name1``.
        coauthors2: Set of co-author names of ``name2``.

    Returns:
        A ``(nodes, edges)`` tuple of lists of ``Node`` and ``Edge`` objects.
    """
    researchers = {name1, name2}

    # The researchers already have their own nodes; keeping them in the
    # co-author sets would create duplicate node ids.
    others1 = set(coauthors1) - researchers
    others2 = set(coauthors2) - researchers
    shared = others1 & others2

    nodes = [
        Node(id=name1, label=name1, color="blue"),
        Node(id=name2, label=name2, color="red"),
    ]
    edges = []

    # The two researchers are direct co-authors of each other.
    if name2 in coauthors1 or name1 in coauthors2:
        edges.append(Edge(source=name1, target=name2))

    # Sorted iteration keeps node/edge order stable between runs.
    for coauthor in sorted(shared):
        nodes.append(Node(id=coauthor, label=coauthor, color="green"))
        edges.append(Edge(source=name1, target=coauthor))
        edges.append(Edge(source=name2, target=coauthor))

    for coauthor in sorted(others1 - shared):
        nodes.append(Node(id=coauthor, label=coauthor, color="lightblue"))
        edges.append(Edge(source=name1, target=coauthor))

    for coauthor in sorted(others2 - shared):
        nodes.append(Node(id=coauthor, label=coauthor, color="pink"))
        edges.append(Edge(source=name2, target=coauthor))

    return nodes, edges


def main():
    """Render the Streamlit page that graphs the co-authors of two researchers.

    The page asks for two names. When "Generate Flowchart" is pressed, the
    co-authors of both are fetched with ``get_coauthors`` and the resulting
    nodes and edges are stored in ``st.session_state`` so the graph survives
    Streamlit reruns. The graph stored in the session state (possibly empty)
    is drawn on every run.

    Colors: blue/red for the two researchers, green for shared co-authors,
    lightblue/pink for co-authors of only the first/second researcher.
    """
    st.title("Shared Co-authors Flowchart")

    # Input for researcher names; surrounding whitespace is not part of a name.
    name1 = (st.text_input("Enter the first researcher's name") or "").strip()
    name2 = (st.text_input("Enter the second researcher's name") or "").strip()

    # Initialize session state for nodes and edges
    if "nodes" not in st.session_state:
        st.session_state.nodes = []
    if "edges" not in st.session_state:
        st.session_state.edges = []

    if st.button("Generate Flowchart"):
        if not (name1 and name2):
            st.warning("Please enter both researcher names.")
        elif name1 == name2:
            # Identical names would need two nodes with the same id.
            st.warning("Please enter two different researcher names.")
        else:
            st.info("Searching for coauthors... This may take a few moments.")
            coauthors1 = get_coauthors(name1)
            coauthors2 = get_coauthors(name2)

            # Replace whatever graph was stored by a previous search.
            nodes, edges = _build_coauthor_graph(name1, name2, coauthors1, coauthors2)
            st.session_state.nodes = nodes
            st.session_state.edges = edges

    # Display flowchart
    config = Config(width=1600, height=900, directed=False, physics=True)
    st.subheader("Coauthor Connection Graph")
    agraph(
        nodes=st.session_state.nodes,
        edges=st.session_state.edges,
        config=config,
    )

# Entry point: build the page when this file is executed as a script
# (the notebook launches it with `streamlit run app.py`).
if __name__ == "__main__":
    main()

Overwriting app.py


# run the app

## ignore this part if you are running this code locally
If you are running this notebook locally, skip this part. Otherwise you need to use ngrok to expose the port to the public internet. An ngrok auth token (API key) is needed for this.

In [4]:
# Only run this cell when the notebook runs on a hosted service such as Kaggle or Colab.
# NOTE(review): kaggle_secrets looks Kaggle-specific; on other hosts the token
# presumably has to be supplied another way -- confirm.
from pyngrok import ngrok
from kaggle_secrets import UserSecretsClient
# Read the ngrok token from the secret stored under the name "ngrok_auth_key".
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("ngrok_auth_key")


# Authenticate pyngrok, then open a tunnel to the local port the Streamlit app is served on.
ngrok.set_auth_token(secret_value_0)
public_url = ngrok.connect(8501) # Expose port 8501
# The printed tunnel shows the public URL that forwards to http://localhost:8501.
print(public_url)

NgrokTunnel: "https://8852-34-78-48-7.ngrok-free.app" -> "http://localhost:8501"                    


## run the code

In [5]:
# Start the Streamlit server for app.py. This cell keeps running until it is interrupted.
!streamlit run app.py 


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.78.48.7:8501[0m
[0m
^C
[34m  Stopping...[0m
