In [None]:
#%%appyter init
# Bootstrap cell: load appyter's notebook magics so that the `%%appyter ...`
# cell magics used throughout this notebook are available.
from appyter import magic
# The callable passed to init returns this notebook's globals() when invoked;
# `globals` is bound as a default argument, so it is captured at definition time.
magic.init(lambda _=globals: _())

In [None]:
%%appyter hide_code_exec
# Declares the form section named 'Data_Section1'; the input fields defined in
# the following cell attach themselves to it through section='Data_Section1'.
{% do SectionField(
    name='Data_Section1',
    title='Input the name of Investigator and the ORCID ID as an optional input if known'
) %}

In [None]:
%%appyter code_exec
{% set researcher_name = StringField(
    name='researcher_name', 
    label='Investigator Name', 
    default="Robert J. Lefkowitz", 
    description='Input full name of PI along with middle names/middle initials followed by spaces as leaving it out may change the results of certain information', 
    section='Data_Section1', 
    required = True
    )
%}
{% set orcid = StringField(
    constraint= "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}|.{0}|[0-9]{4}",
    name='researcher_id', 
    label='(Optional) Investigator Orcid ID if Known', 
    default='', 
    description='Input the ORCID associated with the researcher if known in addition to the inputted name.', 
    section='Data_Section1'
    )
%}
researcher_name = {{researcher_name}}
orcid_id = {{orcid}}

In [None]:
# Derive the two name variants used for querying:
#   name_of_researcher -> "<first token> <last token>"
#   pubmed_name        -> "<last token> <initials>", where the initials are the
#                         first letters of every token before the last one
#                         (or of the first token when there are at most two).
split_name = researcher_name.split()
given_name, family_name = split_name[0], split_name[-1]
name_of_researcher = f"{given_name} {family_name}"
if len(split_name) > 2:
    initials = "".join(part[0] for part in split_name[:-1])
else:
    initials = given_name[0]
pubmed_name = f"{family_name} {initials}"

In [None]:
%%appyter markdown
# Researcher Summary Report for {{researcher_name.raw_value}}
This notebook takes as input a researcher name and outputs information about the researcher from various sources including PubMed, RePORTER, Google Scholar, Geneshot, and Drugshot. The information that is currently included in the report is publications, grants, citations, collaborations, associated genes and drugs, and additional information related to the researcher's affiliation and topics of research interests. The report provides graphs and tables. 

In [None]:
#import packages
import json
import os
from time import sleep
from Bio import Entrez
from collections import defaultdict
import requests
import plotly.express as px
import plotly
from IPython.display import display,FileLink, HTML, Markdown, IFrame
from scholarly import scholarly
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from urllib.parse import urlencode
from utils import *
# Setting Entrez tool parameter
# These module-level attributes configure Bio.Entrez for the PubMed calls made
# later in the notebook (presumably they identify the client to NCBI — confirm
# against the Biopython documentation).
# NOTE(review): the contact address is hard-coded to an individual's mailbox;
# consider making it configurable before sharing the notebook.
Entrez.email = 'nasheath.ahmed@mssm.edu'
Entrez.tool = 'Demoscript'

In [None]:
# Decide whether the user supplied a usable ORCID iD and normalise it.
#
# Two spellings are recognised anywhere inside the input (so a pasted profile
# URL or stray whitespace is tolerated):
#   * dashed  : four groups of four characters separated by "-"
#   * compact : the same sixteen characters with no dashes
# The final character may be "X", the ORCID check digit.
# On success `use_orcid` is True and `orcid_id` holds the dashed form only,
# which is what the ORCID API URL built later expects; otherwise `orcid_id`
# is left untouched and `use_orcid` stays False.
use_orcid = False
_dashed_orcid = re.search(r"[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]", orcid_id)
_compact_orcid = re.search(r"[0-9]{15}[0-9X]", orcid_id)
if _dashed_orcid:
    use_orcid = True
    orcid_id = _dashed_orcid.group(0)
elif _compact_orcid:
    use_orcid = True
    _orcid_chars = _compact_orcid.group(0)
    # Re-insert a dash after every group of four characters (all three dashes;
    # the previous code dropped the last one).
    orcid_id = "-".join(_orcid_chars[start:start + 4] for start in range(0, 16, 4))

In [None]:
# Shared output settings used by the plotting cells.
# output_folder is the prefix put in front of every saved image filename;
# an empty string leaves the filenames without a directory prefix.
output_folder = ""
#os.makedirs("output_images/", exist_ok=True)
# figure_counter is handed to display_figure_labels and replaced by its return
# value after each figure.
figure_counter = 1


In [None]:
%%appyter markdown
### ORCID Information Querying

In [None]:
# ORCID lookup.
#
# With a validated iD (use_orcid is True): fetch the public record, check that
# the record's given/family names are contained in the first/last token of the
# input name and, if so, collect education entries (education_list) and the
# first listed employer (orcid_institution).
# Without an iD: search the ORCID registry by family and given name and print
# the candidate iD(s).
#
# ORCID's JSON frequently carries keys whose value is null, so every nested
# access below goes through `.get(...) or <empty>` instead of indexing, and
# optional text fields are only concatenated when they hold a value.
headers = {
    'Accept': 'application/json'
}

if use_orcid:
    display(Markdown("Link to [Orcid](https://orcid.org/{})".format(orcid_id)))
    search_url = f"https://pub.orcid.org/v3.0/{orcid_id}/record"
    response = requests.get(search_url, headers=headers)
    education_list = []
    orcid_institution = None
    if response.status_code == 200:
        data = response.json()
        person = data.get('person')
        if person:
            name_record = person.get('name') or {}
            first_name = (name_record.get('given-names') or {}).get('value') or ''
            last_name = (name_record.get('family-name') or {}).get('value') or ''
            input_first = name_of_researcher.split()[0]
            input_last = name_of_researcher.split()[-1]
            # If input name matches the orcid id, take some of the orcid id information about the researcher.
            # Empty names are rejected explicitly because '' is a substring of every string.
            if first_name and last_name and first_name in input_first and last_name in input_last:
                print("Matching orcid id to person name,", name_of_researcher, first_name, last_name)
                activities = data.get('activities-summary') or {}

                educations = activities.get('educations') or {}
                for education in educations.get('affiliation-group') or []:
                    summaries = education.get('summaries') or []
                    if not summaries:
                        continue
                    education_dict = summaries[0].get('education-summary') or {}
                    string_for_education = ''
                    organization_name = (education_dict.get('organization') or {}).get('name')
                    if organization_name:
                        string_for_education += organization_name + ":"
                    if education_dict.get('role-title'):
                        string_for_education += education_dict['role-title']
                    if education_dict.get('department-name'):
                        string_for_education += " in " + education_dict['department-name']
                    print(string_for_education)
                    education_list.append(string_for_education)

                if 'employments' in activities:
                    employment_groups = (activities['employments'] or {}).get('affiliation-group') or []
                    if len(employment_groups) > 0:
                        employment_summaries = employment_groups[0].get('summaries') or []
                        if employment_summaries:
                            employment_dict = employment_summaries[0].get('employment-summary') or {}
                            orcid_institution = (employment_dict.get('organization') or {}).get('name')
                    print(orcid_institution)
            else:
                print('ORCID ID does not match to person name,', name_of_researcher, first_name, last_name)

    else:
        print("There was an error in the API request to ORCID,", response.status_code)
else:
    first_name = split_name[0]
    last_name = split_name[-1]

    # Let requests build the query string so names containing spaces or
    # reserved characters are percent-encoded correctly.
    search_url = "https://pub.orcid.org/v3.0/search/"
    search_params = {
        'q': f"family-name:{last_name} AND given-names:{first_name}",
        'rows': 10,
    }

    response = requests.get(search_url, headers=headers, params=search_params)
    if response.status_code == 200:
        data = response.json()
        results = data.get('result') or []
        if results:
            if data.get('num-found') == 1:
                orcid_id = str(results[0]["orcid-identifier"]["path"])
                print("Researcher's possible ORCID ID")
                print(orcid_id)
            else:
                print("Multiple Orcid ID identifiers that match name of researcher")
                for orcid_data in results:
                    print(orcid_data["orcid-identifier"]["path"])
        else:
            print("No ORCID info for this researcher.")

    else:
        # A non-200 status is a failed request, not evidence that no iD exists.
        print("There was an error in the API request to ORCID,", response.status_code)
    

In [None]:
%%appyter markdown
## Obtaining Citation and Summary information from Google Scholar, Wikipedia, and OpenAlex API for {{researcher_name.raw_value}}
Some details may appear on more than one card, and the values shown can differ between cards because each source collects its information differently. 

In [None]:
# Query the external profile sources for the researcher. The three helpers are
# not defined in this notebook (presumably they come from the star import of
# `utils` — confirm).
# citation_dict is consumed by the next cell, which treats None as
# "no Google Scholar data" and otherwise plots it per year.
citation_dict = query_google_citation(name_of_researcher)
# NOTE(review): this call receives the full input name (researcher_name) while
# the other two receive the first+last form (name_of_researcher) — confirm
# that the difference is intentional.
getting_information_from_wiki(researcher_name)
getting_information_from_openalex(name_of_researcher)


In [None]:
# Citation charts, first from Google Scholar and then from Semantic Scholar.
# Each source gets a per-year bar chart and a cumulative line chart; every
# chart is shown, saved as a PNG under output_folder and captioned through
# display_figure_labels, which hands back the updated figure counter.

# --- Google Scholar (citation_dict was fetched in the previous cell) ---
if citation_dict is not None:
    display(Markdown("## Citation Information (from Google Scholar)"))
    display(Markdown("The citation information counts is taken from the profile page of the given author name from [Google Scholar](https://scholar.google.com/schhp?hl=en)"))
    fig = make_bar_plot(citation_dict,'Year', "Citations", f"Citations each Year for {name_of_researcher}", "Sourced from Google Scholar")
    fig_line = make_line_plot(citation_dict, 'Year', "Citations", f"Cumulative Citations each Year for {name_of_researcher}", "Sourced from Google Scholar")

    for figure, stem, caption in (
        (fig, 'citations_bar_google', f"Citations that are connected to the publications each year for {name_of_researcher}."),
        (fig_line, 'citations_line_graph_google', f"The cumulative citations that are connected to the publications each year for {name_of_researcher}"),
    ):
        figure.show()
        figure.write_image(output_folder + stem + '.png')
        figure_counter = display_figure_labels(figure_counter, caption, title=stem)

# --- Semantic Scholar (queried here, after the Google Scholar section) ---
semantic_citation_dict = query_semantic_scholar_citation(name_of_researcher)

if semantic_citation_dict is not None and len(semantic_citation_dict) > 0:
    display(Markdown("## Citation Information (from Semantic Scholar)"))
    display(Markdown("The citation information counts is taken from the profile page of the given author name from [Semantic Scholar](https://www.semanticscholar.org/)"))

    fig = make_bar_plot(semantic_citation_dict,'Year', "Citations", f"Citations each Year for {name_of_researcher}", "Sourced from Semantic Scholar")
    fig_line = make_line_plot(semantic_citation_dict, 'Year', "Citations", f"Cumulative Citations each Year for {name_of_researcher}", "Sourced from Semantic Scholar")

    for figure, stem, caption in (
        (fig, 'citations_bar_semantic', f"Citations that are connected to the publications each year for {name_of_researcher}."),
        (fig_line, 'citations_line_graph_semantic', f"The cumulative citations that are connected to the publications each year for {name_of_researcher}"),
    ):
        figure.show()
        figure.write_image(output_folder + stem + '.png')
        figure_counter = display_figure_labels(figure_counter, caption, title=stem)
    

In [None]:
%%appyter markdown
## Obtaining PMIDs for {{researcher_name.raw_value}} (from PubMed)
The publication summary data and collaboration data are provided from the [Entrez API](https://www.ncbi.nlm.nih.gov/books/NBK25501/) using the PubMed database to collect the publication information. The collaboration and publications are plotted in graphs with respect to the years in which the researchers conducted research. 

In [None]:

# PubMed publications and collaborations.
#
# 1. esearch: find up to 5000 PMIDs whose author matches pubmed_name.
# 2. efetch : download those records in MEDLINE text format, where records are
#             separated by blank lines.
# 3. For every record, read the publication year from its "DP  - " line and
#    count the record for that year; every "FAU - " (full author name) line
#    that is not the researcher is counted once per year as a collaborator.
# 4. Plot publications and collaborations per year (bar + cumulative line) and
#    show a short summary card.
#
# Records without a parsable "DP" year are skipped for the yearly counts, and
# empty chunks produced by the blank-line split are dropped, so a malformed or
# unusual record no longer aborts the whole cell.
params = {
    'term': pubmed_name
}

# Get the pubmed publications for the researcher with the pmids
search_handle = Entrez.esearch(db="PubMed", term=pubmed_name, retmax="5000")
try:
    info = Entrez.read(search_handle)
finally:
    search_handle.close()
identifiers = info['IdList']  # Get list of identifiers which are pmids
if len(identifiers) == 0:
    print("This name does not have any publications returned from PubMed.")
else:
    display(Markdown("### Link to [PubMed Query](https://pubmed.ncbi.nlm.nih.gov/?{}) for {}".format(urlencode(params), name_of_researcher)))
    # Use the Entrez module efetch for the publication records for the PMIDs
    records = Entrez.efetch(db="pubmed", id=identifiers, rettype="medline", retmode="text")
    try:
        medline_text = records.read()
    finally:
        records.close()
    publications = [record for record in medline_text.split("\n\n") if record.strip()]

    dict_to_store_published_year = defaultdict(int)
    dict_to_store_collabs_per_year = defaultdict(int)
    # Collaborators already counted for a given year, so each one counts once per year
    dict_to_store_used_collabs = defaultdict(set)
    own_first_name = name_of_researcher.split()[0]
    own_last_name = name_of_researcher.split()[-1]

    for pub in publications:
        # The date of publication line starts with the four-digit year.
        year_match = re.search(r"^DP  - (\d{4})", pub, flags=re.MULTILINE)
        if year_match is None:
            continue
        year_published = int(year_match.group(1))
        dict_to_store_published_year[year_published] += 1
        for line in pub.split('\n'):
            if not line.startswith('FAU - '):
                continue
            # Skip the author line that belongs to the researcher themselves
            if own_first_name in line and own_last_name in line:
                continue
            collab_name = line[6:].strip()
            if collab_name in dict_to_store_used_collabs[year_published]:
                continue
            dict_to_store_collabs_per_year[year_published] += 1
            dict_to_store_used_collabs[year_published].add(collab_name)

    if not dict_to_store_published_year:
        print("No publication years could be read from the PubMed records.")
    else:
        year_keys = sorted(dict_to_store_published_year)
        sorted_data = {year: dict_to_store_published_year[year] for year in year_keys}

        fig = make_bar_plot(sorted_data, 'Year Published', "Publications",f"Publications each Year for {name_of_researcher}", "Sourced from PubMed")
        fig_line = make_line_plot(sorted_data, 'Year Published', "Cumulative Publications",f"Cumulative Publications each Year for {name_of_researcher}", "Sourced from PubMed")
        fig.show()
        fig.write_image(output_folder+'publications_bar.png')
        figure_counter = display_figure_labels(figure_counter, "Publications each year for {} with the total number here being {}".format(name_of_researcher, str(len(publications))), title = 'publications_bar')

        fig_line.show()
        fig_line.write_image(output_folder+'publications_line_graph.png')
        figure_counter = display_figure_labels(figure_counter, "The cumulative publications each year for {}".format(name_of_researcher), title = 'publications_line_graph')
        displaytext = "{} has published {} publications according to the information on the NCBI database. The first publication was in {} and the most recent publication was in {}.".format(name_of_researcher, str(len(publications)), str(year_keys[0]), str(year_keys[-1]))
        card_html = f"""
    <div style="background-color: #f1f1f1; padding: 20px; border-radius: 5px; width: 700px;">
        <h3 style="color: #333;">NCBI PubMed Summary</h3>
        <p style="color: #333;">{displaytext}</p>
    </div>
    """
        display(HTML(card_html))

        ###Collaboration Information per year from PubMed
        year_keys = sorted(dict_to_store_collabs_per_year)
        sorted_data = {year: dict_to_store_collabs_per_year[year] for year in year_keys}
        fig = make_bar_plot(sorted_data, "Year", "Collaborations", f"Collaborations each Year for {name_of_researcher}", "Sourced from PubMed")
        fig_line = make_line_plot(sorted_data, "Year", "Cumulative Collaborations", f"Cumulative Collaborations each Year for {name_of_researcher}", "Sourced from PubMed")
        fig.show()
        fig.write_image(output_folder+'collaborations_bar.png')
        figure_counter = display_figure_labels(figure_counter, "Collaborations each year for {}".format(name_of_researcher), title ='collaborations_bar')

        fig_line.show()
        fig_line.write_image(output_folder+'collaborations_line_graph.png')
        figure_counter = display_figure_labels(figure_counter, "The cumulative collaborations each year for {}".format(name_of_researcher), title = 'collaborations_line_graph')

In [None]:
%%appyter markdown
## Notices of Award (NOAs) and Grant Information (from RePORTER)
The grant and notice-of-award summary data are retrieved from the [RePORTER API](https://api.reporter.nih.gov/) using the name of the researcher as the input query. The grants and notices of award are plotted in graphs by the year in which they were issued. 

In [None]:

# Ask NIH RePORTER for projects whose PI name matches the researcher:
# at most 500 results, ordered by ascending project start date.
# `response` is inspected by the two cells that follow.
url = 'https://api.reporter.nih.gov/v2/projects/search'
payload = dict(
    criteria={'pi_names': [{'any_name': name_of_researcher}]},
    offset=0,
    limit=500,
    sort_field="project_start_date",
    sort_order="asc",
)
response = requests.post(url, json=payload)


In [None]:
# Aggregate the RePORTER results and plot them.
#
# Per project record:
#   * the record's year is the award notice year when present, else the
#     project start year, else the fiscal year; it is counted as one NOA;
#   * when an award amount is present, the grant (core project number) is
#     mapped to the earliest such year seen, and the amount is added both to
#     that record's year and to the record's fiscal year.
# Afterwards NOAs per year and grants per earliest year are charted.
if response.status_code == 200:
    data = json.loads(response.text)
    dict_storing_award_amount = defaultdict(int)
    dict_storing_award_amount_at_each_year = defaultdict(int)
    dict_to_store_project_year = defaultdict(int)
    dict_storing_grant_to_year = {}
    if len(data['results']) > 0:
        reporter_link = data['meta']['properties']['URL']
        display(Markdown(f"### Link to Summary of the Projects/Grants from [RePORTER website]({reporter_link}) for {name_of_researcher}"))
        for project in data['results']:
            organization_from_reporter = project['organization']['org_name']
            grant_num = project['core_project_num']

            # Pick the year for this record: notice date, then start date, then fiscal year.
            if project['award_notice_date'] is not None:
                year_started = int(project['award_notice_date'].split('-')[0])
            elif project['project_start_date'] is not None:
                year_started = int(project['project_start_date'].split('-')[0])
            else:
                year_started = int(project['fiscal_year'])
            dict_to_store_project_year[year_started] += 1

            # Funding is only tracked for records that carry an award amount.
            if project['award_amount'] is not None:
                award_amount = int(project['award_amount'])
                if grant_num not in dict_storing_grant_to_year or year_started < dict_storing_grant_to_year[grant_num]:
                    dict_storing_grant_to_year[grant_num] = year_started
                dict_storing_award_amount[year_started] += award_amount
                dict_storing_award_amount_at_each_year[project['fiscal_year']] += award_amount

        dict_storing_grants_per_year = defaultdict(int)
        for earliest_year in dict_storing_grant_to_year.values():
            dict_storing_grants_per_year[earliest_year] += 1

        ##### Plots for total projects / NOAs #####
        year_keys = sorted(dict_to_store_project_year)
        sorted_ = {year: dict_to_store_project_year[year] for year in year_keys}
        fig = make_bar_plot(sorted_,'Year of NOA', "NOAs", f"Notice of Awards each Year for {name_of_researcher}", "Sourced from RePORTER")
        fig_line = make_line_plot(sorted_, 'Year of NOA', "Cumulative NOAs", f"Cumulative Notice of Awards each Year for {name_of_researcher}", "Sourced from RePORTER")
        for figure, stem, caption in (
            (fig, 'noa_bar', f"Notice of Award each year for {name_of_researcher}."),
            (fig_line, 'noa_line_graph', f"The cumulative NOAs awarded each year for {name_of_researcher}"),
        ):
            figure.show()
            figure.write_image(output_folder + stem + '.png')
            figure_counter = display_figure_labels(figure_counter, caption, title=stem)

        ##### Plots for total grants #####
        year_keys = sorted(dict_storing_grants_per_year)
        sorted_grants = {year: dict_storing_grants_per_year[year] for year in year_keys}
        fig = make_bar_plot(sorted_grants,'Year of Grant', "Grants", f"Grants Awarded each Year to {name_of_researcher}", "Sourced from RePORTER")
        fig_line = make_line_plot(sorted_grants, 'Year of Grant', "Grants", f"Cumulative Grants Awarded each Year to {name_of_researcher}", "Sourced from RePORTER")
        for figure, stem, caption in (
            (fig, 'grants_bar', f"Grants awarded each year to {name_of_researcher} as a PI."),
            (fig_line, 'grants_line_graph', f"The cumulative grants awarded each year to {name_of_researcher} as a PI"),
        ):
            figure.show()
            figure.write_image(output_folder + stem + '.png')
            figure_counter = display_figure_labels(figure_counter, caption, title=stem)
    else:
        print("There is no project or grant information that is availabe from RePORTER for this researcher")




In [None]:
# Chart the award money per fiscal year collected by the previous cell
# (bar chart per year plus cumulative line chart). When no amounts were
# recorded, a short notice is printed instead.
if response.status_code == 200:
    if len(dict_storing_award_amount_at_each_year) == 0:
        print("No Grant Money Awarded Information for {}".format(name_of_researcher))
    else:
        year_keys = sorted(dict_storing_award_amount_at_each_year)
        sorted_amounts = {}
        for year in year_keys:
            sorted_amounts[year] = dict_storing_award_amount_at_each_year[year]
        fig = make_bar_plot(sorted_amounts,'Year', "Grant Amount Awarded($)", f"Grant Amount Awarded each Year to {name_of_researcher}", "Sourced from RePORTER")
        fig_line = make_line_plot(sorted_amounts, 'Year', "Grant Amount Awarded($)", f"Cumulative Grant Amount each Year to {name_of_researcher}", "Sourced from RePORTER")
        for figure, stem, caption in (
            (fig, 'grants_amount_bar', f"Total grant funding awarded each year for {name_of_researcher}."),
            (fig_line, 'grants_amount_line_graph', f"The cumulative amount of grant value awarded each year for {name_of_researcher}"),
        ):
            figure.show()
            figure.write_image(output_folder + stem + '.png')
            figure_counter = display_figure_labels(figure_counter, caption, title=stem)

In [None]:
%%appyter markdown
## Gene Associations for {{researcher_name.raw_value}} (from GeneShot)
[Geneshot](https://maayanlab.cloud/geneshot/) finds publications that mention both the search terms and genes

In [None]:
# GeneShot search: post the PubMed-style author name, then visualise the genes
# returned in data['gene_count'] as a word cloud and as two top-20 bar charts
# (first value of each entry, then second value of each entry).
# gene_count stays an empty dict when the request fails, so the next cell can
# still read it.
GENESHOT_URL = 'https://maayanlab.cloud/geneshot/api/search'
payload = {"rif": "autorif", "term": pubmed_name}
response = requests.post(GENESHOT_URL, json=payload)
gene_count = {}
params = {"searchin" : name_of_researcher}
if response.status_code != 200:
    print(response.status_code)
    print("Error with request")
else:
    display(Markdown("### Link to the [GeneShot Page](https://maayanlab.cloud/geneshot/index.html?{}&searchnot=&rif=autorif) for {}".format(urlencode(params), name_of_researcher)))
    data = json.loads(response.text)
    gene_count = data['gene_count']
    if len(gene_count) == 0:
        print("No associated Genes")
    else:
        # Split each entry into its two values, leaving out keys that start with 'ENSP'.
        gene_count_only = {}
        gene_frequency_compared_pubmed_articles = {}
        for gene_symbol, gene_stats in gene_count.items():
            if gene_symbol.startswith('ENSP'):
                continue
            gene_count_only[gene_symbol] = gene_stats[0]
            gene_frequency_compared_pubmed_articles[gene_symbol] = gene_stats[1]

        # Word cloud sized by the first value of every retained gene.
        wordcloud = WordCloud(width=800, height=800, background_color='white')
        wordcloud.generate_from_frequencies(gene_count_only)
        plt.figure(figsize=(8,8))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.show()

        # Keep only the 20 genes with the largest counts, in descending order.
        sorted_genes_by_cit = sorted(gene_count_only.items(), key=lambda item: item[1], reverse=True)[:20]
        gene_count_only = dict(sorted_genes_by_cit)
        fig = make_bar_plot(gene_count_only, 'Gene',"Citation Count", f"Associated Genes of {name_of_researcher}")
        fig.show()
        fig.write_image(output_folder+'genes_count_bar.png')
        figure_counter = display_figure_labels(figure_counter, f"The bar graph displays the genes associated to {name_of_researcher} with the total citation count for each of them.", title = 'genes_count_bar')

        # Same for the 20 genes with the largest frequency value.
        sorted_genes_by_frac = sorted(gene_frequency_compared_pubmed_articles.items(), key=lambda item: item[1], reverse=True)[:20]
        gene_frequency_compared_pubmed_articles = dict(sorted_genes_by_frac)
        fig_freq = make_bar_plot(gene_frequency_compared_pubmed_articles, 'Gene', "Frequency", f"Associated Genes with Normalized Frequency of {name_of_researcher}")
        fig_freq.show()
        fig_freq.write_image(output_folder+'gene_frequency_bar.png')
        figure_counter = display_figure_labels(figure_counter, f"The bar graph displays the genes associated to {name_of_researcher} with the frequency relative to the total publications for each of them.", title = 'gene_frequency_bar')


In [None]:
%%appyter markdown
## Predicted Associated Genes for {{researcher_name.raw_value}} (from GeneShot)

The top 20 associated genes were used in the prediction. Genes are always ranked by the product of the publication count and the publication frequency. ARCHS4 co-expression was used to establish gene-gene similarity.

In [None]:
# GeneShot association: send the genes found for the researcher and chart the
# 20 predicted genes with the highest co-expression similarity score.
#
# The request is only sent when there are input genes. The y-axis range is set
# on the predicted-genes figure itself (it was previously applied to `fig`,
# the chart left over from an earlier cell).
# NOTE(review): the accompanying text says the top 20 associated genes are used
# as input, but every key of gene_count is submitted here — confirm which is
# intended.
GENESHOT_URL = 'https://maayanlab.cloud/geneshot/api/associate'
payload = {
  "gene_list": list(gene_count.keys()),
  "similarity": "coexpression" 
}
if len(gene_count) == 0:
    print("No predicted genes for this researcher")
else:
    response = requests.post(GENESHOT_URL, json=payload)
    if response.status_code == 200:
        data = json.loads(response.text)
        dict_for_predicted_genes_with_score = {
            gene: gene_dict['simScore'] for gene, gene_dict in data['association'].items()
        }

        # Keep the 20 highest-scoring predictions, in descending order.
        sorted_genes_by_frac = sorted(dict_for_predicted_genes_with_score.items(), key = lambda x:x[1], reverse=True)[:20]
        dict_for_predicted_genes_with_score = dict(sorted_genes_by_frac)

        if len(dict_for_predicted_genes_with_score) == 0:
            # max() below would fail on an empty mapping.
            print("No predicted genes for this researcher")
        else:
            fig_predicted = make_bar_plot(dict_for_predicted_genes_with_score, 'Gene', "Similiarity Score", f"Predicted Genes of {name_of_researcher}")
            fig_predicted.update_layout(yaxis=dict(range=[0, max(dict_for_predicted_genes_with_score.values())]))
            fig_predicted.show()
            fig_predicted.write_image(output_folder+'gene_predicted_bar.png')
            figure_counter = display_figure_labels(figure_counter, "The bar graph displays the predicted genes associated to {} with the similarity score from the coexpression with the input genes".format(name_of_researcher), title = 'gene_predicted_bar')

    else:
        print("Error in API request", response.status_code)

In [None]:
%%appyter markdown
## Studied Drug Associations for {{researcher_name.raw_value}} (from DrugShot)
[DrugShot](https://maayanlab.cloud/drugshot/) finds publications that mention both the search terms and small molecules

In [None]:
# DrugShot search: post the PubMed-style author name, then visualise the drugs
# returned in data['drug_count'] as a word cloud and as two top-20 bar charts
# (first value of each entry, then second value of each entry).
# drug_count stays an empty dict when nothing is found or the request fails,
# so the next cell can still read it.
#
# Each chart is saved under its own filename and captioned with a matching
# title. Previously the frequency chart overwrote 'drug_associated_bar.png'
# and both captions reused the gene chart's title.
DRUGSHOT_URL = 'https://maayanlab.cloud/drugshot/api/search'
payload = {"rif": "autorif", "term": pubmed_name}
drug_count = {}
response = requests.post(DRUGSHOT_URL, json=payload)
params = {"searchin" : name_of_researcher}
if response.status_code == 200:
    data = json.loads(response.text)
    display(Markdown("### Link to the [DrugShot Page](https://maayanlab.cloud/drugshot/index.html?{}&searchnot=&rif=autorif) for {}".format(urlencode(params), name_of_researcher)))
    if len(data['drug_count']) == 0:
        print("No associated drugs for this researcher")
    else:
        drug_count = data['drug_count']
        drug_count_only = {key: value[0] for key, value in drug_count.items()}
        drug_frequency_compared_pubmed_articles = {key: value[1] for key, value in drug_count.items()}
        # Create a WordCloud object and generate it from the drug counts
        wordcloud = WordCloud(width=800, height=800, background_color='white')
        wordcloud.generate_from_frequencies(drug_count_only)
        plt.figure(figsize=(8,8))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.show()

        ## Associated drugs by count: keep the 20 largest, in descending order ##
        sorted_drugs_by_cit = sorted(drug_count_only.items(), key = lambda x:x[1], reverse=True)[:20]
        drug_count_only = dict(sorted_drugs_by_cit)
        fig = make_bar_plot(drug_count_only, 'Drug', "Citations", f"Associated Studied Drugs of {name_of_researcher}")
        fig.show()
        fig.write_image(output_folder+'drug_associated_bar.png')
        figure_counter = display_figure_labels(figure_counter, "The bar graph displays the drugs associated to {} with the total times that drug was cited for them.".format(name_of_researcher), title = 'drug_associated_bar')

        ## Associated drugs by frequency: keep the 20 largest, in descending order ##
        sorted_drugs_by_cit_freq = sorted(drug_frequency_compared_pubmed_articles.items(), key = lambda x:x[1], reverse=True)[:20]
        drug_frequency_compared_pubmed_articles = dict(sorted_drugs_by_cit_freq)
        fig_freq = make_bar_plot(drug_frequency_compared_pubmed_articles, 'Drug', "Citation Frequency", f"Associated Studied Drugs of {name_of_researcher}")
        fig_freq.show()
        fig_freq.write_image(output_folder+'drug_frequency_bar.png')
        figure_counter = display_figure_labels(figure_counter, "The bar graph displays the drugs associated to {} with the frequency relative to the total publications for each of them.".format(name_of_researcher), title = 'drug_frequency_bar')

else:
    print("Error in request to DrugShot API", response.status_code)

In [None]:
%%appyter markdown
## Predicted Associated Drugs for {{researcher_name.raw_value}}
The top 20 associated drugs were used in the prediction. Drugs are always ranked by the product of the publication count and the publication frequency.

In [None]:
# DrugShot association: send the drugs found for the researcher and chart the
# 30 predicted drugs with the highest L1000 co-expression similarity score.
#
# The request is only sent when there are input drugs, and the messages now
# refer to drugs (they were copied from the gene cell).
# NOTE(review): the accompanying text says the top 20 associated drugs are used
# as input, but every key of drug_count is submitted here — confirm which is
# intended.
DRUGSHOT_URL = 'https://maayanlab.cloud/drugshot/api/associate'
payload = {
  "drug_list": list(drug_count.keys()),
  "similarity": "L1000_coexpression" 
}
dict_for_predicted_drugs_with_score = {}
if len(drug_count) == 0:
    print("No predicted drugs given the drug list")
else:
    response = requests.post(DRUGSHOT_URL, json=payload)
    if response.status_code == 200:
        data = json.loads(response.text)
        for drug, drug_dict in data['association'].items():
            dict_for_predicted_drugs_with_score[drug] = drug_dict['simScore']

        # Keep the 30 highest-scoring predictions, in descending order.
        sorted_drugs_by_frac = sorted(dict_for_predicted_drugs_with_score.items(), key = lambda x:x[1], reverse=True)[:30]
        dict_for_predicted_drugs_with_score = dict(sorted_drugs_by_frac)

        if len(dict_for_predicted_drugs_with_score) == 0:
            # max() below would fail on an empty mapping.
            print("No predicted drugs given the drug list")
        else:
            fig = make_bar_plot(dict_for_predicted_drugs_with_score,'Drug', "Similiarity Score", f"Predicted Drugs of {name_of_researcher}")
            fig.update_layout(yaxis=dict(range=[0, max(dict_for_predicted_drugs_with_score.values())]))
            fig.show()
            fig.write_image(output_folder+'drugs_predicted_bar.png')
            figure_counter = display_figure_labels(figure_counter, "The bar graph displays the predicted drugs associated to {} with the similarity score from coexpression with the input drugs".format(name_of_researcher), title = 'drugs_predicted_bar')
    else:
        print("Request Error to DrugShot for predicted drugs", response.status_code)