In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(palette='pastel')
plt.rcParams['figure.figsize'] = (12, 6)
pd.set_option('display.max_columns', None)

In [12]:
import re
def parse_article_data(article_string):
    # Remove leading and trailing whitespaces/newlines
    article_string = article_string.strip()
    
    # Extract the type of publication (search for the first meaningful word)
    publication_type_match = re.search(r'\b(Article|Review|Research|Case Study|Editorial|Open access)\b', article_string)
    publication_type = publication_type_match.group(1) if publication_type_match else "Unknown"
    
    # Extract the title of the article by skipping known irrelevant words
    title_match = re.search(r'\b(?:Review|Article|Research|Case Study|Editorial|Open access)\b\s*\n*([^\n]+)', article_string)
    title = title_match.group(1) if title_match else "Unknown Title"
    
    # Extract the authors (between the title and the journal name)
    authors = re.findall(r'(\w+,\s*\w+\.\w*)', article_string)
    authors_str = ", ".join(authors) if authors else "Unknown Authors"
    
    # Extract the journal name (after the last author)
    journal_match = re.search(r'\n([^,\n]+), \d{4}', article_string)
    journal = journal_match.group(1) if journal_match else "Unknown Journal"
    
    return {
        'Type of Publication': publication_type,
        'Title': title,
        'Authors': authors_str,
        'Journal': journal
    }
    
def split_string(string):
  """Splits a string containing names into individual names.

  Args:
    string: The input string.

  Returns:
    A list of individual names.
  """

  # Split the string by commas, but only between 2 commas
  names = string.split(',')

  # Combine the split names back into pairs
  paired_names = [names[i] + names[i+1] for i in range(0, len(names), 2)]
  paired_names = [name.strip() for name in paired_names if "pp" not in name]

  return paired_names

In [13]:
articulos_doshi = """
Article
Identifying data-driven subtypes of major depressive disorder with electronic health records
Sharma, A.
, 
Verhaak, P.F.
, 
McCoy, T.H.
, 
Perlis, R.H.
, 
Doshi-Velez, F.
Journal of Affective DisordersThis link is disabled., 2024, 356, pp. 64–70

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
0
Citations
Review  •  Open access
Ethical and regulatory challenges of large language models in medicine
Ong, J.C.L.
, 
Chang, S.Y.-H.
, 
William, W.
, ... 
Savulescu, J.
, 
Ting, D.S.W.
The Lancet Digital HealthThis link is disabled., 2024, 6(6), pp. e428–e432

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
1
Citations
Conference Paper  •  Open access
Accuracy-Time Tradeoffs in AI-Assisted Decision Making under Time Pressure
Swaroop, S.
, 
Buçinca, Z.
, 
Gajos, K.Z.
, 
Doshi-Velez, F.
ACM International Conference Proceeding Series, 2024, pp. 138–154

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
1
Citations
Conference Paper
Reinforcement Learning Interventions on Boundedly Rational Human Agents in Frictionful Tasks
Nofshin, E.
, 
Swaroop, S.
, 
Pan, W.
, 
Murphy, S.
, 
Doshi-Velez, F.
Proceedings of the International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS, 2024, 2024-May, pp. 1482–1491

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
0
Citations
Review  •  Open access
Artificial intelligence education: An evidence-based medicine approach for consumers, translators, and developers
Ng, F.Y.C.
, 
Thirunavukarasu, A.J.
, 
Cheng, H.
, ... 
Coffman, T.
, 
Ting, D.S.W.
Cell Reports MedicineThis link is disabled., 2023, 4(10), 101230

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
6
Citations
Conference Paper
Reward Design for an Online Reinforcement Learning Algorithm Supporting Oral Self-Care
Trella, A.L.
, 
Zhang, K.W.
, 
Nahum-Shani, I.
, ... 
Doshi-Velez, F.
, 
Murphy, S.A.
Proceedings of the 37th AAAI Conference on Artificial Intelligence, AAAI 2023, 2023, 37, pp. 15724–15730

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
3
Citations
Conference Paper
The Unintended Consequences of Discount Regularization: Improving Regularization in Certainty Equivalence Reinforcement Learning
Rathnam, S.
, 
Parbhoo, S.
, 
Pan, W.
, 
Murphy, S.A.
, 
Doshi-Velez, F.
Proceedings of Machine Learning Research, 2023, 202, pp. 28746–28767

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
1
Citations
Conference Paper
PERFORMANCE BOUNDS FOR MODEL AND POLICY TRANSFER IN HIDDEN-PARAMETER MDPS
Fu, H.
, 
Yao, J.
, 
Gottesman, O.
, 
Doshi-Velez, F.
, 
Konidaris, G.
11th International Conference on Learning Representations, ICLR 2023, 2023

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
1
Citations
Conference Paper  •  Open access
Travel-time prediction using neural-network-based mixture models
Sharma, A.
, 
Zhang, J.
, 
Nikovski, D.
, 
Doshi-Velez, F.
Procedia Computer ScienceThis link is disabled., 2023, 220, pp. 1033–1038

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
0
Citations
Article  •  Open access
An interpretable RL framework for pre-deployment modeling in ICU hypotension management
Zhang, K.
, 
Wang, H.
, 
Du, J.
, ... 
Celi, L.A.
, 
Doshi-Velez, F.
npj Digital MedicineThis link is disabled., 2022, 5(1), 173

Show abstract
This link is disabled.
Related documents
This link is disabled.
 
2
Citations"""

In [20]:
separador = "\n\nShow abstract\nThis link is disabled.\nRelated documents\nThis link is disabled.\n "
articulos_separados = articulos_doshi.split(separador)
articulos_parseados = list(map(parse_article_data, articulos_separados))
resumen_articulos = pd.DataFrame(articulos_parseados).iloc[:-1]

In [21]:
resumen_articulos["Authors"] = resumen_articulos["Authors"].apply(split_string)
resumen_articulos = resumen_articulos.explode("Authors")

In [23]:
resumen_articulos["Authors"].value_counts()

Authors
Velez F.               8
Sharma A.              2
Ting D.S               2
Murphy S.A             2
Pan W.                 2
Swaroop S.             2
Nikovski D.            1
Zhang J.               1
Konidaris G.           1
Zhang K.               1
Trella A.L             1
Gottesman O.           1
Yao J.                 1
Fu H.                  1
Wang H.                1
Parbhoo S.             1
Rathnam S.             1
Du J.                  1
Shani I.               1
Zhang K.W              1
Cheng H.               1
Coffman T.             1
Verhaak P.F            1
Thirunavukarasu A.J    1
Ng F.Y                 1
Murphy S.              1
Nofshin E.             1
Gajos K.Z              1
Buçinca Z.             1
Savulescu J.           1
William W.             1
Chang S.Y              1
Ong J.C                1
Perlis R.H             1
McCoy T.H              1
Celi L.A               1
Name: count, dtype: int64