In [1328]:
import pandas as pd
from pandera.typing import DataFrame
from loguru import logger
from sys import stderr

logger.remove()
logger.add(
    stderr,
    level="INFO",
    format="<cyan>[{file.name}:{line} - {function}()]</cyan> <green>{time:YYYY-MM-DD HH:mm:ss}</green> - {level} - <level>{message}</level>",
)


from datetime import datetime
from typing import List, Dict, Set
import json
import os
import re


from modules.constants import CLINICAL_TRIALS_PATHS, PUBMED_PATHS, DRUGS_PATHS, OUTPUT_PATH

In [1329]:

def create_folders_if_not_exist(output_filepath:str) -> None:
    path_split = output_filepath.split('/')
    current_path = ''
    
    for folder in path_split:
        if '.' not in folder and not os.path.exists(folder):
            current_path += folder + '/'
            os.makedirs(current_path)


def write_dict_to_file(output_filepath:str, dictionary:Dict) -> None:
    create_folders_if_not_exist(output_filepath)
    
    with open(output_filepath, 'w', encoding='utf-8') as hd:
        json.dump(dictionary, hd, indent=4, ensure_ascii=False)

In [1330]:

def load_df_from_csv(filepath:str, delimiter:str=',', header:int=0) -> DataFrame:
    return pd.read_csv(filepath, delimiter=delimiter, header=header)

def load_df_from_json(filepath:str) -> DataFrame:
    return pd.read_json(filepath)

def load_df_from_dict(dictionary:Dict) -> DataFrame:
    return pd.DataFrame.from_dict(dictionary)

def fix_broken_json(filepath:str) -> Dict:
    """ When the json file is broken (trailing commas), we will cleaned it and import it as dictionary then load the dataframe """
    with open(filepath, 'r', encoding='utf-8') as hd:
        json_str = hd.read()
    
    json_str = json_str.replace('null', 'None').replace('true', 'True').replace('false', 'False')
    cleaned_json = eval(json_str)
    
    logger.info(f"Successfully fixed and loaded the broken Json file.")
    return cleaned_json

In [1331]:
def import_json_file_as_dict(filepath:str) -> Dict:
    try:
        with open(filepath, 'r', encoding='utf-8') as hd:
            return json.load(hd)
            
    except ValueError:
        logger.warning(f'Broken json detected in {filepath}. Attempting to clean it and re-load it.')
        return fix_broken_json(filepath)

# Merging DataFrames

In [1332]:

def merge_dataframes(list_dataframes:List) -> DataFrame:
    return pd.concat(list_dataframes)

In [1333]:

def load_data(paths:List) -> DataFrame:
    list_dfs = []
    
    for path in paths:
        if path.endswith('.csv'):
            df = load_df_from_csv(path)
            
        elif path.endswith('.json'):
            try:
                df = load_df_from_json(path)
            except ValueError:
                logger.warning(f'Broken json detected in {path}. Attempting to clean it and re-load it.')
                fixed_json = fix_broken_json(path)
                df = load_df_from_dict(fixed_json)
            
        else:
            raise Exception(f'The provided path {path} has an incompatible file extension (not csv nor json).')

        list_dfs.append(df)
    
    df = merge_dataframes(list_dfs)
    
    logger.info(f"Successfully loaded and merged dataframes from {paths}.")
    return df

In [1334]:
# Loading data
clinical_df = load_data(CLINICAL_TRIALS_PATHS)
pubmed_df = load_data(PUBMED_PATHS)
drugs_df = load_data(DRUGS_PATHS)

[36m[238570550.py:23 - load_data()][0m [32m2024-09-21 19:55:53[0m - INFO - [1mSuccessfully loaded and merged dataframes from ['clinical_trials.csv'].[0m
[36m[561034247.py:18 - fix_broken_json()][0m [32m2024-09-21 19:55:53[0m - INFO - [1mSuccessfully fixed and loaded the broken Json file.[0m
[36m[238570550.py:23 - load_data()][0m [32m2024-09-21 19:55:53[0m - INFO - [1mSuccessfully loaded and merged dataframes from ['pubmed.csv', 'pubmed.json'].[0m
[36m[238570550.py:23 - load_data()][0m [32m2024-09-21 19:55:53[0m - INFO - [1mSuccessfully loaded and merged dataframes from ['drugs.csv'].[0m


# Cleaning

In [1335]:

def normalize_dates_format(df:DataFrame, date_column_name:str, output_date_format:str = '%Y-%m-%d') -> DataFrame:
    df[date_column_name] = pd.to_datetime(df[date_column_name], dayfirst=True)
    df[date_column_name] = df[date_column_name].dt.strftime(output_date_format)
    df[date_column_name] = df[date_column_name].apply(lambda x: datetime.strptime(x, output_date_format))
    return df

In [1336]:

def cast_id_as_string(df:DataFrame, id_column_name:str) -> DataFrame:
    #! To uniformiser the id column
    df[id_column_name] = df[id_column_name].astype(str)
    return df

In [1337]:

def rename_column(df:DataFrame, column_naming_mapping:Dict) -> DataFrame:
    return df.rename(columns=column_naming_mapping)

In [1338]:

def fill_in_missing_ids_int(df:DataFrame, id_column_name:str) -> DataFrame:
    # This only works if id is integer
    # We will interpolate linearly the ids
    
    df[id_column_name] = pd.to_numeric(df[id_column_name], errors='coerce')
    
    max_id = int(df[id_column_name].max())
    number_missing_rows  = df[id_column_name].isna().sum()
    
    id_range = range(int(max_id) + 1, int(max_id) + 1 + number_missing_rows)
    
    df.loc[df[id_column_name].isna(), id_column_name] = id_range
    df[id_column_name] = df[id_column_name].astype(int)
    return df

In [1339]:

def clean_titles(articleTitle:str) -> str:
    # Remove encoding issues like \xc3\x28, we are focusing solely on \x followed by 2 characters or digits
    articleTitle = re.sub(r'\\x[0-9a-fA-F]{2}', '', articleTitle)
    
    # Remove punctuations except hyphens "-"
    articleTitle = re.sub(r'[^\w\s&À-ÿ-]', '', articleTitle)
    
    # Title Case
    articleTitle = articleTitle.title()
    
    # Normalize number of spaces (remove extra spaces)
    articleTitle = re.sub(r'\s+', ' ', articleTitle)
    
    # Remove trailing spaces
    articleTitle = articleTitle.strip()
    
    return articleTitle

In [1340]:

def drop_empty_titles_and_journals(df:DataFrame) -> DataFrame:
    drop_condition = (df['title'] == '') | (df['journal'] == '')
    filtered_df = df[~drop_condition]
    return filtered_df

In [1341]:

def merge_rows(group):
    """ This function will fille all missing data  based on the other instances of the same row, then drops the duplicates by returning only one row """
    return group.ffill().bfill().iloc[0]

=== Cleaning

In [1342]:
clinical_df = rename_column(clinical_df, {'scientific_title': 'title'})
clinical_df

Unnamed: 0,id,title,date,journal
0,NCT01967433,Use of Diphenhydramine as an Adjunctive Sedati...,1 January 2020,Journal of emergency nursing
1,NCT04189588,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,1 January 2020,Journal of emergency nursing
2,NCT04237090,,1 January 2020,Journal of emergency nursing
3,NCT04237091,Feasibility of a Randomized Controlled Clinica...,1 January 2020,Journal of emergency nursing
4,NCT04153396,Preemptive Infiltration With Betamethasone and...,1 January 2020,Hôpitaux Universitaires de Genève
5,NCT03490942,Glucagon Infusion in T1D Patients With Recurre...,25/05/2020,
6,,Glucagon Infusion in T1D Patients With Recurre...,25/05/2020,Journal of emergency nursing
7,NCT04188184,Tranexamic Acid Versus Epinephrine During Expl...,27 April 2020,Journal of emergency nursing\xc3\x28


In [1343]:
drugs_df = rename_column(drugs_df, {'drug': 'name'})
drugs_df

Unnamed: 0,atccode,name
0,A04AD,DIPHENHYDRAMINE
1,S03AA,TETRACYCLINE
2,V03AB,ETHANOL
3,A03BA,ATROPINE
4,A01AD,EPINEPHRINE
5,6302001,ISOPRENALINE
6,R01AD,BETAMETHASONE


In [1344]:
clinical_df_new = normalize_dates_format(clinical_df, 'date', '%Y-%m-%d')
clinical_df_new

Unnamed: 0,id,title,date,journal
0,NCT01967433,Use of Diphenhydramine as an Adjunctive Sedati...,2020-01-01,Journal of emergency nursing
1,NCT04189588,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,2020-01-01,Journal of emergency nursing
2,NCT04237090,,2020-01-01,Journal of emergency nursing
3,NCT04237091,Feasibility of a Randomized Controlled Clinica...,2020-01-01,Journal of emergency nursing
4,NCT04153396,Preemptive Infiltration With Betamethasone and...,2020-01-01,Hôpitaux Universitaires de Genève
5,NCT03490942,Glucagon Infusion in T1D Patients With Recurre...,2020-05-25,
6,,Glucagon Infusion in T1D Patients With Recurre...,2020-05-25,Journal of emergency nursing
7,NCT04188184,Tranexamic Acid Versus Epinephrine During Expl...,2020-04-27,Journal of emergency nursing\xc3\x28


In [1345]:
articles_group = clinical_df_new.groupby(['title', 'date'])
clinical_df_new = articles_group.apply(merge_rows).reset_index(drop=True)
clinical_df_new

Unnamed: 0,id,title,date,journal
0,NCT04237090,,2020-01-01,Journal of emergency nursing
1,NCT04237091,Feasibility of a Randomized Controlled Clinica...,2020-01-01,Journal of emergency nursing
2,NCT03490942,Glucagon Infusion in T1D Patients With Recurre...,2020-05-25,Journal of emergency nursing
3,NCT04189588,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,2020-01-01,Journal of emergency nursing
4,NCT04153396,Preemptive Infiltration With Betamethasone and...,2020-01-01,Hôpitaux Universitaires de Genève
5,NCT04188184,Tranexamic Acid Versus Epinephrine During Expl...,2020-04-27,Journal of emergency nursing\xc3\x28
6,NCT01967433,Use of Diphenhydramine as an Adjunctive Sedati...,2020-01-01,Journal of emergency nursing


In [1346]:
pubmed_df_new = normalize_dates_format(pubmed_df, 'date', '%Y-%m-%d')
pubmed_df_new

Unnamed: 0,id,title,date,journal
0,1.0,A 44-year-old man with erythema of the face di...,2019-01-01,Journal of emergency nursing
1,2.0,"An evaluation of benadryl, pyribenzamine, and ...",2019-01-01,Journal of emergency nursing
2,3.0,Diphenhydramine hydrochloride helps symptoms o...,2019-01-02,The Journal of pediatrics
3,4.0,Tetracycline Resistance Patterns of Lactobacil...,2020-01-01,Journal of food protection
4,5.0,Appositional Tetracycline bone formation rates...,2020-01-02,American journal of veterinary research
5,6.0,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7.0,The High Cost of Epinephrine Autoinjectors and...,2020-02-01,The journal of allergy and clinical immunology...
7,8.0,Time to epinephrine treatment is associated wi...,2020-03-01,The journal of allergy and clinical immunology...
0,9.0,Gold nanoparticles synthesized from Euphorbia ...,2020-01-01,"Journal of photochemistry and photobiology. B,..."
1,10.0,Clinical implications of umbilical artery Dopp...,2020-01-01,The journal of maternal-fetal & neonatal medicine


In [1347]:
pubmed_df_new = fill_in_missing_ids_int(pubmed_df_new, 'id')
pubmed_df_new

Unnamed: 0,id,title,date,journal
0,1,A 44-year-old man with erythema of the face di...,2019-01-01,Journal of emergency nursing
1,2,"An evaluation of benadryl, pyribenzamine, and ...",2019-01-01,Journal of emergency nursing
2,3,Diphenhydramine hydrochloride helps symptoms o...,2019-01-02,The Journal of pediatrics
3,4,Tetracycline Resistance Patterns of Lactobacil...,2020-01-01,Journal of food protection
4,5,Appositional Tetracycline bone formation rates...,2020-01-02,American journal of veterinary research
5,6,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7,The High Cost of Epinephrine Autoinjectors and...,2020-02-01,The journal of allergy and clinical immunology...
7,8,Time to epinephrine treatment is associated wi...,2020-03-01,The journal of allergy and clinical immunology...
0,9,Gold nanoparticles synthesized from Euphorbia ...,2020-01-01,"Journal of photochemistry and photobiology. B,..."
1,10,Clinical implications of umbilical artery Dopp...,2020-01-01,The journal of maternal-fetal & neonatal medicine


In [1348]:
pubmed_df_new['title'] = pubmed_df_new['title'].apply(clean_titles)
pubmed_df_new['journal'] = pubmed_df_new['journal'].apply(clean_titles)

clinical_df_new['title'] = clinical_df_new['title'].apply(clean_titles)
clinical_df_new['journal'] = clinical_df_new['journal'].apply(clean_titles)

drugs_df['name'] = drugs_df['name'].apply(clean_titles)

In [1349]:
print(pubmed_df_new.shape)
print(clinical_df_new.shape)

(13, 4)
(7, 4)


# Merging

In [1350]:
pubmed_df_new = cast_id_as_string(pubmed_df_new, 'id')
clinical_df_new = cast_id_as_string(clinical_df_new, 'id')

In [1351]:
pubmed_df_new['article_type'] = 'PubMed'
clinical_df_new['article_type'] = 'ClinicalTrial'

In [1352]:
articles_df = merge_dataframes([pubmed_df_new, clinical_df_new])
articles_df

Unnamed: 0,id,title,date,journal,article_type
0,1,A 44-Year-Old Man With Erythema Of The Face Di...,2019-01-01,Journal Of Emergency Nursing,PubMed
1,2,An Evaluation Of Benadryl Pyribenzamine And Ot...,2019-01-01,Journal Of Emergency Nursing,PubMed
2,3,Diphenhydramine Hydrochloride Helps Symptoms O...,2019-01-02,The Journal Of Pediatrics,PubMed
3,4,Tetracycline Resistance Patterns Of Lactobacil...,2020-01-01,Journal Of Food Protection,PubMed
4,5,Appositional Tetracycline Bone Formation Rates...,2020-01-02,American Journal Of Veterinary Research,PubMed
5,6,Rapid Reacquisition Of Contextual Fear Followi...,2020-01-01,Psychopharmacology,PubMed
6,7,The High Cost Of Epinephrine Autoinjectors And...,2020-02-01,The Journal Of Allergy And Clinical Immunology...,PubMed
7,8,Time To Epinephrine Treatment Is Associated Wi...,2020-03-01,The Journal Of Allergy And Clinical Immunology...,PubMed
0,9,Gold Nanoparticles Synthesized From Euphorbia ...,2020-01-01,Journal Of Photochemistry And Photobiology B B...,PubMed
1,10,Clinical Implications Of Umbilical Artery Dopp...,2020-01-01,The Journal Of Maternal-Fetal & Neonatal Medicine,PubMed


# Drop duplicates & empty titles

In [1353]:
articles_df = drop_empty_titles_and_journals(articles_df)
articles_df

Unnamed: 0,id,title,date,journal,article_type
0,1,A 44-Year-Old Man With Erythema Of The Face Di...,2019-01-01,Journal Of Emergency Nursing,PubMed
1,2,An Evaluation Of Benadryl Pyribenzamine And Ot...,2019-01-01,Journal Of Emergency Nursing,PubMed
2,3,Diphenhydramine Hydrochloride Helps Symptoms O...,2019-01-02,The Journal Of Pediatrics,PubMed
3,4,Tetracycline Resistance Patterns Of Lactobacil...,2020-01-01,Journal Of Food Protection,PubMed
4,5,Appositional Tetracycline Bone Formation Rates...,2020-01-02,American Journal Of Veterinary Research,PubMed
5,6,Rapid Reacquisition Of Contextual Fear Followi...,2020-01-01,Psychopharmacology,PubMed
6,7,The High Cost Of Epinephrine Autoinjectors And...,2020-02-01,The Journal Of Allergy And Clinical Immunology...,PubMed
7,8,Time To Epinephrine Treatment Is Associated Wi...,2020-03-01,The Journal Of Allergy And Clinical Immunology...,PubMed
0,9,Gold Nanoparticles Synthesized From Euphorbia ...,2020-01-01,Journal Of Photochemistry And Photobiology B B...,PubMed
1,10,Clinical Implications Of Umbilical Artery Dopp...,2020-01-01,The Journal Of Maternal-Fetal & Neonatal Medicine,PubMed


In [1354]:
drugs_df.drop_duplicates(subset=['atccode'], keep='first', inplace=True)
articles_df.drop_duplicates(subset=['id'], keep='first', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles_df.drop_duplicates(subset=['id'], keep='first', inplace=True)


# Indexing

In [1355]:
articles_df.set_index('id', inplace=True)

In [1356]:
drugs_df.set_index('atccode', inplace=True)

# Class !!

In [1357]:
from pandera.typing import DataFrame
from loguru import logger
from sys import stderr

logger.remove()
logger.add(
    stderr,
    level="INFO",
    format="<cyan>[{file.name}:{line} - {function}()]</cyan> <green>{time:YYYY-MM-DD HH:mm:ss}</green> - {level} - <level>{message}</level>",
)

from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict


@dataclass
class JournalMentions:
    title: str
    drugs_dataFrame: DataFrame
    journal_articles_dataFrame: DataFrame  # Articles of the current journal only
    pubmed_publications: List = field(default_factory=list, init=False)
    clinical_trials_publications: List = field(default_factory=list, init=False)

    def extract_drug_from_publication_title(self, article_title: str) -> List:
        #! Hypothèse : no mentioned drug => skip
        #! index of dataframe is ID
        title_words_set = set(article_title.split())

        mentioned_drugs = []

        for drug_id, row in self.drugs_dataFrame.iterrows():
            if row["name"] in title_words_set:
                mentioned_drugs.append([drug_id, row["name"]])

        if mentioned_drugs == []:
            # No drug found, and given our hypothesis, we skip it
            logger.warning(
                f"No drug was mentioned in the following title : `{article_title}`"
            )

        return mentioned_drugs

    def get_article_information_from_id(self, article_id: str) -> Dict:
        #! Index of dataframe is ID
        #! Title should be cleaned and lowered

        current_article_row = self.journal_articles_dataFrame.loc[article_id]

        article_title = current_article_row["title"]
        mention_date = current_article_row["date"]
        article_type = current_article_row["article_type"]

        # Transform date to string
        mention_date_str = datetime.strftime(mention_date, "%Y-%m-%d")

        article_info = {
            "title": article_title,
            "date": mention_date_str,
            "isPubMed": True if article_type == "PubMed" else False,
            "isClinical": True if article_type == "ClinicalTrial" else False,
        }

        return article_info

    def build_links_articles_drug_mentions(self) -> None:
        #! no duplicate articles
        #! Articles of the current journal only, no duplicates
        for article_id in self.journal_articles_dataFrame.index:
            # Get info about articles
            article_info = self.get_article_information_from_id(article_id)

            # Find mentioned drug(s)
            list_mentioned_drugs = self.extract_drug_from_publication_title(
                article_info["title"]
            )

            for mentioned_drug_info in list_mentioned_drugs:
                mentioned_drug_id, mentioned_drug_name = mentioned_drug_info

                currLinkDict = {
                    "articleId": article_id,
                    "articleTitle": article_info["title"],
                    "mentionDate": article_info["date"],
                    "mentionedDrugID": mentioned_drug_id,
                    "mentionedDrugName": mentioned_drug_name,
                }

                if article_info["isPubMed"] is True:
                    self.pubmed_publications.append(currLinkDict)
                elif article_info["isClinical"] is True:
                    self.clinical_trials_publications.append(currLinkDict)
                else:
                    raise Exception(
                        f"Something went wrong, the article { article_info['title']} is neither clinical nor pubmed"
                    )

    def generate_article_link_graph_dict(self) -> Dict:
        self.build_links_articles_drug_mentions()

        output = {
            "title": self.title,
            "referencedBy": {
                "pubmedArticles": self.pubmed_publications,
                "clinicalTrials": self.clinical_trials_publications,
            },
        }

        return output


In [1358]:

def build_link_graph_servier(df_articles_cleaned:DataFrame, df_drugs_cleaned:DataFrame) -> Dict:
    # Get the list of all journals
    list_distinct_journals = df_articles_cleaned['journal'].unique()
    
    output_dict = {"journals": []}
    
    for journal in list_distinct_journals:
        logger.info(f"Currently generating graph for {journal}")
        articles_of_journal_condition = (df_articles_cleaned['journal'] == journal)
        
        df_articles_of_journal = df_articles_cleaned[articles_of_journal_condition]
        
        journal_instance = JournalMentions(
            title=journal,
            drugs_dataFrame=df_drugs_cleaned,
            journal_articles_dataFrame=df_articles_of_journal
        )
        
        current_graph_dict = journal_instance.generate_article_link_graph_dict()
        output_dict['journals'].append(current_graph_dict)
        
    return output_dict

In [1359]:
output_graph = build_link_graph_servier(articles_df, drugs_df)
write_dict_to_file(OUTPUT_PATH, output_graph)

[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for Journal Of Emergency Nursing[0m
[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for The Journal Of Pediatrics[0m
[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for Journal Of Food Protection[0m
[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for American Journal Of Veterinary Research[0m
[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for Psychopharmacology[0m
[36m[4193450893.py:8 - build_link_graph_servier()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mCurrently generating graph for The Journal Of Allergy And Clinical Immunology In Practice[0m
[

# AD-HOC !!!

In [1360]:
def get_all_articles_from_journal(journal_dict:Dict) -> List:
    pubmed = journal_dict['referencedBy']['pubmedArticles']
    clinical_trials = journal_dict['referencedBy']['clinicalTrials']
    
    return [pubmed, clinical_trials]

In [1361]:
def get_drugs_mentioned_by_journal(pubmed_of_journal:List, clinical_trials_of_journal:List, return_drug_names:bool=False) -> Set:
    mentioned_drugs_no_duplicates = set()
    all_articles = pubmed_of_journal + clinical_trials_of_journal
    
    for article_object in all_articles:
        if return_drug_names:
            mentioned_drugs_no_duplicates.add(article_object['mentionedDrugName'])
        else:
            mentioned_drugs_no_duplicates.add(article_object['mentionedDrugID'])
    
    return mentioned_drugs_no_duplicates

In [1362]:

def get_drugs_mentioned_by_similar_journals(list_journals:List, drug_name:str, skip_clinical_trials:bool) -> Set:
    output_drug_mentions = set()
    non_clinical_trials_journals = set() # For logging purposes only
    
    for journal in list_journals:
        pubmed, clinical_trials = get_all_articles_from_journal(journal)

        if clinical_trials != [] and skip_clinical_trials:
            # Skip journals that are referenced by clinical trials
            continue 
        
        set_drugs_mentioned_by_journal = get_drugs_mentioned_by_journal(
            pubmed_of_journal=pubmed, 
            clinical_trials_of_journal=clinical_trials, 
            return_drug_names=True
        )
        
        if drug_name in set_drugs_mentioned_by_journal:
            non_clinical_trials_journals.add(journal['title'])  # For logging purposes only
            output_drug_mentions = output_drug_mentions.union(set_drugs_mentioned_by_journal)
    
    non_clinical_trials_journals = list(non_clinical_trials_journals)
    
    logger.info(f"The drug {drug_name} was mentioned alongside the following drug names `{', '.join(list(output_drug_mentions))}` by these non-clinical trials referenced journals : `{', '.join(non_clinical_trials_journals)}`")
    return output_drug_mentions

In [1363]:
def fetch_top_journals() -> List:
    """ This function will return a list of the name(s) of the journal(s) that has mentioned most unique drugs."""
    graph_link_dict = import_json_file_as_dict(OUTPUT_PATH)
    
    unique_mentions_mapping = {}
    
    for journal_object in graph_link_dict['journals']:
        curr_pubmed_articles, curr_clinical_trials_articles = get_all_articles_from_journal(journal_object)

        set_curr_mentioned_drugs = get_drugs_mentioned_by_journal(
            pubmed_of_journal=curr_pubmed_articles, 
            clinical_trials_of_journal=curr_clinical_trials_articles, 
            return_drug_names=False # Use IDs to be more accurate
        )
        
        unique_mentions_mapping[journal_object['title']] = len(set_curr_mentioned_drugs)


    max_nb_unique_mentions = max(unique_mentions_mapping.values())
    top_journals = [key for key, value in unique_mentions_mapping.items() if value == max_nb_unique_mentions]
    
    logger.info(f"The journal(s) {', '.join(top_journals)} has mentioned {max_nb_unique_mentions} unique drugs")
    
    return top_journals

In [1364]:
def fetch_drugs_mentioned_by_pubmed_journals(drug_name:str) -> List:
    """ 
        This function will, for a specific drugm return a list of all drugs mentioned by the same journals that are only referenced by pubmed articles. 
        The list includes the input drug too
    """    
    graph_link_dict = import_json_file_as_dict(OUTPUT_PATH)
    
    output_drug_mentions = get_drugs_mentioned_by_similar_journals(
        list_journals=graph_link_dict['journals'], 
        drug_name=drug_name.title(), 
        skip_clinical_trials=True
    )
    
    return list(output_drug_mentions)

In [1365]:
fetch_top_journals()

[36m[276210798.py:22 - fetch_top_journals()][0m [32m2024-09-21 19:55:54[0m - INFO - [1mThe journal(s) Journal Of Emergency Nursing, Psychopharmacology, The Journal Of Maternal-Fetal & Neonatal Medicine has mentioned 2 unique drugs[0m


['Journal Of Emergency Nursing',
 'Psychopharmacology',
 'The Journal Of Maternal-Fetal & Neonatal Medicine']

In [1367]:
fetch_drugs_mentioned_by_pubmed_journals(drug_name='BETAMETHASONE')

[36m[1168926539.py:24 - get_drugs_mentioned_by_similar_journals()][0m [32m2024-09-21 19:56:14[0m - INFO - [1mThe drug Betamethasone was mentioned alongside the following drug names `Betamethasone, Atropine` by these non-clinical trials referenced journals : `The Journal Of Maternal-Fetal & Neonatal Medicine, Journal Of Back And Musculoskeletal Rehabilitation`[0m


['Betamethasone', 'Atropine']