In [107]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import os

## Load Files

In [72]:
# Read all columns and concatenate them into a single dataframe
JIF_df = pd.read_csv(f"IFs_df.csv", header = 0, sep="\t", index_col=0)
Abs_df = pd.read_csv(f"abstracts_df.csv", header = 0, sep="\t", index_col=0)
print(f'JIFs: {len(JIF_df)}; Abs: {len(Abs_df)}')

In [174]:
# Join files on ISSN, drop duplicates
new_df = pd.merge(JIF_df, Abs_df[Abs_df['ISSN'].notna()], how = 'right', on=['ISSN', 'Year Published'])
new_df.drop_duplicates(subset=['Document Title'], keep='first', inplace=True, ignore_index=True)
# Join files on eISSN, drop duplicates
new_df_1 = pd.merge(JIF_df, Abs_df[Abs_df['ISSN'].isna()], how = 'right', on=['eISSN', 'Year Published'])
new_df_1.drop_duplicates(subset=['Document Title'], keep='first', inplace=True, ignore_index=True)

# Conctenate joins, drop extra columns
join_df = pd.concat([new_df, new_df_1], axis = 0)
join_df.drop(columns = ['eISSN_x', 'eISSN_y', 'ISSN_x', 'ISSN_y'], inplace=True)
join_df['Year Published'] = join_df['Year Published'].astype(int)
len(join_df)

6302

In [175]:
#Find journals with missing JIF
missing  = join_df[join_df['JIF'].isna()].groupby(['Publication Name'])['Publication Name'].count().sort_values(ascending=False)
missing.to_csv('missing.csv', '\t')

In [176]:
# Get a list of files in the missing folder
file_list=os.listdir('./raw_data/missing')

In [177]:
new_journals = []
for file_name in file_list:
    file_name = f"./raw_data/missing/{file_list[0]}"
    with open(file_name) as f:
        j_name= f.readline().strip()
    df = pd.read_csv(file_name, header = 4, sep=",", index_col=False)
    df['Publication Name'] = j_name
    df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
    df.drop(df[df['Journal impact factor'].isna()].index, inplace=True)
    df['Year'] = df['Year'].astype(int)
    df = df[['Year', 'Journal impact factor', 'Publication Name']]
    df.columns = ['Year Published', 'JIF_new', 'Publication Name']
    new_journals.append(df)

New_IFs_df = pd.concat(new_journals, axis = 0)

In [160]:

new_join_df.drop_duplicates(subset=['Document Title'], keep='first', inplace=True, ignore_index=True)
new_join_df.drop(columns = ['JIF_x'], inplace=True)
new_join_df = new_join_df.rename(columns={'JIF_y':'JIF'})

In [179]:
new_join_df = pd.merge(New_IFs_df, join_df[join_df['JIF'].isna()], how = 'right', on=['Year Published', 'Publication Name'])

In [180]:
new_join_df[]

Unnamed: 0,Year Published,JIF_new,Publication Name,Journal name,ISSN,JIF,Eigenfactor,Authors,Author Full Name,Document Title,...,ISO Source Abbreviation,Publication Date,Volume,Issue,Beginning Page,Ending Page,Digital Object Identifier (DOI),Page Count,Web of Science Categories,eISSN
0,2000,,TRENDS IN PHARMACOLOGICAL SCIENCES,,0165-6147,,,"Lazareno, S; Birdsall, NJM","Lazareno, S; Birdsall, NJM",Effects of contamination on radioligand bindin...,...,Trends Pharmacol. Sci.,FEB,21,2,57,60,10.1016/S0165-6147(99)01412-1,4,Pharmacology & Pharmacy,
1,2017,,SCIENTIFIC REPORTS,,2045-2322,,,"Phan, JA; Landau, AM; Jakobsen, S; Gjedde, A","Phan, Jenny-Ann; Landau, Anne M.; Jakobsen, St...",Radioligand binding analysis of α<sub>2</sub> ...,...,Sci Rep,NOV 22,7,,,,10.1038/s41598-017-16020-1,17,Multidisciplinary Sciences,
2,2020,,NUKLEARMEDIZIN-NUCLEAR MEDICINE,,0029-5566,,,"Götz, TI; Lang, EW; Prante, O; Cordes, M; Kuwe...","Goetz, Theresa Ida; Lang, Elmar Wolfgang; Pran...",Estimation of [<SUP>177</SUP>Lu]PSMA-617 tumor...,...,Nuklearmedizin,SEP,59,05,365,374,10.1055/a-1204-9932,10,"Radiology, Nuclear Medicine & Medical Imaging",
3,2017,,JOURNAL OF PHARMACOLOGICAL AND TOXICOLOGICAL M...,,1056-8719,,,"Allikalt, A; Rinken, A","Allikalt, Anni; Rinken, Ago",Budded baculovirus particles as a source of me...,...,J. Pharmacol. Toxicol. Methods,JUL-AUG,86,,81,86,10.1016/j.vascn.2017.04.004,6,Pharmacology & Pharmacy; Toxicology,
4,2021,,PHARMACOLOGICAL REPORTS,,1734-1140,,,"Khoramjouy, M; Ahmadi, F; Faizi, M; Shahhossei...","Khoramjouy, Mona; Ahmadi, Fatemeh; Faizi, Mehr...",Optimization binding studies of opioid recepto...,...,Pharmacol. Rep.,OCT,73,5,1390,1395,10.1007/s43440-021-00265-9,6,Pharmacology & Pharmacy,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3804,2023,,CANCERS,,,,,"Durma, AD; Saracyn, M; Kolodziej, M; Józwik-Pl...","Durma, Adam Daniel; Saracyn, Marek; Kolodziej,...",Epidemiology of Neuroendocrine Neoplasms and R...,...,Cancers,NOV,15.0,22.0,,,10.3390/cancers15225466,21,Oncology,2072-6694
3805,2022,,BRAIN SCIENCES,,,,,"Brasic, JR; Goodman, JA; Nandi, A; Russell, DS...","Brasic, James Robert; Goodman, Jack Alexander;...",Fragile X Mental Retardation Protein and Cereb...,...,Brain Sci.,MAR,12.0,3.0,,,10.3390/brainsci12030314,19,Neurosciences,2076-3425
3806,2022,,BRAIN COMMUNICATIONS,,,,,"Nylund, M; Sucksdorff, M; Matilainen, M; Polvi...","Nylund, Marjo; Sucksdorff, Marcus; Matilainen,...",Phenotyping of multiple sclerosis lesions acco...,...,Brain Commun.,JAN 4,4.0,1.0,,,10.1093/braincomms/fcab301,15,Clinical Neurology; Neurosciences,2632-1297
3807,2023,,EJNMMI RADIOPHARMACY AND CHEMISTRY,,,,,"Ikenuma, H; Ogata, A; Koyama, H; Ji, B; Ishii,...","Ikenuma, Hiroshi; Ogata, Aya; Koyama, Hiroko; ...",Synthesis and evaluation of a novel PET ligand...,...,EJNMMI Radiopharm. Chem.,OCT 18,8.0,1.0,,,10.1186/s41181-023-00217-z,16,"Chemistry, Medicinal; Chemistry, Inorganic & N...",2365-421X


In [161]:
new_join_df[new_join_df['JIF'].isna()].groupby(['Publication Name'])['Publication Name'].count().sort_values(ascending=False)

Publication Name
EUROPEAN JOURNAL OF PHARMACOLOGY                         160
BRITISH JOURNAL OF PHARMACOLOGY                          133
JOURNAL OF PHARMACOLOGY AND EXPERIMENTAL THERAPEUTICS    131
SYNAPSE                                                  125
JOURNAL OF LABELLED COMPOUNDS & RADIOPHARMACEUTICALS      96
                                                        ... 
HUMAN VACCINES & IMMUNOTHERAPEUTICS                        1
HUMAN REPRODUCTION                                         1
HUMAN PATHOLOGY                                            1
HUMAN IMMUNOLOGY                                           1
ZOOLOGICAL SCIENCE                                         1
Name: Publication Name, Length: 806, dtype: int64

Unnamed: 0,JIF_x,JIF_y
0,,
1,,2.538
2,,
3,,2.333
4,,2.333
5,,11.082
6,,2.774
7,,
8,,4.281
9,,
