In [1]:
import funcy
import difflib
import pandas as pd
import numpy as np
import re
import string
from fuzzywuzzy import fuzz
from os.path import join


# Directory that holds the raw input CSVs; it is joined with the file names below
# when the notebook reads its inputs and writes its CSV outputs.
# NOTE(review): absolute, user-specific path - the notebook cannot run on another
# machine without editing this line.
RAW_DATA = '/Users/lrraymond13/MIT/Furman_RA_2016/journal_analysis/Data/RawData'
# InCites journal grid export (read with skiprows=1 further down).
INCITES = 'InCitesJournalGrid.csv'
# Journal key whose publication titles are checked and matched against InCites.
JOURNAL_KEY = 'Furman_Journal_Key2016.csv'

In [2]:
# 1. Check that the spellings in the JOURNAL KEY uniquely identify a journal. 
# If there are multiple spellings for the same journal, adjust the Journal ID. 
# Specifically, in column AdjustedJournalID enter the same number from column JournalID corresponding
# to one of the spellings for all alternative spellings. When in doubt, leave as separate journals 
# i.e. with separate JournalID numbers.

# 3. Pair the spelling from InCites with the one in the attached file, for each identified unique AdjustedJournalID. 

# 4. Add in the Excel file the InCites spelling, along with Total Cites, Journal Impact Factor and Eigenfactor 
# from InCites.

In [3]:
# Load the journal key and normalise its headers: spaces become underscores and
# everything is upper-cased (e.g. 'Publication Title' -> 'PUBLICATION_TITLE').
jk_df = pd.read_csv(join(RAW_DATA, JOURNAL_KEY))
jk_df.columns = ['_'.join(header.split(' ')).upper() for header in jk_df.columns]
print(jk_df.columns)

Index([u'PUBLICATION_TITLE', u'JOURNALID', u'ADJUSTEDJOURNALID',
       u'INCITES_SPELLING'],
      dtype='object')


In [4]:
# Check the length of unique pub titles is same as length of series
def check_unique(uniq_ser):
    """Return True when no value of the Series ``uniq_ser`` occurs more than once.

    Compares the number of distinct values with the number of rows.
    """
    n_rows = len(uniq_ser)
    n_distinct = len(uniq_ser.unique())
    return n_distinct == n_rows

In [5]:
check_unique(jk_df.loc[:, 'PUBLICATION_TITLE'])

True

In [6]:
# Check if there are any modifications caused by changing journal to uppercase and stripping extraneous chars

In [7]:
# string cleaning functions
def trans_remov_punc(to_change, change_to):
    """Build a function that swaps characters using a translation table.

    Each character of ``to_change`` is replaced by the character at the same
    position in ``change_to`` (the lookup is done by ``str.translate``, so it
    is fast).

    Parameters
    ----------
    to_change : str
        Characters to replace.
    change_to : str
        Replacement characters; must have the same length as ``to_change``.

    Returns
    -------
    callable
        One-argument function mapping a string to its translated copy.

    Raises
    ------
    ValueError
        If the two arguments differ in length (raised by ``maketrans``).
    """
    # string.maketrans only exists on Python 2; Python 3 moved it to
    # str.maketrans. Pick whichever the running interpreter provides.
    make_table = getattr(string, 'maketrans', None) or str.maketrans
    trantab = make_table(to_change, change_to)

    def translate(x):
        return x.translate(trantab)

    # A plain closure is returned: wrapping it in a partial that binds no
    # arguments added nothing.
    return translate


def standardize_whitespace(pub_str):
    """Collapse runs of spaces into a single space and trim both ends.

    Only the space character is treated as a separator; tabs and newlines are
    left exactly as they are.
    """
    words = [chunk for chunk in pub_str.split(' ') if chunk]
    return ' '.join(words)


def remove_punc(pub_str):
    """Replace every character of ``string.punctuation`` with a space, then
    collapse the resulting runs of spaces.

    Punctuation is turned into a space (not deleted), so 'Service-Oriented'
    becomes 'Service Oriented'.
    """
    blanks = ' ' * len(string.punctuation)
    strip_punctuation = trans_remov_punc(string.punctuation, blanks)
    # standardize spaces left behind by the substitution
    return standardize_whitespace(strip_punctuation(pub_str))


def has_year(pub_str, min_year=1900, max_year=2018):
    """Tell whether a title contains a stand-alone year.

    Parameters
    ----------
    pub_str : str or iterable of str
        A title (split on single spaces) or an already tokenised title.
        Punctuation should have been removed beforehand, otherwise a token
        such as '2009,' is not recognised as a number.
    min_year, max_year : int, optional
        Exclusive bounds; a token counts as a year when
        ``min_year < value < max_year``. The defaults accept 1901-2017.

    Returns
    -------
    bool
        True if at least one all-digit token falls inside the bounds.
    """
    # Duck-type instead of isinstance(..., str): on Python 2 a unicode title
    # is not a str and would be iterated character by character.
    tokens = pub_str.split(' ') if hasattr(pub_str, 'split') else pub_str
    for token in tokens:
        if not token.isdigit():
            continue
        try:
            value = int(token)
        except ValueError:
            # isdigit() accepts characters such as superscript digits that
            # int() cannot parse; they are never years.
            continue
        if min_year < value < max_year:
            return True
    return False


def seperate_IEEE_IET(fnc_str):
    """Move the clause after the last ', ' to the front of the title.

    Many journal-key titles are written 'title, IEEE' while the InCites file
    uses 'IEEE title'; e.g. 'Access, IEEE' becomes 'IEEE Access'.

    Parameters
    ----------
    fnc_str : str or iterable of str
        The title, or its tokens (joined with single spaces before splitting).

    Returns
    -------
    str
        The reordered title; unchanged when it contains no ', '.
    """
    if not hasattr(fnc_str, 'rsplit'):
        # A token sequence was passed: rebuild the title first. (The joined
        # text used to be assigned to a throw-away name and then overwritten,
        # so list input still reached .rsplit and failed.)
        fnc_str = ' '.join(fnc_str)
    head_and_tail = fnc_str.rsplit(', ', 1)
    return ' '.join(reversed(head_and_tail))


def remove_acronym(pub_str):
    """Delete acronyms enclosed in parentheses, e.g. ' (SAS)'.

    Only a parenthesised group that is preceded by a space and contains no
    whitespace is removed (together with that leading space); anything else
    is returned untouched.
    """
    acronym_re = re.compile(r" \(\S+\)", re.IGNORECASE)
    return acronym_re.sub(r'', pub_str)


def remove_year(pub_str, min_year=1900, max_year=2018):
    """Delete stand-alone four-digit years from a title.

    Only a four-digit number that is a whole token and lies strictly between
    ``min_year`` and ``max_year`` is removed. Digits inside longer numbers
    (e.g. '1997401') and four-digit numbers outside the range (e.g. a
    standard number such as '8021') are kept.

    Parameters
    ----------
    pub_str : str
        Title, normally with punctuation already removed.
    min_year, max_year : int, optional
        Exclusive bounds of what counts as a year (defaults: 1900 and 2018).

    Returns
    -------
    str
        The title without the years. The spaces around a removed year are
        left in place, so callers should normalise whitespace afterwards.
    """
    def _blank_if_year(match):
        token = match.group(0)
        return '' if min_year < int(token) < max_year else token

    # \b on both sides keeps the pattern from biting into longer digit runs.
    return re.sub(r"\b\d{4}\b", _blank_if_year, pub_str)


def clean_pubname(pub_str):
    """Normalise a publication title so different spellings can be compared.

    Steps, in order: move a trailing ', ...' clause to the front, drop
    parenthesised acronyms, replace punctuation with spaces, remove years
    (only when the title contains one) and collapse spaces. Letter case is
    not changed.
    """
    title = seperate_IEEE_IET(pub_str) if ', ' in pub_str else pub_str
    # Acronyms go first: once punctuation is stripped the parentheses that
    # mark them are gone.
    title = remove_punc(remove_acronym(title))
    if has_year(title):
        title = remove_year(title)
    return standardize_whitespace(title)
    

def is_conf(pub_str, conf_strings=None):
    """Flag titles that look like a conference rather than a journal.

    A title is flagged when it contains one of the conference keywords
    (compared case-insensitively) or when it contains a year.

    Parameters
    ----------
    pub_str : str
        Title with punctuation already removed (required by the year check).
    conf_strings : list of str, optional
        Keywords that mark a conference; defaults to a built-in list.

    Returns
    -------
    int
        1 if the title looks like a conference, else 0.
    """
    if conf_strings is None:
        conf_strings = ['SYMPOSIUM', 'CONFERENCE',
                        'COLLOQUIUM', 'CONGRESS', 'ANNUAL', 'FORUM', 'WORKSHOP', 'SEMINAR']
    # The keywords are upper case while titles are usually mixed case
    # ('IEEE Symposium on ...'); compare both sides in upper case so the
    # keyword test can actually match.
    title_upper = pub_str.upper()
    has_keyword = any(keyword.upper() in title_upper for keyword in conf_strings)
    # a year in the title is treated as a conference marker as well
    if has_keyword or has_year(pub_str):
        return 1
    return 0
    

In [8]:
clean_pubname('Service-Oriented Computing and Applications (SOCA), 2009 IEEE International Conference on')

'IEEE International Conference on Service Oriented Computing and Applications'

In [9]:
# Add the normalised title produced by clean_pubname as a new column.
jk_df['CLEAN_PUB'] = jk_df['PUBLICATION_TITLE'].apply(clean_pubname)
# Re-run the uniqueness check on the normalised titles; False means that at
# least two rows now share the same CLEAN_PUB.
check_unique(jk_df.loc[:, 'CLEAN_PUB'])

False

In [10]:
# List the rows whose CLEAN_PUB is shared with another row; keep=False marks
# every member of a duplicated group, not only the repeats.
dups = jk_df[jk_df.duplicated(subset=['CLEAN_PUB'], keep=False)]

In [11]:
dups

Unnamed: 0,PUBLICATION_TITLE,JOURNALID,ADJUSTEDJOURNALID,INCITES_SPELLING,CLEAN_PUB
16,"3D Imaging (IC3D), 2012 International Conferen...",17,,,International Conference on 3D Imaging
17,"3D Imaging (IC3D), 2013 International Conferen...",18,,,International Conference on 3D Imaging
22,"3D Systems Integration Conference (3DIC), 2010...",23,,,IEEE International 3D Systems Integration Conf...
23,"3D Systems Integration Conference (3DIC), 2011...",24,,,IEEE International 3D Systems Integration Conf...
24,"3D Systems Integration Conference (3DIC), 2013...",25,,,IEEE International 3D Systems Integration Conf...
25,"3D User Interfaces (3DUI), 2010 IEEE Symposium on",26,,,IEEE Symposium on 3D User Interfaces
26,"3D User Interfaces (3DUI), 2011 IEEE Symposium on",27,,,IEEE Symposium on 3D User Interfaces
27,"3D User Interfaces (3DUI), 2012 IEEE Symposium on",28,,,IEEE Symposium on 3D User Interfaces
28,"3D User Interfaces (3DUI), 2013 IEEE Symposium on",29,,,IEEE Symposium on 3D User Interfaces
29,"3D User Interfaces (3DUI), 2014 IEEE Symposium on",30,,,IEEE Symposium on 3D User Interfaces


In [19]:
# For each group of rows sharing a CLEAN_PUB, give every member one common id:
# the JOURNALID of the group's first row. Titles that are not duplicated keep
# DEDUP_JOURNALID = NaN.
# The lookup table must hold exactly one row per CLEAN_PUB: with several rows
# per title the left merge multiplies the rows of any group that has three or
# more members.
in_dup_group = jk_df.duplicated(subset=['CLEAN_PUB'], keep=False)
standard_dups = (
    jk_df.loc[in_dup_group, ['CLEAN_PUB', 'JOURNALID']]
    .drop_duplicates(subset=['CLEAN_PUB'], keep='first')
    .rename(columns={'JOURNALID': 'DEDUP_JOURNALID'})
)
jk_df2 = pd.merge(left=jk_df, right=standard_dups, on='CLEAN_PUB', how='left')
assert len(jk_df2) == len(jk_df), 'dedup merge must not add or drop rows'

In [21]:
# Flag conferences from the punctuation-stripped ORIGINAL title, not CLEAN_PUB:
# clean_pubname has already deleted the year from CLEAN_PUB, so the year test
# inside is_conf could never fire on it.
jk_df2['CONF_FLAG'] = jk_df2['PUBLICATION_TITLE'].apply(remove_punc).apply(is_conf)

In [24]:
# Load the InCites grid (its first line is skipped) and normalise the headers
# the same way as the journal key: spaces -> underscores, upper case.
incites_df = pd.read_csv(join(RAW_DATA, INCITES), skiprows=1)
incites_df.columns = ['_'.join(header.split(' ')).upper() for header in incites_df.columns]

In [25]:
# Standardize the InCites journal title with clean_pubname, the same cleaning
# used for the journal key's CLEAN_PUB column.
# NOTE(review): clean_pubname does not change letter case, so titles are NOT
# upper-cased here and the exact match below is case-sensitive.
incites_df['CLEAN_PUB'] = incites_df['FULL_JOURNAL_TITLE'].apply(clean_pubname)

In [26]:
# Inner join on the cleaned title: keeps only publications whose CLEAN_PUB is
# identical in the InCites table and in the journal key.
exact_matches = incites_df.merge(
    jk_df2, how='inner', left_on='CLEAN_PUB', right_on='CLEAN_PUB')

In [27]:
exact_matches

Unnamed: 0,RANK,FULL_JOURNAL_TITLE,TOTAL_CITES,JOURNAL_IMPACT_FACTOR,EIGENFACTOR_SCORE,CLEAN_PUB,PUBLICATION_TITLE,JOURNALID,ADJUSTEDJOURNALID,INCITES_SPELLING,DEDUP_JOURNALID,CONF_FLAG
0,602,IEEE Industrial Electronics Magazine,518,5.303,0.00265,IEEE Industrial Electronics Magazine,"Industrial Electronics Magazine, IEEE",8915,,,,0
1,702,IEEE Transactions on Cybernetics,2246,4.943,0.01017,IEEE Transactions on Cybernetics,"Cybernetics, IEEE Transactions on",3358,,,,0
2,728,IEEE Transactions on Neural Networks and Learn...,12919,4.854,0.02248,IEEE Transactions on Neural Networks and Learn...,"Neural Networks and Learning Systems, IEEE Tra...",11668,,,,0
3,782,IEEE Transactions on Industrial Informatics,3779,4.708,0.01639,IEEE Transactions on Industrial Informatics,"Industrial Informatics, IEEE Transactions on",8970,,,,0
4,1266,IEEE Journal of Photovoltaics,2469,3.736,0.00960,IEEE Journal of Photovoltaics,"Photovoltaics, IEEE Journal of",12495,,,,0
5,1271,IEEE Transactions on Sustainable Energy,2149,3.727,0.01171,IEEE Transactions on Sustainable Energy,"Sustainable Energy, IEEE Transactions on",15045,,,,0
6,1330,IEEE Computational Intelligence Magazine,634,3.647,0.00165,IEEE Computational Intelligence Magazine,"Computational Intelligence Magazine, IEEE",2384,,,,0
7,1738,IEEE Transactions on Smart Grid,4753,3.19,0.02922,IEEE Transactions on Smart Grid,"Smart Grid, IEEE Transactions on",14442,,,,0
8,1796,IEEE Journal of Emerging and Selected Topics i...,473,3.129,0.00305,IEEE Journal of Emerging and Selected Topics i...,Emerging and Selected Topics in Power Electron...,4809,,,,0
9,1950,IEEE Circuits and Systems Magazine,550,3,0.00142,IEEE Circuits and Systems Magazine,"Circuits and Systems Magazine, IEEE",1640,,,,0


In [28]:
# Titles that found an exact partner. Both sides are filtered against the
# CLEAN_PUB *column*: exact_matches comes straight out of a merge and has a
# plain integer index, so testing against exact_matches.index never excluded
# anything from the InCites side.
matched_titles = exact_matches['CLEAN_PUB']

# Chaining set_index (instead of inplace=True on a .loc slice) returns a new
# frame and avoids modifying a view of jk_df2 / incites_df.
not_match = (
    jk_df2.loc[~jk_df2['CLEAN_PUB'].isin(matched_titles), :]
    .set_index(keys='CLEAN_PUB', drop=True)
)

not_match_incites = (
    incites_df.loc[~incites_df['CLEAN_PUB'].isin(matched_titles), :]
    .set_index(keys='CLEAN_PUB', drop=True)
)

In [29]:
def get_close_matches(x, match_index):
    """Return the best ``difflib`` match for ``x`` among ``match_index``.

    Uses difflib's default result count and cutoff; returns None when difflib
    finds no candidate.
    """
    candidates = difflib.get_close_matches(x, match_index)
    return candidates[0] if candidates else None


In [None]:
# Replace each unmatched journal-key title in the index with its closest
# InCites title (None when difflib finds no close match). Every title is
# compared against the whole InCites index, so this can be slow on large tables.
not_match.index = not_match.index.map(lambda x: get_close_matches(x, not_match_incites.index))

In [124]:
# NOTE(review): strip_yearly_info is not defined in any cell visible in this
# notebook - confirm where it comes from, or remove this cell.
strip_yearly_info('3-D Digital Imaging and Modeling, 2001. Proceedings. Third International Conference on')

set(['!', '#', '"', '%', '$', "'", '&', ')', '+', '*', '-', ',', '/', '.', ';', ':', '=', '<', '?', '>', '@', '[', ']', '\\', '_', '^', '`', '{', '}', '|', '~'])
[]
False


nan

In [116]:
jk_df2[jk_df2.index==12].values

array([[ '3-D Digital Imaging and Modeling, 2001. Proceedings. Third International Conference on',
        13, nan,
        '3D DIGITAL IMAGING AND MODELING 2001 PROCEEDINGS THIRD INTERNATIONAL CONFERENCE ON',
        nan, 1,
        '2001 PROCEEDINGS THIRD INTERNATIONAL CONFERENCE ON 3D DIGITAL IMAGING AND MODELING',
        None]], dtype=object)

In [66]:

# Write the rows flagged as conferences (CONF_FLAG == 1) to conferences.csv
# inside RAW_DATA.
jk_df2[jk_df2['CONF_FLAG']==1].to_csv(join(RAW_DATA, 'conferences.csv'))

In [67]:
# Write the rows not flagged as conferences (CONF_FLAG == 0) to
# journals_raw.csv inside RAW_DATA.
jk_df2[jk_df2['CONF_FLAG']==0].to_csv(join(RAW_DATA, 'journals_raw.csv'))

In [72]:
exact_matches.shape

(18, 11)

In [76]:
# InCites rows whose CLEAN_PUB is not among the labels of exact_matches.index.
# NOTE(review): this only filters anything if exact_matches is indexed by
# CLEAN_PUB (as the set_index cell marked In [43] does); with the default
# integer index nothing is excluded - confirm the intended cell order.
incites_not_matched = incites_df[~incites_df['CLEAN_PUB'].isin(exact_matches.index)]

In [77]:
# Second-pass inner join between the unmatched journal-key rows and the
# unmatched InCites rows.
# NOTE(review): no visible cell creates a 'CLEAN_PUB2' column on not_match -
# confirm where it is built.
exact_matches2 = pd.merge(
    left=not_match, right=incites_not_matched, left_on='CLEAN_PUB2', right_on='CLEAN_PUB', how='inner')


In [78]:
# Remove what the second pass matched from both sides.
# NOTE(review): the first line tests CLEAN_PUB against exact_matches2.index
# while the second tests against the exact_matches2['CLEAN_PUB'] column; a
# fresh merge result normally has an integer index, so the first filter
# probably excludes nothing - confirm.
incites_not_matched2 = incites_not_matched[~incites_not_matched['CLEAN_PUB'].isin(exact_matches2.index)]
not_match2 = not_match.loc[~not_match['CLEAN_PUB2'].isin(exact_matches2['CLEAN_PUB']), :]

In [82]:
# Keep the original title for the rows whose index label is not null.
# NOTE(review): selects the raw header 'Publication Title'; the loading cell
# renames headers to the 'PUBLICATION_TITLE' form, so this presumably ran
# against an older version of the frame - confirm.
match_key = not_match.loc[pd.notnull(not_match.index), ['Publication Title']]

In [83]:
# Move the index into a regular column and call it FULL_JOURNAL_TITLE.
# NOTE(review): the rename only applies if reset_index names the new column
# 'index', i.e. if the index is unnamed at this point - confirm.
match_key2 = match_key.reset_index()
match_key2.rename(columns={'index': 'FULL_JOURNAL_TITLE'}, inplace=True)

In [84]:
# Stack the fuzzy-matched pairs on top of the exact-match pairs (row-wise
# concatenation of the two title columns).
full_matches = pd.concat([match_key2, exact_matches.loc[:, ['Publication Title', 'FULL_JOURNAL_TITLE']]], axis=0)

In [85]:
# Score every (journal-key title, InCites title) pair with
# fuzz.token_sort_ratio; a[0] and a[1] are the two selected columns, in the
# order they are listed.
full_matches['score'] = full_matches[['Publication Title', 'FULL_JOURNAL_TITLE']].apply(lambda a: fuzz.token_sort_ratio(a[0], a[1]), axis=1)

In [87]:
incites_df

Unnamed: 0_level_0,RANK,TOTAL_CITES,JOURNAL_IMPACT_FACTOR,EIGENFACTOR_SCORE,CLEAN_PUB
FULL_JOURNAL_TITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA-A CANCER JOURNAL FOR CLINICIANS,1,20488,131.723,0.06261,CAA CANCER JOURNAL FOR CLINICIANS
NEW ENGLAND JOURNAL OF MEDICINE,2,283525,59.558,0.68563,NEW ENGLAND JOURNAL OF MEDICINE
NATURE REVIEWS DRUG DISCOVERY,3,25460,47.12,0.06294,NATURE REVIEWS DRUG DISCOVERY
LANCET,4,195553,44.002,0.40817,LANCET
NATURE BIOTECHNOLOGY,5,48650,43.113,0.15771,NATURE BIOTECHNOLOGY
NATURE REVIEWS IMMUNOLOGY,6,31545,39.416,0.08760,NATURE REVIEWS IMMUNOLOGY
NATURE MATERIALS,7,72306,38.891,0.20799,NATURE MATERIALS
NATURE REVIEWS MOLECULAR CELL BIOLOGY,8,36784,38.602,0.09969,NATURE REVIEWS MOLECULAR CELL BIOLOGY
NATURE,9,627846,38.138,1.44762,NATURE
Annual Review of Astronomy and Astrophysics,10,9000,37.846,0.02021,ANNUAL REVIEW OF ASTRONOMY AND ASTROPHYSICS


In [88]:
# only keep matches where score is above 90, then attach the InCites columns
# by joining on FULL_JOURNAL_TITLE
# reset_index(drop=False) turns the current index of incites_df back into a
# column before the merge
incites_df2 = incites_df.reset_index(drop=False)
full_match2 = pd.merge(
    left=full_matches[full_matches['score'] > 90], right=incites_df2, left_on='FULL_JOURNAL_TITLE', 
    right_on='FULL_JOURNAL_TITLE', how='left')

In [89]:
# Attach the journal-key columns of the non-conference rows (CONF_FLAG == 0),
# joining on the original publication title.
full_match3 = pd.merge(
    left=full_match2, right=jk_df2[jk_df2['CONF_FLAG']==0], on='Publication Title', how='left')

In [91]:
full_match3

Unnamed: 0,CLEAN_PUB_x,FULL_JOURNAL_TITLE,Publication Title,score,RANK,TOTAL_CITES,JOURNAL_IMPACT_FACTOR,EIGENFACTOR_SCORE,CLEAN_PUB_y,JournalID,InCites Spelling,CLEAN_PUB,AdjustedJournalID,CONF_FLAG
0,,PROCEEDINGS OF THE IEEE,Proceedings of the IEEE,100,528,21036,5.629,0.03984,PROCEEDINGS OF THE IEEE,12912,,PROCEEDINGS OF THE IEEE,,0
1,,EVOLUTIONARY COMPUTATION,Evolutionary Computation,100,1368,2291,3.6,0.0028,EVOLUTIONARY COMPUTATION,5194,,EVOLUTIONARY COMPUTATION,,0
2,,APPLIED PHYSICS LETTERS,Applied Physics Letters,100,1783,222233,3.142,0.38389,APPLIED PHYSICS LETTERS,893,,APPLIED PHYSICS LETTERS,,0
3,,JOURNAL OF APPLIED PHYSICS,Journal of Applied Physics,100,3559,151547,2.101,0.20483,JOURNAL OF APPLIED PHYSICS,10157,,JOURNAL OF APPLIED PHYSICS,,0
4,,NEURAL COMPUTATION,Neural Computation,100,4934,7931,1.626,0.00775,NEURAL COMPUTATION,11644,,NEURAL COMPUTATION,,0
5,,LINGUISTIC INQUIRY,Linguistic Inquiry,100,5354,2055,1.511,0.00215,LINGUISTIC INQUIRY,10284,,LINGUISTIC INQUIRY,,0
6,,COMPUTING IN SCIENCE & ENGINEERING,Computing in Science & Engineering,100,5893,1689,1.361,0.00471,COMPUTING IN SCIENCE ENGINEERING,3019,,COMPUTING IN SCIENCE ENGINEERING,,0
7,,REVIEW OF SCIENTIFIC INSTRUMENTS,Review of Scientific Instruments,100,5992,25970,1.336,0.04791,REVIEW OF SCIENTIFIC INSTRUMENTS,13571,,REVIEW OF SCIENTIFIC INSTRUMENTS,,0
8,,BELL LABS TECHNICAL JOURNAL,Bell Labs Technical Journal,100,6564,380,1.2,0.00142,BELL LABS TECHNICAL JOURNAL,1238,,BELL LABS TECHNICAL JOURNAL,,0
9,,COMPUTER,Computer,100,6922,4148,1.115,0.00623,COMPUTER,2439,,COMPUTER,,0


In [None]:
# for the conferences, check if any of these e

In [98]:
# Ad-hoc lookup of one title across the working frames. Only the value of the
# last expression is displayed by the notebook; the earlier lookups are
# evaluated and discarded.
# full_match3[full_match3['CLEAN_PUB_x'].str.contains('TRANSACTIONS ON INDUSTRIAL')]
mag_str = 'SIGNAL PROCESSING MAGAZINE'
incites_df[incites_df['CLEAN_PUB'].str.contains(mag_str)]
jk_df2[jk_df2['CLEAN_PUB'].str.contains(mag_str)]
not_match[not_match['CLEAN_PUB2'].str.contains(mag_str)]
incites_not_matched[incites_not_matched.index.str.contains(mag_str)]
# jk_df2[jk_df2['CONF_FLAG']==0]
# full_match3[full_match3['CLEAN_PUB'].str.contains('SIGNAL PROCESSING')]


Unnamed: 0_level_0,RANK,TOTAL_CITES,JOURNAL_IMPACT_FACTOR,EIGENFACTOR_SCORE,CLEAN_PUB
FULL_JOURNAL_TITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IEEE SIGNAL PROCESSING MAGAZINE,383,6140,6.671,0.01866,IEEE SIGNAL PROCESSING MAGAZINE


In [128]:
# FIXME(review): incomplete expression - `in` is a Python keyword, so this cell
# cannot be parsed as written; the intended selector is not recoverable from
# the notebook.
incites_df[in]

Unnamed: 0,FULL_JOURNAL_TITLE,Publication Title,score
0,SOCIAL HISTORY OF MEDICINE,A Social History of Engineering,60
1,IEEE AEROSPACE AND ELECTRONIC SYSTEMS MAGAZINE,"Aerospace and Electronic Systems Magazine, IEEE",100
2,IEEE AEROSPACE AND ELECTRONIC SYSTEMS MAGAZINE,"Aerospace and Electronic Systems, IEEE Transac...",79
3,JOURNAL OF BIOMECHANICAL ENGINEERING-TRANSACTI...,"American Institute of Electrical Engineers, Tr...",75
4,IEEE ANNALS OF THE HISTORY OF COMPUTING,Annals of the History of Computing,93
5,IEEE ANNALS OF THE HISTORY OF COMPUTING,"Annals of the History of Computing, IEEE",100
6,IEEE ANTENNAS AND PROPAGATION MAGAZINE,"Antennas and Propagation Magazine, IEEE",100
7,IEEE ANTENNAS AND PROPAGATION MAGAZINE,"Antennas and Propagation, IEEE Transactions on",75
8,IEEE ANTENNAS AND PROPAGATION MAGAZINE,"Antennas and Propagation, IRE Transactions on",71
9,IEEE ANTENNAS AND PROPAGATION MAGAZINE,"Antennas and Wireless Propagation Letters, IEEE",74


In [43]:
# merge match key with exact matches
# set_index(..., inplace=True) re-indexes exact_matches by CLEAN_PUB for every
# cell executed after this one as well.
exact_matches.set_index(keys='CLEAN_PUB', drop=True, inplace=True)
# NOTE(review): `.loc[:, ]` has no column selector - looks unfinished; confirm
# which columns were meant to be concatenated.
match_key_full = pd.concat([match_key, exact_matches.loc[:, ]])

Unnamed: 0_level_0,Publication Title
CLEAN_PUB,Unnamed: 1_level_1
3D IMAGING AND ANALYSIS OF DEPTHRANGE IMAGES IEE COLLOQUIUM ON,"3D Imaging and Analysis of Depth/Range Images,..."
80216K2007 AMENDMENT TO IEEE STD 8021D2004,802.16k-2007 (Amendment to IEEE Std 802.1D-2004)
80232005COR 22007 CORRIGENDUM TO IEEE STD 80232005,802.3-2005/Cor 2-2007 (Corrigendum to IEEE Std...
A SOCIAL HISTORY OF ENGINEERING,A Social History of Engineering
ACCESS IEEE,"Access, IEEE"
ACOUSTICS SPEECH AND SIGNAL PROCESSING IEEE TRANSACTIONS ON,"Acoustics, Speech and Signal Processing, IEEE ..."
ACTIVE RF DEVICES CIRCUITS AND SYSTEMS SEMINAR,"Active RF Devices, Circuits and Systems Seminar"
ADVANCED PACKAGING IEEE TRANSACTIONS ON,"Advanced Packaging, IEEE Transactions on"
ADVANCES IN RAILWAY COMMUNICATION SYSTEMS DIGEST OF PAPERS REF NO 1997401 IEE,Advances in Railway Communication Systems. Dig...
AEROSPACE AND ELECTRONIC SYSTEMS MAGAZINE IEEE,"Aerospace and Electronic Systems Magazine, IEEE"


In [44]:
exact_matches

Unnamed: 0,RANK,FULL_JOURNAL_TITLE,TOTAL_CITES,JOURNAL_IMPACT_FACTOR,EIGENFACTOR_SCORE,CLEAN_PUB,Publication Title,JournalID,InCites Spelling,AdjustedJournalID,CONF_FLAG
0,528,PROCEEDINGS OF THE IEEE,21036,5.629,0.03984,PROCEEDINGS OF THE IEEE,Proceedings of the IEEE,12912,,,0
1,1368,EVOLUTIONARY COMPUTATION,2291,3.6,0.0028,EVOLUTIONARY COMPUTATION,Evolutionary Computation,5194,,,0
2,1783,APPLIED PHYSICS LETTERS,222233,3.142,0.38389,APPLIED PHYSICS LETTERS,Applied Physics Letters,893,,,0
3,3559,JOURNAL OF APPLIED PHYSICS,151547,2.101,0.20483,JOURNAL OF APPLIED PHYSICS,Journal of Applied Physics,10157,,,0
4,4934,NEURAL COMPUTATION,7931,1.626,0.00775,NEURAL COMPUTATION,Neural Computation,11644,,,0
5,5354,LINGUISTIC INQUIRY,2055,1.511,0.00215,LINGUISTIC INQUIRY,Linguistic Inquiry,10284,,,0
6,5893,COMPUTING IN SCIENCE & ENGINEERING,1689,1.361,0.00471,COMPUTING IN SCIENCE ENGINEERING,Computing in Science & Engineering,3019,,,0
7,5992,REVIEW OF SCIENTIFIC INSTRUMENTS,25970,1.336,0.04791,REVIEW OF SCIENTIFIC INSTRUMENTS,Review of Scientific Instruments,13571,,,0
8,6564,BELL LABS TECHNICAL JOURNAL,380,1.2,0.00142,BELL LABS TECHNICAL JOURNAL,Bell Labs Technical Journal,1238,,,0
9,6922,COMPUTER,4148,1.115,0.00623,COMPUTER,Computer,2439,,,0


In [None]:
# Best InCites match for each unmatched title that is not flagged as a
# conference; rows with CONF_FLAG != 0 are not scored.
# NOTE(review): calculate_str_sim is defined in a later cell and incites_strs
# is not defined in any visible cell - confirm before running top to bottom.
not_match['STR_SIM'] = not_match.loc[
    not_match['CONF_FLAG']==0, 'CLEAN_PUB'].apply(lambda x: calculate_str_sim(x, incites_strs))

In [None]:
# incites_df.loc[incites_df['CLEAN_PUB'].str.contains('WORLD OF WIRELESS'), :]
# Show the InCites rows whose cleaned title contains 'WIRELESS'
# (str.contains is case-sensitive by default).
incites_df.loc[incites_df['CLEAN_PUB'].str.contains('WIRELESS'), :]

In [None]:
incites_df

In [24]:
# for each of the nonmatches, calculate string sim to each of journal titles
# use fuzz.token_set_ratio to control for out of order
# ex:
# 100 = fuzz.token_set_ratio('IEEE TRANSACTIONS ON WIRELESS COMMUNICATIONS', 'WIRELESS COMMUNICATIONS IEEE TRANSACTIONS ON')
def calculate_str_sim(journal_str, incites_strs):
    """Find the InCites title most similar to ``journal_str``.

    Similarity is ``fuzz.token_set_ratio``. Returns a ``(score, title)``
    tuple for the highest-scoring candidate; on a tie the earliest candidate
    wins. Returns ``(0, None)`` when there are no candidates or none scores
    above 0.
    """
    best_score, best_title = 0, None
    for candidate in incites_strs:
        score = fuzz.token_set_ratio(journal_str, candidate)
        if score <= best_score:
            # not strictly better than what we already have
            continue
        best_score, best_title = score, candidate
    return (best_score, best_title)
