
# Import all the needed libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('whitegrid')

# Import Datasets

In [2]:
# Read the three input tables, in order: the Scopus source list, the
# SCIE/ISI journal list, and the scraped journal details.
scopus, scieisi, scrap_journal = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Scopus Journals.csv',
        'SCIEISI Journals.csv',
        'Scraping Journals.csv',
    )
)

# Scopus Dataset

In [3]:
# Computer Science subject area: ASJC codes whose text form starts with '17'
in_computer_science = scopus['Scopus ASJC Code'].map(str).str.startswith('17')

# Only sources whose type is exactly 'Journal'
is_journal = scopus['Type'] == 'Journal'

# Apply both filters at once and renumber the rows 0..n-1
scopus = scopus[in_computer_science & is_journal].reset_index(drop=True)

# Attach the scraped data by row position (index-on-index join), then drop the
# original ISSN columns and the scraped id column
scopus = (
    scopus
    .join(scrap_journal)
    .drop(columns=['Print ISSN', 'E-ISSN', 'scopus_source_id'])
)

# Working on the Print ISSN and E-ISSN Columns
ISSN_LENGTH = 8


def _format_issn(raw):
    """Return `raw` as a zero-padded, hyphenated ISSN string.

    Missing values are returned unchanged. Values shorter than ISSN_LENGTH
    characters are left-padded with '0'; a value that is then exactly
    ISSN_LENGTH characters long gets a hyphen after the fourth character
    ('15541045' -> '1554-1045'). Longer values are returned as they are.
    Whole numbers (int, or float with no fractional part) are converted to
    their digit string first, so a numerically parsed column does not crash.
    """
    if pd.isnull(raw):
        return raw
    if isinstance(raw, float) and raw.is_integer():
        raw = int(raw)
    text = str(raw).rjust(ISSN_LENGTH, '0')
    if len(text) == ISSN_LENGTH:
        return text[:4] + '-' + text[4:]
    return text


# Build each formatted column in one pass and assign it back to the frame.
# Assigning the result (rather than replacing in place on a Series pulled out
# of the frame) keeps this correct when pandas Copy-on-Write is enabled.
scopus['Print ISSN'] = scopus['print_issn'].map(_format_issn)
scopus['E-ISSN'] = scopus['e_issn'].map(_format_issn)

# The raw scraped ISSN columns are no longer needed
scopus = scopus.drop(columns=['print_issn', 'e_issn'])

# Tag every row with the index it was taken from
scopus['Index'] = 'Scopus'

scopus.head(2)

Unnamed: 0,Scopus Source ID,Title,CiteScore,Percentile,Citation\nCount,Scholarly\nOutput,Percent\nCited,SNIP,SJR,RANK,...,Scopus Sub-Subject Area,Quartile,Top 10% (CiteScore Percentile),Scopus Source ID.1,frequency,review_time,journal_website,Print ISSN,E-ISSN,Index
0,11900154400,International Journal of Information Technolog...,1.29,60,63,49,63,0.635,0.191,82,...,General Computer Science,Quartile2,,https://www.scopus.com/sourceid/11900154400,Quarterly,,https://www.igi-global.com/gateway/journal/1093,1554-1045,1554-1053,Scopus
1,12100154817,International Journal of Wireless and Mobile C...,0.49,22,137,280,30,0.252,0.168,160,...,General Computer Science,Quartile4,,https://www.scopus.com/sourceid/12100154817,Bi-monthly,,https://www.inderscience.com/jhome.php?jcode=i...,1741-1084,1741-1092,Scopus


# SCIEISI Dataset

In [4]:
# Drop rows that lack either a Web of Science category or a print ISSN
scieisi = scieisi.dropna(subset=['Web of Science Categories', 'ISSN'])

# Keep journals whose category list (entries separated by ' | ' or ', ')
# contains an entry that is exactly "Computer Science"
is_computer_science = scieisi['Web of Science Categories'].map(
    lambda categories: "Computer Science" in categories.replace(' | ', ', ').split(', ')
)
scieisi = scieisi[is_computer_science].reset_index(drop=True)

# Remove unwanted columns and rename the remaining ones to match the Scopus frame
scieisi = (
    scieisi
    .drop(columns=['Publisher address', 'Languages'])
    .rename(columns={
        'Journal title': 'Title',
        'ISSN': 'Print ISSN',
        'eISSN': 'E-ISSN',
        'Publisher name': 'Publisher',
    })
)

# Tag every row with the index it was taken from
scieisi['Index'] = 'SCIEISI'

scieisi.head(2)

Unnamed: 0,Title,Print ISSN,E-ISSN,Publisher,Web of Science Categories,Index
0,ACM COMPUTING SURVEYS,0360-0300,1557-7341,ASSOC COMPUTING MACHINERY,"Computer Science, Theory & Methods",SCIEISI
1,ACM JOURNAL ON COMPUTING AND CULTURAL HERITAGE,1556-4673,1556-4711,ASSOC COMPUTING MACHINERY,"Computer Science, Interdisciplinary Applications",SCIEISI


# Merge Dataset

In [5]:
# Print ISSNs known to Scopus, held in a set so each membership test is O(1)
# instead of rebuilding and scanning a list for every SCIEISI row.
scopus_issn_set = set(scopus['Print ISSN'].dropna())

# SCIEISI print ISSNs that also occur in Scopus (SCIEISI order, duplicates kept)
present_journals = [
    issn for issn in scieisi['Print ISSN'] if issn in scopus_issn_set
]

# For every shared ISSN, copy the matching Scopus rows and relabel them as SCIEISI
merge_journals = [
    scopus.loc[scopus['Print ISSN'] == issn].assign(Index="SCIEISI")
    for issn in present_journals
]

# Stack the relabelled copies under the Scopus rows. DataFrame.append was
# removed in pandas 2.0; pd.concat produces the same frame on every version.
journal = pd.concat([scopus] + merge_journals, ignore_index=True)

# Working on the Publisher Column

In [6]:
# Publisher-name prefix -> group name used in the Publisher2 column
PUBLISHER_PREFIXES = {
    'Taylor': 'Taylor and Francis',
    'Inderscience': 'Inderscience',
    'Association for Computing Machinery (ACM)': 'ACM',
    'Elsevier': 'Elsevier',
    'Springer': 'Springer',
    'IEEE': 'IEEE',
    'Institute of Electrical and Electronics Engineers': 'IEEE',
}
# Number of most frequent publisher groups that keep their own name
MAIN_PUBLISHER_COUNT = 6


def _group_publishers(publishers):
    """Collapse publisher names into a few groups.

    Every name starting with a key of PUBLISHER_PREFIXES is replaced by the
    mapped group name. After that, the MAIN_PUBLISHER_COUNT most frequent
    values are kept and every other value is replaced by "Others". Missing
    names match no prefix and are never counted, so they also become "Others".
    Returns a new Series; the input is not modified.
    """
    grouped = publishers.copy()
    for prefix, group_name in PUBLISHER_PREFIXES.items():
        # na=False: a missing publisher simply does not match the prefix
        matches = grouped.str.startswith(prefix, na=False)
        grouped = grouped.mask(matches, group_name)
    main_publishers = grouped.value_counts().head(MAIN_PUBLISHER_COUNT).index
    return grouped.where(grouped.isin(main_publishers), "Others")


# Assign the result back to the frame instead of replacing in place on a Series
# pulled out of it, so the column is updated even under pandas Copy-on-Write.
journal['Publisher2'] = _group_publishers(journal['Publisher'])

# Working on the Percentile Column

In [7]:
# Recode the percentile into four bucket codes:
#   [0, 25) -> 400, [25, 50) -> 300, [50, 75) -> 200, [75, 100] -> 100
# The ranges are contiguous, so a fractional percentile such as 24.5 is also
# bucketed. Values outside 0..100 and missing values are left unchanged.
percentile = journal['Percentile']
bucket_conditions = [
    ((percentile >= 0) & (percentile < 25)).to_numpy(),
    ((percentile >= 25) & (percentile < 50)).to_numpy(),
    ((percentile >= 50) & (percentile < 75)).to_numpy(),
    ((percentile >= 75) & (percentile <= 100)).to_numpy(),
]
bucket_codes = [400, 300, 200, 100]

# Compute the whole column in one vectorised pass and assign it back to the
# frame, rather than replacing values in place on a Series pulled out of it.
journal['Percentile2'] = np.select(
    bucket_conditions, bucket_codes, default=percentile.to_numpy()
)

0    200
1    400
2    100
3    200
4    100
Name: Percentile2, dtype: int64


# Working on the Frequency Column

In [8]:
# Rows with no frequency value get this one
DEFAULT_FREQUENCY = 'Bi-monthly'
journal['Frequency2'] = journal['frequency'].fillna(DEFAULT_FREQUENCY)

# Working on the Open Access Column

In [9]:
# Normalise the upper-case flags to title case; every other value (including
# missing ones) is kept as is. The result is assigned back to the frame rather
# than replaced in place on a Series pulled out of it, so the column is
# updated even when pandas Copy-on-Write is enabled.
journal['Open Access2'] = journal['Open Access'].replace({"YES": "Yes", "NO": "No"})

In [10]:
# Display the current column labels of the journal frame.
journal.columns

Index(['Scopus Source ID', 'Title', 'CiteScore', 'Percentile',
       'Citation\nCount', 'Scholarly\nOutput', 'Percent\nCited', 'SNIP', 'SJR',
       'RANK', 'Rank\nOut Of', 'Publisher', 'Type', 'Open Access',
       'Scopus ASJC Code', 'Scopus Sub-Subject Area', 'Quartile',
       'Top 10% (CiteScore Percentile)', 'Scopus Source ID.1', 'frequency',
       'review_time', 'journal_website', 'Print ISSN', 'E-ISSN', 'Index',
       'Publisher2', 'Percentile2', 'Frequency2', 'Open Access2'],
      dtype='object')

In [11]:
# Remove the raw 'frequency' column; its filled copy lives in 'Frequency2'
journal = journal.drop(columns=['frequency'])

In [12]:
# Original column label -> snake_case label
COLUMN_RENAMES = {
    'Scopus Source ID': 'scopus_source_id',
    'Title': 'title',
    'CiteScore': 'citescore',
    'Percentile': 'percentile',
    'Citation\nCount': 'citation_count',
    'Scholarly\nOutput': 'scholarly_output',
    'Percent\nCited': 'percent_cited',
    'SNIP': 'snip',
    'SJR': 'sjr',
    'RANK': 'rank',
    'Rank\nOut Of': 'rank_outof',
    'Publisher': 'publisher',
    'Type': 'type',
    'Open Access': 'open_access',
    'Scopus ASJC Code': 'scopus_asjc_code',
    'Scopus Sub-Subject Area': 'subject_area',
    'Quartile': 'quartile',
    'Top 10% (CiteScore Percentile)': 'top_10%',
    'Scopus Source ID.1': 'scopus_link',
    'Index': 'index',
    'Print ISSN': 'print_issn',
    'E-ISSN': 'e_issn',
    'Publisher2': 'publisher2',
    'Percentile2': 'percentile2',
    'Frequency2': 'frequency',
    'Open Access2': 'open_access2',
}

# Labels that are not keys of the mapping (e.g. 'review_time') stay as they are
journal = journal.rename(columns=COLUMN_RENAMES)

In [13]:
# Display the column labels again to check the result of the rename.
journal.columns

Index(['scopus_source_id', 'title', 'citescore', 'percentile',
       'citation_count', 'scholarly_output', 'percent_cited', 'snip', 'sjr',
       'rank', 'rank_outof', 'publisher', 'type', 'open_access',
       'scopus_asjc_code', 'subject_area', 'quartile', 'top_10%',
       'scopus_link', 'review_time', 'journal_website', 'print_issn', 'e_issn',
       'index', 'publisher2', 'percentile2', 'frequency', 'open_access2'],
      dtype='object')

In [14]:
# Final column order for the exported table
column_name = [
    # identity and citation metrics
    'scopus_source_id', 'title', 'citescore', 'percentile',
    'citation_count', 'scholarly_output', 'percent_cited',
    'snip', 'sjr', 'rank', 'rank_outof',
    # source description
    'publisher', 'type', 'open_access',
    'scopus_asjc_code', 'subject_area', 'quartile', 'top_10%',
    'scopus_link', 'index',
    # derived columns and scraped details
    'publisher2', 'percentile2', 'frequency',
    'journal_website', 'review_time', 'open_access2',
    # identifiers
    'print_issn', 'e_issn',
]

In [15]:
# Put the columns in the order listed in column_name
journal = journal.reindex(column_name, axis='columns')

In [16]:
# Write the final table to a new CSV file, leaving out the row index
OUTPUT_CSV = 'Ranking Journals.csv'
journal.to_csv(OUTPUT_CSV, index=False)