# Import all the needed libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('whitegrid')

# Import Dataset 

In [2]:
# Load the Scopus journals CSV into the working frame used by every later cell.
journal = pd.read_csv(filepath_or_buffer='Scopus Journals.csv')

# Show the first two rows as a quick sanity check of the load.
journal.head(n=2)

# Exploratory Data Analysis

## Check the Correlation between the features of the dataset

# Correlation heatmap of the numeric features.
# DataFrame.corr() raises on text columns in pandas >= 2.0, so restrict the
# frame to numeric/bool columns first (this matches what older pandas versions
# did implicitly and works on every pandas version).
plt.figure(figsize=(10, 6))
numeric_corr = journal.select_dtypes(include=['number', 'bool']).corr()
# vmin=0.25 pins the lower end of the colour scale, as in the original plot.
sns.heatmap(numeric_corr, cmap='coolwarm', annot=True, vmin=0.25)

# Select Computer Science Subject Area

In [3]:
# Keep only rows whose ASJC code, rendered as text, begins with '17'
# (the Computer Science subject area, per this section's heading).
asjc_code = journal['Scopus ASJC Code'].map(str)
in_subject_area = asjc_code.str.startswith('17')
journal = journal[in_subject_area]

# Select Computer Science Journal

In [4]:
# Keep only rows whose 'Type' is exactly 'Journal', then renumber the rows
# 0..n-1 so the index is contiguous again.
is_journal = journal['Type'] == 'Journal'
not_journal = journal.loc[~is_journal]
journal = journal.loc[is_journal].reset_index(drop=True)

journal.head(n=2)

# Check for empty/null cells in the dataset

# Visualise missing values: each highlighted cell marks a null entry in `journal`.
sns.heatmap(journal.isnull(), yticklabels=False, cbar=False, cmap='viridis')
# tight_layout must be *called*; the bare attribute reference did nothing.
plt.tight_layout()

# Working on the Index Column

In [5]:
# Tag every row with the indexing database; the scalar is broadcast to all rows.
journal['Index'] = 'Scopus'

# Working on the Publisher Column

In [6]:
# Collapse publisher-name variants into one canonical label per major
# publisher, then bucket everything outside the most frequent labels as
# "Others". The result is written to 'Publisher2'; 'Publisher' is untouched.
#
# The column is built as a standalone Series and assigned back explicitly.
# Mutating an aliased column with replace(inplace=True) does not update the
# DataFrame under pandas Copy-on-Write, and looping over every row to call
# replace() was quadratic.

# (name prefix, canonical label) pairs; the prefixes do not overlap.
PUBLISHER_PREFIXES = [
    ('Taylor', 'Taylor and Francis'),
    ('Inderscience', 'Inderscience'),
    ('Association for Computing Machinery (ACM)', 'ACM'),
    ('Elsevier', 'Elsevier'),
    ('Springer', 'Springer'),
    ('IEEE', 'IEEE'),
    ('Institute of Electrical and Electronics Engineers', 'IEEE'),
]
# Number of most frequent labels kept; the rest become "Others".
TOP_PUBLISHER_COUNT = 6

publisher = journal['Publisher'].copy()
for prefix, canonical in PUBLISHER_PREFIXES:
    # na=False: a missing publisher matches no prefix instead of raising.
    has_prefix = publisher.str.startswith(prefix, na=False)
    publisher = publisher.mask(has_prefix, canonical)

# Rank labels *after* normalisation so merged variants count together.
main_publishers = publisher.value_counts().head(TOP_PUBLISHER_COUNT).index.tolist()
# Anything not in the top list (including missing values) becomes "Others".
publisher = publisher.where(publisher.isin(main_publishers), 'Others')

journal['Publisher2'] = publisher

# Working on the Percentile Column

In [7]:
# Map the percentile onto a band code stored in 'Percentile2':
#   0-24 -> 4, 25-49 -> 3, 50-74 -> 2, 75-99 -> 1
# Values outside these inclusive bands (e.g. NaN) are kept unchanged.
#
# Masks are computed from the original 'Percentile' values, so an already
# assigned code can never be re-banded, and the column is assigned back
# explicitly rather than mutated through an aliased Series (which does not
# propagate to the DataFrame under pandas Copy-on-Write).
PERCENTILE_BANDS = [
    (0, 24, 4),
    (25, 49, 3),
    (50, 74, 2),
    (75, 99, 1),
]

percentile = journal['Percentile']
percent = percentile.copy()
for lower, upper, band_code in PERCENTILE_BANDS:
    # Series.between is inclusive on both ends by default.
    percent = percent.mask(percentile.between(lower, upper), band_code)

journal['Percentile2'] = percent

# Joining the Scraped Journal Data

In [8]:
# Attach the scraped columns to the Scopus frame with an index-on-index left join.
# NOTE(review): the join aligns rows by position in the index, not by source id.
# It presumably relies on 'Scraping Journals.csv' being in the same row order as
# the filtered `journal` frame - confirm, e.g. by comparing 'Scopus Source ID'
# with 'scopus_source_id' after the join.
scrap_journal = pd.read_csv(filepath_or_buffer='Scraping Journals.csv')
journal = journal.join(scrap_journal, how='left')

journal.head(n=2)

# Working on the Frequency Column

In [9]:
# Rows with no 'frequency' value are filled with this default label.
DEFAULT_FREQUENCY = 'Bi-monthly'
journal['frequency'] = journal['frequency'].fillna(DEFAULT_FREQUENCY)

# Working on the Open Access Column

In [10]:
# Normalise the casing of the open-access flag into 'Open Access2':
# "YES" -> "Yes", "NO" -> "No"; every other value is left as it is.
# A single mapping-based replace assigned back to the frame avoids mutating an
# aliased column in place (which does not update the DataFrame under pandas
# Copy-on-Write) and avoids looping over every row.
OPEN_ACCESS_LABELS = {'YES': 'Yes', 'NO': 'No'}

journal['Open Access2'] = journal['Open Access'].replace(OPEN_ACCESS_LABELS)
open_access = journal['Open Access2']

# Working on the Print ISSN Column

In [11]:
# Length of an ISSN without its hyphen.
issn_len = 8


def format_issn(raw, width=issn_len):
    """Return ``raw`` as a zero-padded, hyphenated ISSN string (``NNNN-NNNN``).

    Parameters
    ----------
    raw : str, int, float or missing
        A single ISSN value as read from the CSV.
    width : int
        Target length before the hyphen is inserted.

    Returns
    -------
    The missing value unchanged; otherwise a string. Values shorter than
    ``width`` are left-padded with "0". A value exactly ``width`` long after
    padding gets a hyphen after its fourth character; anything longer is
    returned without modification to its characters.
    """
    if pd.isnull(raw):
        return raw
    # A CSV column may be parsed as numbers; a whole float such as 15541045.0
    # would otherwise stringify with a trailing ".0".
    if isinstance(raw, float) and raw.is_integer():
        raw = int(raw)
    text = str(raw).rjust(width, '0')
    if len(text) != width:
        return text
    return text[:4] + '-' + text[4:]


# Build the formatted column element-wise and assign it back explicitly rather
# than mutating an aliased Series in place while iterating over it.
journal['print_issn2'] = journal['print_issn'].map(format_issn)
issn = journal['print_issn2']

# Working on the E-ISSN Column

In [12]:
# Length of an E-ISSN without its hyphen.
eissn_len = 8


# Defined in this cell so the cell is self-contained.
def format_issn(raw, width=eissn_len):
    """Return ``raw`` as a zero-padded, hyphenated ISSN string (``NNNN-NNNN``).

    Parameters
    ----------
    raw : str, int, float or missing
        A single ISSN value as read from the CSV.
    width : int
        Target length before the hyphen is inserted.

    Returns
    -------
    The missing value unchanged; otherwise a string. Values shorter than
    ``width`` are left-padded with "0". A value exactly ``width`` long after
    padding gets a hyphen after its fourth character; anything longer is
    returned without modification to its characters.
    """
    if pd.isnull(raw):
        return raw
    # A CSV column may be parsed as numbers; a whole float such as 15541053.0
    # would otherwise stringify with a trailing ".0".
    if isinstance(raw, float) and raw.is_integer():
        raw = int(raw)
    text = str(raw).rjust(width, '0')
    if len(text) != width:
        return text
    return text[:4] + '-' + text[4:]


# Build the formatted column element-wise and assign it back explicitly rather
# than mutating an aliased Series in place while iterating over it.
journal['e_issn2'] = journal['e_issn'].map(format_issn)
eissn = journal['e_issn2']

In [13]:
# Preview the first two rows of the frame after the cleaning steps above.
journal.head(2)

Unnamed: 0,Scopus Source ID,Title,CiteScore,Percentile,Citation\nCount,Scholarly\nOutput,Percent\nCited,SNIP,SJR,RANK,...,Percentile2,scopus_source_id,print_issn,e_issn,frequency,review_time,journal_website,Open Access2,print_issn2,e_issn2
0,11900154400,International Journal of Information Technolog...,1.29,60,63,49,63,0.635,0.191,82,...,2,11900154400,15541045,15541053,Quarterly,,https://www.igi-global.com/gateway/journal/1093,No,1554-1045,1554-1053
1,12100154817,International Journal of Wireless and Mobile C...,0.49,22,137,280,30,0.252,0.168,160,...,4,12100154817,17411084,17411092,Bi-monthly,,https://www.inderscience.com/jhome.php?jcode=i...,No,1741-1084,1741-1092


In [14]:
# List the current column labels before dropping and renaming columns.
journal.columns

Index(['Scopus Source ID', 'Title', 'CiteScore', 'Percentile',
       'Citation\nCount', 'Scholarly\nOutput', 'Percent\nCited', 'SNIP', 'SJR',
       'RANK', 'Rank\nOut Of', 'Publisher', 'Type', 'Open Access',
       'Scopus ASJC Code', 'Scopus Sub-Subject Area', 'Quartile',
       'Top 10% (CiteScore Percentile)', 'Scopus Source ID.1', 'Print ISSN',
       'E-ISSN', 'Index', 'Publisher2', 'Percentile2', 'scopus_source_id',
       'print_issn', 'e_issn', 'frequency', 'review_time', 'journal_website',
       'Open Access2', 'print_issn2', 'e_issn2'],
      dtype='object')

In [15]:
# Remove the columns that are not carried into the final dataset; the
# formatted 'print_issn2' / 'e_issn2' columns remain in the frame.
SUPERSEDED_COLUMNS = ['Print ISSN', 'E-ISSN', 'print_issn', 'e_issn', 'scopus_source_id']
journal = journal.drop(columns=SUPERSEDED_COLUMNS)

In [16]:
# Rename every column to a snake_case label.
# The former 'Frequency' -> 'frequency' entry was removed: the frame has no
# 'Frequency' column (it is already named 'frequency'), and rename() silently
# ignores labels that do not exist, so the entry never did anything.
COLUMN_RENAMES = {
    'Scopus Source ID': 'scopus_source_id',
    'Title': 'title',
    'CiteScore': 'citescore',
    'Percentile': 'percentile',
    'Citation\nCount': 'citation_count',
    'Scholarly\nOutput': 'scholarly_output',
    'Percent\nCited': 'percent_cited',
    'SNIP': 'snip',
    'SJR': 'sjr',
    'RANK': 'rank',
    'Rank\nOut Of': 'rank_outof',
    'Publisher': 'publisher',
    'Type': 'type',
    'Open Access': 'open_access',
    'Scopus ASJC Code': 'scopus_asjc_code',
    'Scopus Sub-Subject Area': 'subject_area',
    'Quartile': 'quartile',
    'Top 10% (CiteScore Percentile)': 'top_10%',
    'Scopus Source ID.1': 'scopus_link',
    'Index': 'index',
    'Publisher2': 'publisher2',
    'Percentile2': 'percentile2',
    'Open Access2': 'open_access2',
    # The formatted ISSN columns take over the plain names.
    'print_issn2': 'print_issn',
    'e_issn2': 'e_issn',
}

# Reassign instead of inplace=True so the cell reads as a plain transformation.
journal = journal.rename(columns=COLUMN_RENAMES)

In [17]:
# Write the cleaned dataset to a new CSV file, without the row index.
OUTPUT_CSV = 'Ranking Scopus Journals.csv'
journal.to_csv(path_or_buf=OUTPUT_CSV, index=False)