# Analysis of JCR Impact Factor

Analysis of the impact factor of publications in a specific field.

E.g. for "remote sensing" the data can be obtained with:

https://plus.cobiss.si/opac7/jcr?kw=remote%20sensing&max=1000

In [None]:
# Libraries
import urllib.parse
from pathlib import Path
from urllib.request import Request, urlopen

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Default size of every figure drawn below, as (width, height)
fig_width, fig_height = 15, 10
plt.rcParams['figure.figsize'] = (fig_width, fig_height)

## Processing parameters

Set the URL parameters, keywords and arguments.

In [None]:
# Parameters

# Query URL template for the impact-factor (IF) search.
# The placeholders {0}, {1}, {2} are filled, in that order, with the year,
# the title keywords and the ISSN; a blank value leaves that field empty:
# https://plus.si.cobiss.net/opac7/jcr?py=&ti=&sc=&max=100
if_base_url = (
    'https://plus.si.cobiss.net/opac7/jcr'
    '?py={0}&ti={1}&sc={2}&max=100'
)

# CSV file the collected IF table is written to
if_fn = './data/remote_sensing_if.csv'

# Keywords used to search for journals
journal_kw = 'remote sensing'

# Top journals: how many to list, and the minimum IF a journal must reach
top_n, top_if = 15, 2

## Get list of journals for last year

In [None]:
# Find last year data is published.
# One fixed journal is queried by ISSN with year and title left blank; the
# next cell takes the largest 'Leto' value from the returned table.
# NOTE(review): presumably the result holds one row per published year for
# that journal - confirm against the service.
s_year = urllib.parse.quote('')
s_kw = urllib.parse.quote('')
s_issn = urllib.parse.quote('2168-6831')
if_url = if_base_url.format(s_year, s_kw, s_issn)
# Read webpage; the with-block closes the HTTP response once it has been read
# NOTE(review): the User-Agent header is presumably needed because the server
# rejects urllib's default agent - confirm.
req = Request(if_url, headers={'User-Agent': 'Mozilla/5.0'})
with urlopen(req) as response:
    webpage = response.read()
# Keep the first table found on the page
if_rs_df = pd.read_html(webpage)[0]

In [None]:
# Largest value in the 'Leto' column of the table just read
year = if_rs_df.loc[:, 'Leto'].max()
year

In [None]:
print('Getting list of journals for {}'.format(year))
# Search by year and title keywords; the ISSN field is left blank
s_year = urllib.parse.quote(str(year))
s_kw = urllib.parse.quote(journal_kw)
s_issn = urllib.parse.quote('')
if_url = if_base_url.format(s_year, s_kw, s_issn)
# Read webpage; the with-block closes the HTTP response once it has been read
req = Request(if_url, headers={'User-Agent': 'Mozilla/5.0'})
with urlopen(req) as response:
    webpage = response.read()
# Read IF: keep the first table found on the page
if_rs_df = pd.read_html(webpage)[0]
if_rs_df.head()

In [None]:
# Remove the 'Št.' column, which is not used further on
if_rs_df = if_rs_df.drop(columns=['Št.'])
if_rs_df

## Top journals

In [None]:
# Rank the journals by 'Faktor vpliva', highest first, and show the first top_n
by_impact = if_rs_df.sort_values(by='Faktor vpliva', ascending=False)
by_impact.head(top_n)

In [None]:
# Keep only the journals whose IF is at least the top_if threshold
meets_threshold = if_rs_df['Faktor vpliva'] >= top_if
if_rs_df = if_rs_df[meets_threshold]
if_rs_df

## ISSNs of top journals

In [None]:
# Get ISSNs: an entry may hold several values separated by '/';
# only the part before the first '/' is kept
first_issn = if_rs_df['ISSN'].str.split('/').str[0]
issn_list = list(first_issn)
issn_list

## Get IF for top journals by ISSNs

Read IF for top journals by ISSNs for the years 2000 and forward.

In [None]:
# Create empty DF so that `if_df` exists before the per-journal IF data
# is collected
if_df = pd.DataFrame()

In [None]:
# Download the IF table of every selected journal and stack them into if_df.
# The per-journal tables are gathered in a list and concatenated once, so the
# growing table is not copied on every pass.
frames = []
for issn in issn_list:
    print('Reading IF for {}'.format(issn))
    # Year and title stay blank, so the query is matched on the ISSN alone
    s_year = urllib.parse.quote('')
    s_kw = urllib.parse.quote('')
    s_issn = urllib.parse.quote(issn)
    if_url = if_base_url.format(s_year, s_kw, s_issn)
    # Read webpage; the with-block closes the HTTP response after reading
    req = Request(if_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as response:
        webpage = response.read()
    # Read IF: keep the first table found on the page
    df = pd.read_html(webpage)[0]
    df = df.drop(['Št.'], axis=1)

    # Only 2000 and later
    df = df.loc[df['Leto'] >= 2000]

    frames.append(df)

# Rebuild if_df from scratch so that re-running this cell does not append
# the same rows a second time; an empty ISSN list yields an empty table
if_df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Preview the first rows of the combined table
if_df.head()

In [None]:
# Save the dataframe. The target folder is created first, because to_csv
# fails when the directory part of the path does not exist.
Path(if_fn).parent.mkdir(parents=True, exist_ok=True)
if_df.to_csv(if_fn, index=False)

In [None]:
# The ISSN column is not needed for the reshaping below; remove it in place
if_df.drop(columns=['ISSN'], inplace=True)

## Publications by years and titles

In [None]:
# Sum the remaining columns for every (year, publication title) pair
per_year_title = if_df.groupby(by=['Leto', 'Naslov serijske publikacije'])
if_wide = per_year_title.agg('sum')
if_wide

In [None]:
# Move the innermost index level (the publication title) into the columns,
# leaving one row per year
if_wide = if_wide.unstack(level=-1)

In [None]:
# Drop the outer level of the column index so only the titles remain
if_wide = if_wide.droplevel(0, axis='columns')

In [None]:
# Inspect the reshaped table
if_wide

In [None]:
# Order the journal columns by their value in the last year, highest first.
# `year` (the last published year found earlier) replaces the undefined
# name `last`, which made this cell fail with a NameError.
if_wide = if_wide.sort_values(by=year, axis=1, ascending=False)

## Top journals in the field in the last year

In [None]:
# Row of the wide table that belongs to the last year
if_top_ylast = if_wide.loc[year, :]

In [None]:
# Print the values selected for the last year
print(if_top_ylast)

In [None]:
# Plot one line per column of the wide table, with a dot at every data point
year_ticks = list(range(2000, 2021, 5))
if_wide.plot(marker='.')
# Axes: a tick every five years, plus the axis labels
plt.xticks(year_ticks)
plt.xlabel('Year')
plt.ylabel('IF')
# Title and a frameless legend in the upper-left corner
plt.title('Remote Sensing IF by Year')
plt.legend(loc='upper left', frameon=False)
plt.tight_layout()
plt.show()