# Analysis of JCR Impact Factor

Analysis of the impact factor of publications in a specific field.

E.g. for "remote sensing" the data can be obtained with:

https://plus.cobiss.si/opac7/jcr?kw=remote%20sensing&max=1000

In [None]:
# Libraries
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Figure size and style
# Default size (width, height), in inches, for every figure created below
plt.rcParams['figure.figsize'] = (15, 10)

In [None]:
# Processing parameters
# Search term describing the field of interest
keyword = "remote sensing"
# Number of top journals (ranked in the latest year) to follow over time
j_top_n = 10
# Length, in years, of the recent window used for the multi-year ranking
j_top_years = 5
# Number of journals to keep from the multi-year ranking
j_top_years_n = 6
# URL query prefix: "kw=" searches by keyword, "ti=" searches by title
search_by = "kw="
# search_by = "ti="
# Apply a rolling mean to the yearly series before plotting
smooth_plot = True

## Read data

In [None]:
# Cobiss base URL (the search query is appended after the "?")
cobiss_url = "https://plus.cobiss.si/opac7/jcr?"
# Trailing query parameter appended to every request
# NOTE(review): presumably caps the number of returned rows at 1000 — confirm
cobiss_param = "&max=1000"

In [None]:
# Read data
from urllib.parse import quote

# Percent-encode the search term itself instead of only replacing spaces, so
# that keywords containing "&", "#", "+" or non-ASCII characters still produce
# a valid query string (spaces are still encoded as %20).
cobiss_jcr = cobiss_url + search_by + quote(keyword) + cobiss_param
# The page may contain several HTML tables; the first one is the one used here
jcr_if = pd.read_html(cobiss_jcr)[0]

In [None]:
# Preview the first rows. The table is already stored in jcr_if by the
# "Read data" cell, so it is not downloaded and parsed a second time here.
jcr_if.head()

## Basic journal info

In [None]:
# Summary statistics of the downloaded table
jcr_if.describe()

In [None]:
# Lookup table of journals: one row per distinct (ISSN, title) pair,
# re-indexed from zero
jcr_if_journals = (
    jcr_if.loc[:, ["ISSN", "Naslov serijske publikacije"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
jcr_if_journals

## Top journals in the field in the last year

In [None]:
# Top journals in the last year
# Latest year present in the data
ylast = jcr_if["Leto"].max()
# Rows belonging to that year only
jcr_if_ylast = jcr_if[jcr_if["Leto"] == ylast]
# Show the j_top_n journals with the highest impact factor; using the
# parameter (not a literal 10) keeps this table in step with the selection
# made from the same frame in the next cell.
jcr_if_ylast.nlargest(j_top_n, 'Faktor vpliva')[["Naslov serijske publikacije", "ISSN"]]

In [None]:
# ISSNs of the j_top_n journals with the highest impact factor in the last year
jcr_if_top_ylast = jcr_if_ylast.nlargest(n=j_top_n, columns='Faktor vpliva').loc[:, "ISSN"]

In [None]:
# Keep only the journal identifier, the year and the impact factor
jcr_if_issn = jcr_if.loc[:, ["ISSN", "Leto", "Faktor vpliva"]]

In [None]:
# Restrict the table to rows whose ISSN is among the selected top journals
jcr_if_top = jcr_if_issn.loc[jcr_if_issn['ISSN'].isin(jcr_if_top_ylast)]

In [None]:
# Pivot to one row per year and one column per ISSN; rows sharing the same
# (year, ISSN) pair are averaged
jcr_if_top_grouped = (
    jcr_if_top
    .groupby(["Leto", "ISSN"])
    .mean()
    .unstack(level=-1)
)
# Flatten the two-level column index so that only the ISSN labels remain
jcr_if_top_grouped.columns = jcr_if_top_grouped.columns.get_level_values("ISSN")

In [None]:
# Display the year-by-ISSN table
jcr_if_top_grouped

In [None]:
# Replace the ISSN column labels with journal titles
issn_to_title = {}
for issn in jcr_if_top_grouped.columns:
    # First title listed for this ISSN in the journal lookup table
    title = jcr_if_journals.loc[
        jcr_if_journals["ISSN"] == issn, "Naslov serijske publikacije"
    ].values[0]
    print(issn, title)
    issn_to_title[issn] = title
jcr_if_top_grouped.rename(columns=issn_to_title, inplace=True)

In [None]:
# Display the table again, now with journal titles as column labels
jcr_if_top_grouped

In [None]:
# Smooth if necessary
# Centered 3-row rolling mean with a uniform ("boxcar") window; min_periods=1
# lets windows with fewer valid values (e.g. at the ends of the series) still
# produce a result.
# NOTE(review): pandas' win_type windows rely on SciPy — confirm it is installed
if smooth_plot:
    jcr_if_top_grouped = jcr_if_top_grouped.rolling(3, win_type="boxcar", min_periods=1, center=True).mean()

## IF of top journals by year

In [None]:
# Plot IF by year (one line per journal column)
ax = jcr_if_top_grouped.plot()
# Anchor the y-axis at zero
ax.set_ylim(bottom=0)
# Draw the legend without a surrounding frame
ax.legend(frameon=False)

## Top journals by IF in last years

In [None]:
# Journals in last years
# NOTE: this rebinds ylast (set earlier to the latest year) to the cutoff
# year; the plot further below reuses it as the left x-axis limit.
ylast = jcr_if["Leto"].max() - j_top_years
# Keep rows strictly newer than the cutoff, i.e. the last j_top_years years
jcr_if_last_n = jcr_if[jcr_if["Leto"] > ylast]

In [None]:
# Keep only the journal identifier, the year and the impact factor
jcr_if_last_n_issn = jcr_if_last_n.loc[:, ["ISSN", "Leto", "Faktor vpliva"]]

In [None]:
# Per-journal summary over the recent window
aggregator = {
    'Faktor vpliva': 'mean',  # average impact factor within the window
    'Leto': 'count',          # number of rows (year entries) within the window
}
jcr_if_last_n_issn_grouped = jcr_if_last_n_issn.groupby(by="ISSN").agg(aggregator)

In [None]:
# Display the per-journal mean impact factor and entry count
jcr_if_last_n_issn_grouped

In [None]:
# Keep the mean impact factor only for journals whose entry count equals the
# window length, i.e. journals with an entry for every year of the window
# NOTE(review): assumes one row per journal and year; duplicate rows would
# inflate the count — confirm
jcr_top_n_y = jcr_if_last_n_issn_grouped.loc[
    jcr_if_last_n_issn_grouped["Leto"] == j_top_years, "Faktor vpliva"
]

In [None]:
# Keep the j_top_years_n journals with the highest mean impact factor
jcr_top_n_y = jcr_top_n_y.nlargest(n=j_top_years_n)

In [None]:
# Restrict the full (all-years) table to the journals selected above; their
# ISSNs are the index of jcr_top_n_y
jcr_if_top_n_y = jcr_if_issn.loc[jcr_if_issn['ISSN'].isin(jcr_top_n_y.index)]

In [None]:
# Pivot to one row per year and one column per ISSN; rows sharing the same
# (year, ISSN) pair are averaged
jcr_if_top_n_y_grouped = (
    jcr_if_top_n_y
    .groupby(["Leto", "ISSN"])
    .mean()
    .unstack(level=-1)
)
# Flatten the two-level column index so that only the ISSN labels remain
jcr_if_top_n_y_grouped.columns = jcr_if_top_n_y_grouped.columns.get_level_values("ISSN")

In [None]:
# Display the year-by-ISSN table for the multi-year ranking
jcr_if_top_n_y_grouped

In [None]:
# Replace the ISSN column labels with journal titles
issn_to_title = {}
for issn in jcr_if_top_n_y_grouped.columns:
    # First title listed for this ISSN in the journal lookup table
    title = jcr_if_journals.loc[
        jcr_if_journals["ISSN"] == issn, "Naslov serijske publikacije"
    ].values[0]
    print(issn, title)
    issn_to_title[issn] = title
jcr_if_top_n_y_grouped.rename(columns=issn_to_title, inplace=True)

In [None]:
# Smooth if necessary
# Centered 3-row rolling mean with a uniform ("boxcar") window; min_periods=1
# lets windows with fewer valid values (e.g. at the ends of the series) still
# produce a result.
# NOTE(review): pandas' win_type windows rely on SciPy — confirm it is installed
if smooth_plot:
    jcr_if_top_n_y_grouped = jcr_if_top_n_y_grouped.rolling(3, win_type="boxcar", min_periods=1, center=True).mean()

In [None]:
# Plot IF by year (one line per journal column)
ax = jcr_if_top_n_y_grouped.plot()
# Anchor the y-axis at zero
ax.set_ylim(bottom=0)
# Start the x-axis at the cutoff year held in ylast (latest year minus
# j_top_years), so only the recent window is shown
ax.set_xlim(left=ylast)
# Draw the legend without a surrounding frame
ax.legend(frameon=False)

## Top journals last year

In [None]:
# Take the row of the most recent year and transpose it, giving one row per
# journal and a single column labelled with that year
jcr_if_top_last_year = jcr_if_top_grouped.loc[
    jcr_if_top_grouped.index == jcr_if_top_grouped.index.max()
].T

In [None]:
# Rank the journals by their value in the latest year, highest first.
# The frame's column is labelled with that year, so sort by the frame's own
# column(s) instead of a hard-coded 2018 label, which raises KeyError as soon
# as the latest year in the data is anything else.
jcr_if_top_last_year.sort_values(by=list(jcr_if_top_last_year.columns), ascending=False)