In [1]:
# data from stats

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import matplotlib.ticker as mtick
import matplotlib.patches as patches
from matplotlib.lines import Line2D
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

In [2]:
# Base directory that every input file in this notebook is read from
# (palette.xlsx, table_in_doc.xlsx and the CSV sub-folders are joined onto it).
root = r"my root"
# Directory the figures are exported to via fig.write_image(...).
graph_root = r"my graph root"

In [3]:
# read colormap
def get_palette(attr="disc"):
    """Load one colour palette from ``palette.xlsx`` located under ``root``.

    ``attr`` picks the sheet and its key column:

    * ``"disc"``  -> first sheet, key column ``discipline``
    * ``"ctry"``  -> second sheet, key column ``country``
    * anything else -> third sheet, key column ``quartile``

    Returns
    -------
    (color_dict, order)
        ``color_dict`` maps each key-column value to the sheet's ``code``
        column; ``order`` is the key column itself, in sheet row order.
    """
    if attr == "disc":
        col, sheet = "discipline", 0
    elif attr == "ctry":
        col, sheet = "country", 1
    else:
        col, sheet = "quartile", 2

    color_df = pd.read_excel(os.path.join(root, "palette.xlsx"), sheet_name=sheet)
    color_dict = color_df[[col, "code"]].set_index(col).to_dict()["code"]
    return color_dict, color_df[col]

In [4]:
# 95% CI
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and the FULL width of its t-based confidence interval.

    The half-width is ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``.
    The second returned value is twice that half-width, i.e. the distance
    between the lower and the upper bound of the interval.
    """
    values = 1.0 * np.array(data)
    dof = len(values) - 1
    t_crit = stats.t.ppf((1 + confidence) / 2., dof)
    half_width = stats.sem(values) * t_crit
    return np.mean(values), half_width * 2

# Discipline

In [6]:
# Short tick labels for the field axis. The plotting cells pass this list as
# `ticktext` next to `tickvals=order`, so entries are matched by position.
field_xticks = [
    "Arts",
    "Biology",
    "Biomedical",     # Biomedical Research
    "Chemistry",
    "Medicine",       # Clinical Medicine
    "Earth & Space",
    "Engineering",
    "Health",
    "Humanities",
    "Mathematics",
    "Physics",
    "Professional",   # Professional Fields
    "Psychology",
    "Soc Sci",        # Social Sciences
]

In [84]:
#########
# number of journals per field
#########

x = "field"
y = "Journals"
ylim = None
title = "# journals"

df = pd.read_excel(os.path.join(root, "table_in_doc.xlsx"), sheet_name=0)
color_dict, order = get_palette(attr="disc")
df["color"] = df[x].map(color_dict)

# one bar per field, each annotated with its value
fig = px.bar(df, x=x, y=y, text_auto=True)

fig.update_traces(
    marker={"color": df["color"]},
    textposition="outside",
    cliponaxis=False,
)

fig.update_layout(
    height=300,
    width=400,
    template="simple_white",
    font={"family": "Arial"},
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
)

# palette order fixes the category sequence; the short labels replace the full names
fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    tickangle=35,
    title={"text": "Journal field", "standoff": 0},
    tickvals=order,
    ticktext=field_xticks,
)

fig.update_yaxes(
    showgrid=True,
    minor={"nticks": 5, "showgrid": True},
    title={"text": "# journals", "standoff": 0.0},
)
fig.show()
fig.write_image(os.path.join(graph_root, "no_journ_disc.pdf"))

In [7]:
# total vs. group-exclusive reviewers per field, with the exclusive share on a second axis

df = pd.read_excel(os.path.join(root, "table_in_doc.xlsx"), sheet_name=0)
# the sheet stores the share as a percentage number; rescale to a fraction for ".0%" ticks
df["Perc. of group-exclusive reviewers"] = df["Perc. of group-exclusive reviewers"] / 100
color_dict, order = get_palette(attr="disc")
df["color"] = df["field"].map(color_dict)

fig = make_subplots(specs=[[{"secondary_y": True}]])

# translucent bar: all reviewers
fig.add_trace(
    go.Bar(
        x=df["field"],
        y=df["Reviewers"],
        name="Reviewer",
        opacity=0.6,
        marker={"color": df["color"]},
    ),
)

# hatched bar drawn over it: group-exclusive reviewers
fig.add_trace(
    go.Bar(
        x=df["field"],
        y=df["Group-exclusive reviewers"],
        name="Field-specific reviewer",
        marker={"color": df["color"], "pattern": {"shape": "/"}},
    ),
)

# black line on the secondary axis: share of group-exclusive reviewers
fig.add_trace(
    go.Scatter(
        x=df["field"],
        y=df["Perc. of group-exclusive reviewers"],
        name="%",
        line={"color": "black"},
        hovertemplate="%{y:.2%}",
        marker={"symbol": "diamond"},
    ),
    secondary_y=True,
)

fig.update_layout(
    height=300,
    width=420,
    template="simple_white",
    font={"family": "Arial"},
    legend={
        "yanchor": "top",
        "y": 1.1,
        "xanchor": "right",
        "x": 0.95,
        "bgcolor": "rgba(0,0,0,0)",
    },
    barmode="overlay",
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
)

fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    title={"text": "Journal field", "standoff": 0},
    tickvals=order,
    ticktext=field_xticks,
    tickangle=35,
)

# first call styles every y axis; the second call overrides the secondary one only
fig.update_yaxes(
    showgrid=True,
    range=(0, 160000),
    dtick=160000 / 4,
    minor={"nticks": 5, "showgrid": True},
    title={"text": "# reviewers", "standoff": 0.0},
)

fig.update_yaxes(
    showgrid=False,
    range=(0, 1),
    dtick=1 / 4,
    tickformat=".0%",
    minor={"nticks": 5, "showgrid": False},
    title={"text": "% field-specific reviewers", "standoff": 0.0},
    secondary_y=True,
)
fig.show()
fig.write_image(os.path.join(graph_root, "no_rev_disc.pdf"))

In [11]:
# mean, standard deviation and median of the `reviewer_id` column per field (d2)

df = pd.read_csv(os.path.join(root, r"reviewer_stat/d2_reviewer.csv"))
# Aggregators are given by name: passing np.mean/np.std/np.median to .agg is
# deprecated in pandas >= 2.1 and would eventually switch std from the pandas
# default (ddof=1) to numpy's (ddof=0). The output column names are unchanged.
df.groupby("d2")["reviewer_id"].agg(["mean", "std", "median"])

Unnamed: 0_level_0,mean,std,median
d2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arts,42.833333,46.642886,18.0
Biology,175.765391,345.751823,34.0
Biomedical Research,190.485269,596.152211,32.0
Chemistry,124.217054,270.842538,20.0
Clinical Medicine,181.124107,443.273382,48.0
Earth and Space,107.744868,305.408746,23.0
Engineering and Technology,236.701564,1005.198473,38.0
Health,152.517928,217.633901,64.0
Humanities,51.209091,97.090339,24.0
Mathematics,138.609091,241.714787,31.0


In [5]:
# mean, standard deviation and median of the weighted degree per field

df = pd.read_csv(os.path.join(root, r"degree/disc_degree.csv"))
# Aggregators are given by name: passing np.mean/np.std/np.median to .agg is
# deprecated in pandas >= 2.1 and would eventually switch std from the pandas
# default (ddof=1) to numpy's (ddof=0). The output column names are unchanged.
df.groupby("Field")["Weighted degree"].agg(["mean", "std", "median"])

Unnamed: 0_level_0,mean,std,median
Field,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arts,55.666667,71.188234,22.5
Biology,822.936772,1696.352719,235.0
Biomedical Research,1013.634315,3394.253554,283.0
Chemistry,554.848837,806.819675,265.5
Clinical Medicine,670.564003,1390.928556,275.0
Earth and Space,433.909091,927.688202,173.0
Engineering and Technology,789.233454,2269.217975,242.0
Health,445.111554,642.42004,204.0
Humanities,85.163636,230.805988,24.5
Mathematics,423.636364,767.275972,127.5


In [88]:
# Average share of within-field weighted degree per field, with 95% CI error bars

df = pd.read_csv(os.path.join(root, r"intra-group/disc_intra_group.csv"))
x='Field'
y='% intra-group weighted degree'
color_dict, order = get_palette(attr="disc")

# mean_confidence_interval returns (mean, full CI width). Plotly's error_y is
# the length of ONE arm of a symmetric error bar, so the width is halved here;
# otherwise the bars would span mean +/- 2h, twice the 95% interval.
# The lambda argument is named `s` so it does not shadow the global `x`.
base = df.groupby(x)[y].agg([lambda s: mean_confidence_interval(s)[0],
                             lambda s: mean_confidence_interval(s)[1] / 2]).reset_index()
base.columns=["field", "mean", "ci"]

# bar colour per field
base["color"] = base["field"].map(color_dict)

# draw bar charts
fig = px.bar(base,
             x="field",
             y="mean",
             error_y="ci")

fig.update_traces(marker=dict(color=base["color"]),
                  textposition="outside",
                  hovertemplate="%{y:.2%}",
                  cliponaxis=False)

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"),
                  margin=dict(l=30, r=30, t=30, b=30),
                  )

# palette order fixes the category sequence; short labels replace the full names
fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="Journal field",
                            standoff=0,
                            ),
                 tickvals=order,
                 ticktext=field_xticks,
                 tickangle=35,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Average % within-field degrees",
                            standoff=0.0),
                 )
fig.show()
fig.write_image(os.path.join(graph_root, "perc_within_disc.pdf"))

In [25]:
# Average % women reviewers for journals, per field, with 95% CI error bars

df = pd.read_csv(os.path.join(root, r"reviewer_gender/d2_reviewer_gender.csv"))
x='d2'
y='Percentage'
color_dict, order = get_palette(attr="disc")
# rescale the percentage column to a fraction so the ".0%" tick format applies
df[y] = df[y] / 100

# mean_confidence_interval returns (mean, full CI width). Plotly's error_y is
# the length of ONE arm of a symmetric error bar, so the width is halved here;
# otherwise the bars would span mean +/- 2h, twice the 95% interval.
# The lambda argument is named `s` so it does not shadow the global `x`.
base = df.groupby(x)[y].agg([lambda s: mean_confidence_interval(s)[0],
                             lambda s: mean_confidence_interval(s)[1] / 2]).reset_index()
base.columns=["field", "mean", "ci"]

# bar colour per field
base["color"] = base["field"].map(color_dict)

# draw bar charts
fig = px.bar(base,
             x="field",
             y="mean",
             error_y="ci")

fig.update_traces(marker=dict(color=base["color"]),
                  textposition="outside",
                  cliponaxis=False)

fig.update_layout(height=350,
                  width=450,
                  template="simple_white",
                  font=dict(family="Arial"))

# palette order fixes the category sequence; short labels replace the full names
fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="Journal field"),
                 tickvals=order,
                 ticktext=field_xticks,
                 tickangle=35,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Average % women reviewers",
                            standoff=0.0),
                 )
fig.show()
fig.write_image(os.path.join(graph_root, "perc_women_disc.pdf"))

In [89]:
# % female among all reviewers vs. among field-exclusive reviewers

color_dict, order = get_palette(attr="disc")
df = pd.read_csv(os.path.join(root, "reviewer_gender/d2_group_gender.csv"))
df["color"] = df["d2"].map(color_dict)

fig = go.Figure()

# translucent circle: all reviewers
fig.add_trace(
    go.Scatter(
        x=df["d2"],
        y=df["reviewer"],
        name="Reviewer",
        mode="markers",
        hovertemplate="%{y:.2%}",
        marker={
            "color": df["color"],
            "symbol": "circle",
            "size": 10,
            "opacity": 0.6,
            "line": {"width": 1, "color": df["color"]},
        },
    ),
)

# solid diamond: field-exclusive reviewers
fig.add_trace(
    go.Scatter(
        x=df["d2"],
        y=df["exlusive_reviewer"],
        name="Field-specific reviewer",
        mode="markers",
        hovertemplate="%{y:.2%}",
        opacity=1,
        marker={
            "color": df["color"],
            "symbol": "diamond",
            "size": 10,
            "line": {"width": 1, "color": df["color"]},
        },
    ),
)

# vertical black connector between the two markers of each field
for i, row in df.iterrows():
    fig.add_shape(
        type="line",
        xref="x",
        yref="y",
        x0=row["d2"],
        y0=row["reviewer"],
        x1=row["d2"],
        y1=row["exlusive_reviewer"],
        line={"color": "black", "width": 2},
    )

fig.update_layout(
    height=300,
    width=400,
    template="simple_white",
    font={"family": "Arial"},
    legend={
        "yanchor": "top",
        "y": 1,
        "xanchor": "left",
        "x": 0.01,
        "bgcolor": "rgba(0,0,0,0)",
    },
    barmode="overlay",
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
)

fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    title={"text": "Journal field", "standoff": 0},
    tickvals=order,
    ticktext=field_xticks,
    tickangle=35,
    showgrid=True,
)

fig.update_yaxes(
    showgrid=True,
    tickformat=".0%",
    minor={"nticks": 5, "showgrid": True},
    title={"text": "% women reviewers", "standoff": 0.0},
)

fig.show()
fig.write_image(os.path.join(graph_root, "perc_ex_women_disc.pdf"))


In [33]:
# gini_old: two-panel figure.
#   row 1 - Gini index for all vs. field-exclusive reviewers, per field
#   row 2 - share of the top reviewer country, for all vs. field-exclusive reviewers
df = pd.read_csv(os.path.join(root, r"reviewer_stat/d2_group_gini.csv"))

# another df
def p2f(x):
    """Convert a percent string such as '12.5%' into the fraction 0.125."""
    return float(x.strip('%'))/100

df2 = pd.read_csv(os.path.join(root, r"reviewer_stat/d2_reviewer_ctry_change.csv"),
                 converters={'perc':p2f, 'exc_perc':p2f, 'diff':p2f}
                 )
# sorted by perc descending, drop_duplicates keeps the largest-perc row of each field
df2=df2.sort_values("perc", ascending=False).drop_duplicates("d2")

color_dict, order = get_palette(attr="disc")
df["color"] = df["d2"].map(color_dict)
df2["color"] = df2["d2"].map(color_dict)

# draw figure
fig = make_subplots(2, 1, shared_xaxes=True, row_heights=[1.5,1], vertical_spacing=0.05)

position = dict(row=1, col=1)
position2 = dict(row=2, col=1)

# row 1: translucent circle = all reviewers
fig.add_trace(go.Scatter(x=df["d2"],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),

                                     )
                         ),
              **position,
              )

# row 1: solid diamond = field-exclusive reviewers
fig.add_trace(go.Scatter(x=df["d2"],
                         y=df["exclusive reviewer"],
                         name="Field-specific reviewer",
                         mode="markers",
                         opacity=1,
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              **position,
              )

# row 1: vertical black connector between the two markers of each field
for i, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row["d2"], y0=row["reviewer"], x1=row["d2"], y1=row["exclusive reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                   **position,
                  )


# row 2: grouped bars
# hovertemplate was "%{y:2%}" (a width-2 format with default precision);
# ".2%" gives two decimals like every other hover label in this notebook.
fig.add_trace(go.Bar(x=df2["d2"],
                         y=df2["perc"],
                         name="Reviewer",
                         opacity=0.4,
                         hovertemplate="%{y:.2%}",
                         marker=dict(color=df2["color"],
                                     line=dict(width=2,
                                               color=df2["color"]),
                                     )
                         ),
              **position2,
              )

fig.add_trace(go.Bar(x=df2["d2"],
                         y=df2["exc_perc"],
                         name="Field-specific reviewer",
                         marker=dict(color=df2["color"],
                                     line=dict(width=2,
                                               color=df2["color"]),
                                     pattern=dict(shape="/")
                                     )
                         ),
              **position2,
              )

# row 2: label each bar pair with the alpha2 value of its row, just above the taller bar
for i, row in df2.iterrows():
    fig.add_annotation(x=row["d2"],
                    y=row[["perc", "exc_perc"]].max(),
                    text=row["alpha2"],
                    showarrow=False,
                    yshift=10,
                    **position2
                    )


fig.update_layout(height=550,
                  width=500,
                  template="simple_white",
                  #title=dict(text="<b>(A)<b>",),
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=0.58,
                              xanchor="center",
                              x=0.5,
                              bgcolor='rgba(0,0,0,0)'),
                  barmode='group',
                  margin=dict(l=60, r=60))

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 tickvals=order,
                 ticktext=field_xticks,
                 tickangle=35,
                 showgrid=True,
                 )

# only the bottom panel carries the x-axis title
fig.update_xaxes(title=dict(text="Journal field"),
                 **position2)


fig.update_yaxes(showgrid=True,
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Gini index",
                            standoff=0.0),
                 **position,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="% the top<br>reviewers' country",
                            standoff=0.0),
                 **position2,
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "gini_disc.pdf"))

In [91]:
# gini single plot: Gini index for all vs. field-exclusive reviewers, per field
# (the duplicate, unused `p2f` helper that used to be re-defined here was removed;
#  this cell never called it)

df = pd.read_csv(os.path.join(root, r"reviewer_stat/d2_group_gini.csv"))

color_dict, order = get_palette(attr="disc")
df["color"] = df["d2"].map(color_dict)

# draw figure
fig = go.Figure()

# translucent circle = all reviewers
fig.add_trace(go.Scatter(x=df["d2"],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),

                                     )
                         ),
              )

# solid diamond = field-exclusive reviewers
fig.add_trace(go.Scatter(x=df["d2"],
                         y=df["exclusive reviewer"],
                         name="Field-specific reviewer",
                         mode="markers",
                         opacity=1,
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              )

# vertical black connector between the two markers of each field
for i, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row["d2"], y0=row["reviewer"], x1=row["d2"], y1=row["exclusive reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                  )

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=1,
                              xanchor="right",
                              x=1,
                              bgcolor='rgba(0,0,0,0)'),
                  margin=dict(l=30, r=30, t=30, b=30),
                  )

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 tickvals=order,
                 ticktext=field_xticks,
                 tickangle=35,
                 showgrid=False,
                 title=dict(text="Journal field",
                            standoff=0,),
                 )

fig.update_yaxes(showgrid=True,
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Gini index",
                            standoff=0.0),
                 )

fig.show()
# NOTE(review): "gini_disc.pdf" is also the file name written by the two-panel
# "gini_old" cell, so whichever cell runs last overwrites the other - confirm intended.
fig.write_image(os.path.join(graph_root, "gini_disc.pdf"))

# Country

In [8]:
# Display labels for the country axis. The plotting cells pass this list as
# `ticktext` next to `tickvals=order`, so entries are matched by position.
xticks=["US",
"UK",
"Netherlands",
"Germany",
"Switzerland",
"Australia",
"China",
"Brazil",
"Denmark",  # was misspelled "Demark"
"Japan",
]

In [95]:
# number of journals per journal country

x = "field"
y = "Journals"
ylim = None
title = "# journals"

df = pd.read_excel(os.path.join(root, "table_in_doc.xlsx"), sheet_name=1)
color_dict, order = get_palette(attr="ctry")
# the "Other" palette entry gets no tick on the axis
order = order[order != "Other"]
df["color"] = df[x].map(color_dict)

# one bar per country, each annotated with its value
fig = px.bar(df, x=x, y=y, text_auto=True)

fig.update_traces(
    marker={"color": df["color"]},
    textposition="outside",
    cliponaxis=False,
)

fig.update_layout(
    height=300,
    width=400,
    template="simple_white",
    font={"family": "Arial"},
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
)

fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    title={"text": "Journal country", "standoff": 0},
    tickangle=35,
    tickvals=order,
    ticktext=xticks,
)

fig.update_yaxes(
    showgrid=True,
    minor={"nticks": 5, "showgrid": True},
    title={"text": "# journals", "standoff": 0.0},
)
fig.show()
fig.write_image(os.path.join(graph_root, "no_journ_ctry.pdf"))

In [9]:
# total vs. country-exclusive reviewers per journal country,
# with the exclusive share on a second axis

df = pd.read_excel(os.path.join(root, r"table_in_doc.xlsx"), sheet_name=1)
# the sheet stores the share as a percentage number; rescale to a fraction for ".0%" ticks
df["Perc. of group-exclusive reviewers"] = df["Perc. of group-exclusive reviewers"]/100
color_dict, order = get_palette(attr="ctry")
# Drop the "Other" palette entry, as every other country figure in this notebook
# does, so that `tickvals=order` lines up with the `xticks` labels.
order = order[order != "Other"]
df["color"] = df["field"].map(color_dict)

# draw bar charts
fig = make_subplots(specs=[[{"secondary_y": True}]])
# translucent bar: all reviewers
fig.add_trace(go.Bar(x=df["field"],
                     y=df["Reviewers"],
                     name="Reviewer",
                     opacity=0.6,
                     marker=dict(color=df["color"])),
              )

# hatched bar drawn over it: country-exclusive reviewers
fig.add_trace(go.Bar(x=df["field"],
                     y=df["Group-exclusive reviewers"],
                     name="Country-specific reviewer",
                     marker=dict(color=df["color"],
                                 pattern=dict(shape="/"),)),
              )

# black line on the secondary axis: share of country-exclusive reviewers
fig.add_trace(go.Scatter(x=df["field"],
                         y=df["Perc. of group-exclusive reviewers"],
                         name="%",
                         line=dict(color="black"),
                         hovertemplate="%{y:.2%}",
                         marker=dict(symbol='diamond')
                         ),
              secondary_y=True,
              )

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=1.1,
                              xanchor="right",
                              x=0.90,
                              bgcolor='rgba(0,0,0,0)'),
                  barmode='overlay',
                  margin=dict(l=30, r=30, t=30, b=30),
                  )

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="Journal country",
                            standoff=0,),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 )

# first call styles every y axis; the second call overrides the secondary one only
fig.update_yaxes(showgrid=True,
                 range=(0, 300000),
                 dtick=300000/5,
                 minor=dict(nticks=4,
                            showgrid=True),
                 title=dict(text="# reviewers",
                            standoff=0.0),
                 )

fig.update_yaxes(showgrid=False,
                 range=(0, 1),
                 dtick=1/5,
                 tickformat=".0%",
                 minor=dict(nticks=4,
                            showgrid=False),
                 title=dict(text="% country-specific reviewers",
                            standoff=0.0),
                 secondary_y=True
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "no_rev_ctry.pdf"))

In [39]:
# mean, standard deviation and median of the `reviewer_id` column per journal country
# Two-letter codes of the ten countries shown, in display order; later cells reuse this list.
ctry_list = ['US',
'GB',
'NL',
'DE',
'CH',
'AU',
'CN',
'BR',
'DK',
'JP']

df = pd.read_csv(os.path.join(root, r"reviewer_stat/alpha2_journal_reviewer.csv"))
# Aggregators are given by name: passing np.mean/np.std/np.median to .agg is
# deprecated in pandas >= 2.1 and would eventually switch std from the pandas
# default (ddof=1) to numpy's (ddof=0). The output column names are unchanged.
df.groupby("alpha2_journal")["reviewer_id"].agg(["mean", "std", "median"]).loc[ctry_list,:]

Unnamed: 0_level_0,mean,std,median
alpha2_journal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,181.71065,662.214729,54.0
GB,213.589857,535.260838,69.0
NL,32.947248,80.267223,13.0
DE,114.918089,326.60232,15.0
CH,109.958333,223.920253,32.0
AU,238.645455,240.80597,157.0
CN,96.635514,152.914355,25.0
BR,144.194805,180.159143,104.0
DK,391.970149,351.757189,329.0
JP,181.075758,243.76952,87.5


In [100]:
# % female among all reviewers vs. among country-exclusive reviewers

color_dict, order = get_palette(attr="ctry")
order=order[order!="Other"]
df = pd.read_csv(os.path.join(root, r"reviewer_gender/alpha2_journal_group_gender.csv"))
x='alpha2_journal'
# .copy() makes the filtered frame independent, so adding the "color" column
# below does not raise pandas' SettingWithCopyWarning.
df = df[df[x].isin(ctry_list)].copy()
df["color"] = df[x].map(color_dict)

fig = go.Figure()
# translucent circle = all reviewers
fig.add_trace(go.Scatter(x=df[x],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         hovertemplate="%{y:.2%}",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),
                         )
                         ),
              )

# solid diamond = country-exclusive reviewers
fig.add_trace(go.Scatter(x=df[x],
                         y=df["exlusive_reviewer"],
                         name="Country-specific reviewer",
                         mode="markers",
                         opacity=1,
                         hovertemplate="%{y:.2%}",
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              )

# vertical black connector between the two markers of each country
for i, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row[x], y0=row["reviewer"], x1=row[x], y1=row["exlusive_reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                  )

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=1,
                              xanchor="left",
                              x=0.01,
                              bgcolor='rgba(0,0,0,0)'),
                  barmode='overlay',
                  margin=dict(l=30, r=30, t=30, b=30),
                  )

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="Journal country",
                            standoff=0,
                            ),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 showgrid=True,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="% women reviewers",
                            standoff=0.0),
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "perc_ex_women_ctry.pdf"))


In [101]:
# gini: Gini index for all vs. country-exclusive reviewers, per journal country
color_dict, order = get_palette(attr="ctry")
order = order[order != "Other"]
x = "alpha2_journal"
df = pd.read_csv(os.path.join(
    root, r"reviewer_stat/alpha2_journal_group_gini.csv"))
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise pandas' SettingWithCopyWarning.
df = df[df[x].isin(order)].copy()
# Direction of change per country. The lambda argument is named `delta` so it
# does not shadow the global `x`.
# NOTE(review): this column is not read by the traces below, which use a fixed
# "diamond" symbol - confirm whether it was meant to drive the marker symbol.
df["symbol"] = (df["exclusive reviewer"] - df["reviewer"]
                ).apply(lambda delta: "diamond" if delta > 0 else "triangle-down")

df["color"] = df[x].map(color_dict)

fig=go.Figure()
# translucent circle = all reviewers
fig.add_trace(go.Scatter(x=df[x],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),

                                     )
                         ),
              )

# solid diamond = country-exclusive reviewers
fig.add_trace(go.Scatter(x=df[x],
                         y=df["exclusive reviewer"],
                         name="Country-specific reviewer",
                         mode="markers",
                         opacity=1,
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              )

# vertical black connector between the two markers of each country
for i, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row[x], y0=row["reviewer"], x1=row[x], y1=row["exclusive reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                  )

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=1,
                              xanchor="left",
                              x=0.01,
                              bgcolor='rgba(0,0,0,0)'),
                  margin=dict(l=30, r=30, t=30, b=30),)

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 tickvals=order,
                 ticktext=xticks,
                 tickangle=35,
                 showgrid=False,
                 title=dict(text="Journal country",
                            standoff=0,
                            ),
                 )

fig.update_yaxes(showgrid=True,
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Gini index",
                            standoff=0.0),
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "gini_ctry.pdf"))


In [102]:
# Average % women reviewers for journals, grouped by journal country.
# Each bar is the per-country mean of the journal-level "Percentage" column;
# the error bar is the second value returned by mean_confidence_interval.

# Countries shown in the figure. Defined in this cell so that it runs on a
# fresh kernel without relying on a cell further down the notebook.
ctry_list = ['US', 'GB', 'NL', 'DE', 'CH', 'AU', 'CN', 'BR', 'DK', 'JP']

df = pd.read_csv(os.path.join(root, r"reviewer_gender/alpha2_journal_reviewer_gender.csv"))
x='alpha2_journal'
y='Percentage'
# .copy(): the column assignments below must act on an independent frame, not
# on a slice of the frame read from disk (avoids SettingWithCopyWarning).
df = df[df[x].isin(ctry_list)].copy()
df[y] /= 100  # percentage points -> fraction, to match tickformat ".0%"
color_dict, order = get_palette(attr="ctry")
order=order[order!="Other"]
df["color"] = df[x].map(color_dict)

# Per-country mean and CI. The lambda argument is named `s` so that it does
# not shadow the module-level `x` used as the groupby key.
# NOTE(review): mean_confidence_interval returns 2*h (the full interval
# width) and plotly draws error_y symmetrically, so the whiskers span
# mean +/- 2h -- confirm this is intended.
base = df.groupby(x)[y].agg([lambda s: mean_confidence_interval(s)[0],
                             lambda s: mean_confidence_interval(s)[1]]).reset_index()
base.columns=["field", "mean", "ci"]

# bar chart
base["color"] = base["field"].map(color_dict)

# draw bar charts
fig = px.bar(base,
             x="field",
             y="mean",
             error_y="ci")

fig.update_traces(marker=dict(color=base["color"]),
                  textposition="outside",
                  cliponaxis=False)

fig.update_layout(height=300,
                  width=400,
                  template="simple_white",
                  font=dict(family="Arial"))

# Categories follow the palette order; each entry of `order` is relabelled
# with the entry of `xticks` at the same position.
fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="Journal country",
                            standoff=0,
                            ),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Average % women reviewers",
                            standoff=0.0),
                 )
fig.show()
fig.write_image(os.path.join(graph_root, "perc_women_ctry.pdf"))

In [47]:
# Mean, standard deviation and median of the "Weighted degree" column for
# each country in ctry_list (the "Field" column holds the country code in
# this file, as the .loc lookup by ctry_list shows).
ctry_list = ['US',
'GB',
'NL',
'DE',
'CH',
'AU',
'CN',
'BR',
'DK',
'JP']

df = pd.read_csv(os.path.join(root, r"degree/ctry_degree.csv"))
# Aggregations are given by name: passing np.mean / np.std / np.median to
# groupby.agg is deprecated in recent pandas, and the string names select the
# same pandas implementations (std is the sample standard deviation, ddof=1).
# .loc[ctry_list, :] keeps only the listed countries, in that order.
df.groupby("Field")["Weighted degree"].agg(["mean", "std", "median"]).loc[ctry_list,:]

Unnamed: 0_level_0,mean,std,median
Field,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,657.397738,1668.943765,232.0
GB,787.667159,2249.428557,266.0
NL,250.360092,365.864225,151.0
DE,458.795222,926.529102,162.0
CH,863.869048,1266.77728,411.5
AU,792.609091,862.342341,466.5
CN,389.271028,511.455093,188.0
BR,326.25974,420.862504,237.0
DK,1512.447761,1716.457233,1096.0
JP,502.954545,589.893013,251.5


In [103]:
# Average share of within-country weighted degree per journal country:
# one bar per country (mean of the journal-level column), with the value
# from mean_confidence_interval as error bar.
x = 'Field'
y = '% intra-group weighted degree'

# Palette and category order for countries, without the "Other" bucket.
color_dict, order = get_palette(attr="ctry")
order = order[order != "Other"]

# Keep only journals whose country appears in the palette order.
df = pd.read_csv(os.path.join(root, r"intra-group/ctry_intra_group.csv"))
df = df[df["Field"].isin(order)]

# Two aggregations per country: [0] is the mean, [1] the CI value.
base = (
    df.groupby(x)[y]
    .agg([lambda s: mean_confidence_interval(s)[0],
          lambda s: mean_confidence_interval(s)[1]])
    .reset_index()
)
base.columns = ["field", "mean", "ci"]
base["color"] = base["field"].map(color_dict)

fig = px.bar(base, x="field", y="mean", error_y="ci")

fig.update_traces(
    marker={"color": base["color"]},
    textposition="outside",
    hovertemplate="%{y:.2%}",
    cliponaxis=False,
)

fig.update_layout(
    height=300,
    width=400,
    template="simple_white",
    font={"family": "Arial"},
    margin={"l": 30, "r": 30, "t": 30, "b": 30},
)

# Categories follow `order`; tick labels are taken from `xticks`.
fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    tickvals=order,
    ticktext=xticks,
    tickangle=35,
    title={"text": "Journal country", "standoff": 0},
)

fig.update_yaxes(
    showgrid=True,
    tickformat=".0%",
    minor={"nticks": 5, "showgrid": True},
    title={"text": "Average % within-country degrees", "standoff": 0.0},
)

fig.show()
fig.write_image(os.path.join(graph_root, "perc_within_ctry.pdf"))

# Quartiles

In [10]:
# Tick labels for the four JIF quartiles, used by every figure in this section.
xticks = [f"Q{rank}" for rank in range(1, 5)]

In [10]:
# Number of journals per JIF quartile (bar chart, one bar per row of the
# third sheet of table_in_doc.xlsx).
df = pd.read_excel(os.path.join(root, r"table_in_doc.xlsx"), sheet_name=2)
x = 'field'
y = 'Journals'
ylim = None
title = "# journals"
color_dict, order = get_palette(attr="new_quartile")
# Drop the placeholder categories (0 or "Other") that the other quartile
# cells exclude, so that tickvals lines up one-to-one with the four Q1..Q4
# labels in xticks. isin() handles both an integer and a string placeholder;
# a plain `!= "Other"` never matches an integer-valued palette column.
order = order[~order.isin([0, "Other"])]
df["color"] = df[x].map(color_dict)

# count journals
# draw bar charts
fig = px.bar(df,
             x=x,
             y=y,
             text_auto=True)

# Bar colours come from the palette; value labels are drawn above the bars
# and may extend past the axis area.
fig.update_traces(marker=dict(color=df["color"]),
                  textposition="outside",
                  cliponaxis=False)

fig.update_layout(height=350,
                  width=270,
                  template="simple_white",
                  font=dict(family="Arial"))

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="JIF rank",
                            standoff=0.0,
                            ),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 )

fig.update_yaxes(showgrid=True,
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="# journals",
                            standoff=0.0),
                 )
fig.show()
fig.write_image(os.path.join(graph_root, "no_journ_q.pdf"))


In [12]:
# Reviewers and rank-specific (group-exclusive) reviewers per JIF quartile:
# two overlaid bar series on the primary axis plus the share of
# group-exclusive reviewers as a line on a secondary axis.

df = pd.read_excel(os.path.join(root, r"table_in_doc.xlsx"), sheet_name=2)
# The sheet stores the share in percentage points; convert to a fraction so
# the ".0%" tick format on the secondary axis renders it correctly.
df["Perc. of group-exclusive reviewers"] = df["Perc. of group-exclusive reviewers"].div(100)
color_dict, order = get_palette(attr="new_quartile")
df["color"] = df["field"].map(color_dict)

fig = make_subplots(specs=[[{"secondary_y": True}]])

# All reviewers: semi-transparent bars.
fig.add_trace(
    go.Bar(
        x=df["field"],
        y=df["Reviewers"],
        name="Reviewer",
        opacity=0.6,
        marker={"color": df["color"]},
    ),
)

# Group-exclusive reviewers: hatched bars drawn over the first series.
fig.add_trace(
    go.Bar(
        x=df["field"],
        y=df["Group-exclusive reviewers"],
        name="Rank-specific reviewer",
        marker={"color": df["color"], "pattern": {"shape": "/"}},
    ),
)

# Share of group-exclusive reviewers, on the secondary y axis.
fig.add_trace(
    go.Scatter(
        x=df["field"],
        y=df["Perc. of group-exclusive reviewers"],
        name="%",
        line={"color": "black"},
        marker={"symbol": "diamond"},
    ),
    secondary_y=True,
)

fig.update_layout(
    height=350,
    width=270,
    template="simple_white",
    font={"family": "Arial"},
    legend={
        "yanchor": "middle",
        "y": 1.1,
        "xanchor": "center",
        "x": 0.60,
        "bgcolor": 'rgba(0,0,0,0)',
    },
    barmode='overlay',
    margin={"t": 30},
)

fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    tickvals=order,
    ticktext=xticks,
    tickangle=35,
    title={"text": "JIF rank", "standoff": 0.0},
)

# This call has no secondary_y selector, so it styles both y axes; the
# secondary axis is then overridden by the next call. Keep the two calls in
# this order.
fig.update_yaxes(
    showgrid=True,
    range=(0, 250000),
    dtick=250000/5,
    minor={"nticks": 4, "showgrid": True},
    title={"text": "# reviewers", "standoff": 0.0},
)

fig.update_yaxes(
    showgrid=False,
    range=(0, 1),
    dtick=1/5,
    tickformat=".0%",
    minor={"nticks": 4, "showgrid": False},
    title={"text": "% rank-specific reviewers", "standoff": 0.0},
    secondary_y=True,
)

fig.show()
fig.write_image(os.path.join(graph_root, "no_rev_q.pdf"))

In [5]:
# Mean, standard deviation and median of the "reviewer_id" column per
# new_quartile group.
df = pd.read_csv(os.path.join(root, r"reviewer_stat/new_quartile_reviewer.csv"))
# Aggregations are given by name: passing np.mean / np.std / np.median to
# groupby.agg is deprecated in recent pandas, and the string names select the
# same pandas implementations (std is the sample standard deviation, ddof=1).
df.groupby("new_quartile")["reviewer_id"].agg(["mean", "std", "median"])

Unnamed: 0_level_0,mean,std,median
new_quartile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,127.063291,261.920709,26.0
1,199.451075,706.017022,39.0
2,176.085131,516.5915,42.0
3,148.36,305.520074,48.0
4,112.489647,303.935369,36.0


In [7]:
# Mean, standard deviation and median of "Weighted degree" per quartile
# (the "Field" column holds the quartile in this file).

df = pd.read_csv(os.path.join(root, r"degree/new_quartile_degree.csv"))
# Aggregations are given by name: passing np.mean / np.std / np.median to
# groupby.agg is deprecated in recent pandas, and the string names select the
# same pandas implementations (std is the sample standard deviation, ddof=1).
df.groupby("Field")["Weighted degree"].agg(["mean", "std", "median"])

Unnamed: 0_level_0,mean,std,median
Field,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,459.109705,1065.276699,109.0
1,840.010215,1994.312338,296.0
2,687.904956,2130.270056,234.0
3,502.397193,1032.784991,205.0
4,325.518879,659.066751,123.0


In [12]:
# Average share of within-rank weighted degree per JIF quartile:
# one bar per quartile (mean of the journal-level column), with the value
# from mean_confidence_interval as error bar.
x = 'Field'
y = '% intra-group weighted degree'

# Palette and category order for quartiles, without the 0 group.
color_dict, order = get_palette(attr="new_quartile")
order = order[order != 0]

# Keep only journals whose quartile appears in the palette order.
df = pd.read_csv(os.path.join(root, r"intra-group/new_quartile_intra_group.csv"))
df = df[df["Field"].isin(order)]

# Two aggregations per quartile: [0] is the mean, [1] the CI value.
base = (
    df.groupby(x)[y]
    .agg([lambda s: mean_confidence_interval(s)[0],
          lambda s: mean_confidence_interval(s)[1]])
    .reset_index()
)
base.columns = ["field", "mean", "ci"]
base["color"] = base["field"].map(color_dict)

fig = px.bar(base, x="field", y="mean", error_y="ci")

fig.update_traces(
    marker={"color": base["color"]},
    textposition="outside",
    hovertemplate="%{y:.2%}",
    cliponaxis=False,
)

fig.update_layout(
    height=350,
    width=270,
    template="simple_white",
    font={"family": "Arial"},
)

# Categories follow `order`; tick labels are taken from `xticks`.
fig.update_xaxes(
    categoryorder="array",
    categoryarray=order,
    tickvals=order,
    ticktext=xticks,
    tickangle=35,
    title={"text": "JIF rank", "standoff": 0.0},
)

fig.update_yaxes(
    showgrid=True,
    tickformat=".0%",
    minor={"nticks": 5, "showgrid": True},
    title={"text": "Average % within-rank degrees", "standoff": 0.0},
)

fig.show()
fig.write_image(os.path.join(graph_root, "perc_within_q.pdf"))


In [13]:
# Average % women reviewers for journals, grouped by JIF quartile.
# Each bar is the per-quartile mean of the journal-level "Percentage" column;
# the error bar is the second value returned by mean_confidence_interval.

df = pd.read_csv(os.path.join(root, r"reviewer_gender/new_quartile_reviewer_gender.csv"))
x='new_quartile'
y='Percentage'
# .copy(): the column assignments below must act on an independent frame, not
# on a slice of the frame read from disk (avoids SettingWithCopyWarning).
df = df[df[x].isin([1,2,3,4])].copy()
df[y] = df[y]/100  # percentage points -> fraction, to match tickformat ".0%"
color_dict, order = get_palette(attr="quartile")
# Drop the placeholder categories (0 or "Other") that the other quartile
# cells exclude, so that tickvals lines up one-to-one with the four Q1..Q4
# labels in xticks. isin() handles both an integer and a string placeholder;
# a plain `!= "Other"` never matches an integer-valued palette column.
order = order[~order.isin([0, "Other"])]
df["color"] = df[x].map(color_dict)

# Per-quartile mean and CI. The lambda argument is named `s` so that it does
# not shadow the module-level `x` used as the groupby key.
# NOTE(review): mean_confidence_interval returns 2*h (the full interval
# width) and plotly draws error_y symmetrically, so the whiskers span
# mean +/- 2h -- confirm this is intended.
base = df.groupby(x)[y].agg([lambda s: mean_confidence_interval(s)[0],
                             lambda s: mean_confidence_interval(s)[1]]).reset_index()
base.columns=["field", "mean", "ci"]

# bar chart
base["color"] = base["field"].map(color_dict)

# draw bar charts
fig = px.bar(base,
             x="field",
             y="mean",
             error_y="ci")

fig.update_traces(marker=dict(color=base["color"]),
                  textposition="outside",
                  hovertemplate="%{y:.2%}",
                  cliponaxis=False)

fig.update_layout(height=350,
                  width=270,
                  template="simple_white",
                  title=dict(text="<b>(F)",),
                  font=dict(family="Arial"))

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="JIF rank",
                            standoff=0.0,),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 )

fig.update_yaxes(showgrid=True,
                 tickformat=".0%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Average % women reviewers",
                            standoff=0.0),
                 )
fig.show()
fig.write_image(os.path.join(graph_root, "perc_women_q.pdf"))

In [14]:
# % women among all reviewers vs. among rank-specific (group-exclusive)
# reviewers, per JIF quartile. Each quartile gets a circle ("reviewer") and a
# diamond ("exlusive_reviewer") joined by a vertical black segment.

color_dict, order = get_palette(attr="new_quartile")
# Restrict the axis order to real quartiles: drop the placeholder categories
# (0 or "Other") so that tickvals lines up one-to-one with the four Q1..Q4
# labels in xticks, matching the data filter below.
order = order[~order.isin([0, "Other"])]
df = pd.read_csv(os.path.join(root, r"reviewer_gender/new_quartile_group_gender.csv"))
x='new_quartile'
# .copy(): the column assignment below must act on an independent frame, not
# on a slice of the frame read from disk (avoids SettingWithCopyWarning).
df = df[df[x].isin([1,2,3,4])].copy()
df["color"] = df[x].map(color_dict)

fig = go.Figure()
# Share among all reviewers.
fig.add_trace(go.Scatter(x=df[x],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         hovertemplate="%{y:.2%}",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),
                         )
                         ),
              )

# Share among group-exclusive reviewers ("exlusive_reviewer" is the column
# name as spelled in the CSV).
fig.add_trace(go.Scatter(x=df[x],
                         y=df["exlusive_reviewer"],
                         name="Rank-specific reviewer",
                         mode="markers",
                         opacity=1,
                         hovertemplate="%{y:.2%}",
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              )

# Vertical connector (x0 == x1) between the two values of each quartile.
for _, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row[x], y0=row["reviewer"], x1=row[x], y1=row["exlusive_reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                  )

fig.update_layout(height=350,
                  width=270,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=0.2,
                              xanchor="center",
                              x=0.6,
                              bgcolor='rgba(0,0,0,0)'),
                  margin=dict(t=30)
                  )

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 title=dict(text="JIF rank",
                            standoff=0.0,),
                 tickangle=35,
                 tickvals=order,
                 ticktext=xticks,
                 showgrid=True,
                 )

# Fixed 25%-35% window on the y axis.
fig.update_yaxes(showgrid=True,
                 range=(0.25, 0.35),
                 dtick=0.05,
                 tickformat=".1%",
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="% women reviewers",
                            standoff=0.0),
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "perc_ex_women_q.pdf"))


In [15]:
# Gini index per JIF quartile, for all reviewers vs. rank-specific
# (exclusive) reviewers. Each quartile gets a circle ("reviewer") and a
# diamond ("exclusive reviewer") joined by a vertical black segment.
color_dict, order = get_palette(attr="new_quartile")
order=order[order!=0]
x = "new_quartile"
df = pd.read_csv(os.path.join(root, r"reviewer_stat/new_quartile_group_gini.csv"))
# .copy(): the column assignment below must act on an independent frame, not
# on a slice of the frame read from disk (avoids SettingWithCopyWarning).
df = df[df[x].isin(order)].copy()
df["color"] = df[x].map(color_dict)

# draw figure
fig=go.Figure()
# Gini over all reviewers.
fig.add_trace(go.Scatter(x=df[x],
                         y=df["reviewer"],
                         name="Reviewer",
                         mode="markers",
                         marker=dict(color=df["color"],
                                     symbol="circle",
                                     size=10,
                                     opacity=0.6,
                                     line=dict(width=1,
                                               color=df["color"]),

                                     )
                         ),
              )

# Gini over exclusive reviewers.
fig.add_trace(go.Scatter(x=df[x],
                         y=df["exclusive reviewer"],
                         name="Rank-specific<br>reviewer",
                         mode="markers",
                         opacity=1,
                         marker=dict(color=df["color"],
                                     symbol="diamond",
                                     size=10,
                                     line=dict(width=1,
                                               color=df["color"]),
                                     )
                         ),
              )

# Vertical connector (x0 == x1) between the two values of each quartile.
for _, row in df.iterrows():
    fig.add_shape(type="line",
                  xref="x", yref="y",
                  x0=row[x], y0=row["reviewer"], x1=row[x], y1=row["exclusive reviewer"],
                  line=dict(color="black",
                            width=2,
                            ),
                  )

fig.update_layout(height=350,
                  width=270,
                  template="simple_white",
                  font=dict(family="Arial"),
                  legend=dict(yanchor="top",
                              y=1,
                              xanchor="left",
                              x=0.01,
                              bgcolor='rgba(0,0,0,0)'),
                  margin=dict(t=30))

fig.update_xaxes(categoryorder="array",
                 categoryarray=order,
                 tickvals=order,
                 ticktext=xticks,
                 tickangle=35,
                 showgrid=False,
                 title=dict(text="JIF rank",
                            standoff=0.0,),
                 )

fig.update_yaxes(showgrid=True,
                 minor=dict(nticks=5,
                            showgrid=True),
                 title=dict(text="Gini index",
                            standoff=0.0),
                 )

fig.show()
fig.write_image(os.path.join(graph_root, "gini_q.pdf"))

# Reviewer country distribution

In [120]:
# Scatter of countries: reviewer count (log x axis) against the average
# number of reviewed journals, coloured by continent, with marginal
# distributions on both axes. Only rows with reviewer_id >= 500 are kept.
data = pd.read_csv(os.path.join(root, r"reviewer_stat/ctry_journal_reviewed.csv"))
ctryDf = pd.read_excel(os.path.join(root, r"country_classification.xlsx"))

# Filter, attach continent and country name via alpha2, de-duplicate, sort
# by continent (descending) and give the plotted columns their axis labels.
data = (
    data.loc[data["reviewer_id"] >= 500]
    .merge(ctryDf[["alpha2", "continent", "Country"]], on="alpha2")
    .drop_duplicates()
    .sort_values("continent", ascending=False)
    .rename(columns={"reviewer_id": "# reviewers (log)",
                     "journal_name_wos": "Average # reviewed journals"})
)
print(len(data))

fig = px.scatter(
    data,
    x="# reviewers (log)",            # count of reviewer
    y="Average # reviewed journals",  # avg reviewed journals
    color="continent",
    hover_data=['Country'],
    log_x=True,
    marginal_x="violin",
    marginal_y="histogram",
)

fig.update_traces(opacity=1)

fig.update_layout(
    height=450,
    width=550,
    template="simple_white",
    font={"family": "Arial"},
    barmode='stack',
    legend={
        "title": None,
        "bgcolor": 'rgba(0,0,0,0)',
        "tracegroupgap": 0.0,
        "traceorder": "reversed",
    },
)

# Applies to every x axis of the figure.
fig.update_xaxes(showgrid=True, title={"standoff": 0})

# x axis of the subplot at row 1, col 2 only.
fig.update_xaxes(
    showline=True,
    showticklabels=True,
    visible=True,
    minor={"nticks": 4, "showgrid": True},
    tickvals=[5, 10],
    row=1,
    col=2,
)

# y axis of the subplot at row 1, col 1.
fig.update_yaxes(
    showgrid=True,
    minor={"nticks": 5, "showgrid": True},
    title={"standoff": 0},
    row=1,
    col=1,
)

# No grid on the y axis of the subplot at row 1, col 2.
fig.update_yaxes(showgrid=False, row=1, col=2)

# annotate
# Top-10 countries on each axis; the union is printed only, nothing is
# annotated on the figure itself.
l1 = data.sort_values("Average # reviewed journals", ascending=False).head(10)["Country"].tolist()
l2 = data.sort_values("# reviewers (log)", ascending=False).head(10)["Country"].tolist()
to_anno = set(l1) | set(l2)
print(to_anno)

fig.show()
fig.write_image(os.path.join(graph_root, "reviewer_dist.pdf"))


53
{'India', 'Germany', 'Singapore', 'Belgium', 'US', 'China', 'Greece', 'Austria', 'Switzerland', 'Italy', 'Japan', 'Australia', 'Spain', 'New Zealand', 'Brazil', 'UK'}


In [34]:
# Build the table used to test whether a country's reviewer count is
# correlated with its number of journals.

# Reviewer side: country code and reviewer count, rows with >= 500 only.
data = pd.read_csv(os.path.join(root, r"reviewer_stat/ctry_journal_reviewed.csv"))
data = data.loc[data["reviewer_id"] >= 500, ['alpha2', 'reviewer_id']]

# Journal side: nodes that occur as source or target of at least one edge,
# counted as distinct Ids per journal country.
n_df = pd.read_csv(os.path.join(root, r"over10_node_list_nonull.csv"))
g_df = pd.read_csv(os.path.join(root, r"over10_edge_list_nonull.csv"))
id_list = g_df["source"].tolist() + g_df["target"].tolist()
data2 = (
    n_df.loc[n_df["Id"].isin(id_list), ["Id", "alpha2_journal"]]
    .groupby("alpha2_journal")
    .agg(n=("Id", "nunique"))
    .reset_index()
)

# Inner join of the two sides on the country code.
data = data.merge(data2, left_on="alpha2", right_on="alpha2_journal")
data

Unnamed: 0,alpha2,reviewer_id,alpha2_journal,n
0,AT,1266,AT,15
1,AU,11316,AU,110
2,BE,1892,BE,6
3,BR,12272,BR,77
4,CA,6289,CA,48
5,CH,2000,CH,168
6,CL,881,CL,3
7,CN,26552,CN,107
8,CO,668,CO,4
9,CZ,1559,CZ,11


In [35]:
# test correlation
# Pearson correlation between the per-country reviewer count ("reviewer_id")
# and the per-country number of distinct journal Ids ("n").
# `stats` is scipy.stats, already imported in the notebook's import cell, so
# no import is needed here.
stats.pearsonr(data["reviewer_id"], data["n"])

PearsonRResult(statistic=0.6961869156713542, pvalue=5.5291197017235355e-08)