In [None]:
import requests
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

def dblp(author, page_size=30, timeout=30):
    """Download all DBLP search hits for an author and tabulate them.

    The DBLP publication search endpoint is queried page by page until a
    page comes back without hits.

    Parameters
    ----------
    author : str
        Author identifier interpolated verbatim into the query string
        (e.g. ``"Panayiotis_Bozanis"``).
    page_size : int, optional
        Number of results requested per page (the ``h`` query parameter).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per publication with the columns ``Year``, ``Type``,
        ``Venue``, ``Access``, ``Pages`` and ``Author``. Fields missing from
        a hit are stored as ``pd.NA``. When the author has no hits the frame
        is empty but still carries these columns, so downstream ``groupby``
        calls do not fail on a missing column.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.Timeout
        If a response does not arrive within ``timeout`` seconds.
    """
    columns = ['Year', 'Type', 'Venue', 'Access', 'Pages', 'Author']

    publications = []
    f = 0  # index of the first result of the current page (pagination)
    while True:
        url = f"https://dblp.org/search/publ/api?q=author%3A{author}%3A&format=json&h={page_size}&f={f}"

        # Without a timeout a stalled connection would block the notebook forever.
        r = requests.get(url, timeout=timeout)
        # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
        r.raise_for_status()
        data = r.json()

        hits = data['result']['hits'].get('hit', [])

        # An empty page means every publication has been collected.
        if not hits:
            break

        publications.extend(hits)

        # Advance by what was actually received, not by the requested page size.
        f += len(hits)

    if not publications:
        print('This author has no publications')
        return pd.DataFrame(columns=columns)

    # One row per publication; absent fields become pd.NA.
    df = pd.DataFrame([
        {
            'Year': i['info'].get('year', pd.NA),
            'Type': i['info'].get('type', pd.NA),
            'Venue': i['info'].get('venue', pd.NA),
            'Access': i['info'].get('access', pd.NA),
            'Pages': i['info'].get('pages', pd.NA)
        }
        for i in publications
    ])

    # Tag every row with the queried author so frames can be concatenated later.
    df['Author'] = author

    return df

# Author identifiers; dblp() interpolates them into the search query.
author1, author2 = "Panayiotis_Bozanis", "Michail_Salampasis"

# One publications table per author.
df, df2 = dblp(author1), dblp(author2)

In [None]:
# Number of publications per year for the first author, largest counts first.
pubs1 = (
    df.groupby('Year')
    .size()
    .reset_index(name='Publications')
    .sort_values(by='Publications', ascending=False)
)

fig = px.bar(
    pubs1,
    x='Year',
    y='Publications',
    title="Publications by Year",
)
fig.show()

In [None]:
# Number of publications per publication type for the first author.
types1 = (
    df.groupby('Type')
    .size()
    .reset_index(name='Publications')
)

fig = px.pie(
    types1,
    names='Type',
    values='Publications',
    title='Publication Types Distribution',
)
fig.update_layout(legend_title_text='Type')
fig.show()

In [None]:
# Cast the year column to int (in place on df) and drop rows whose access
# status is 'unavailable'.
df['Year'] = df['Year'].astype(int)
df_access = df.loc[df['Access'] != 'unavailable']

# Publications per (year, access) pair, in chronological order.
df_count = (
    df_access.groupby(['Year', 'Access'])
    .size()
    .reset_index(name='Publications')
    .sort_values(by='Year')
)

fig = px.scatter(
    df_count,
    x='Year',
    y='Publications',
    color='Access',
    title='Open vs Closed Access Publications by Year',
)

# One tick per year, starting at the earliest year in df.
# Reference: https://plotly.com/python/tick-formatting/
layout = go.Layout(
    autosize=True,
    xaxis_title='Year',
    yaxis_title='Publications',
    xaxis={
        'tickmode': 'linear',
        'tick0': df['Year'].min(),
        'dtick': 1,
    },
)

fig.update_layout(layout)
fig.show()

In [None]:
def page_count(pages):
    """Return the number of pages described by a ``start-end`` page string.

    Parameters
    ----------
    pages : str or missing value
        Page range such as ``"1-8"`` or, with an article prefix,
        ``"29:1-29:6"`` (only the part after the last ``:`` is used).

    Returns
    -------
    int or None
        ``end - start + 1`` for a single well-formed numeric range.
        ``None`` when the value is missing or not a string, contains no
        hyphen, contains more than one hyphen (e.g. ``"1-5, 10-12"``), has
        non-numeric bounds, or describes a range whose end precedes its start.
    """
    # Missing values (pd.NA, NaN, None) and any other non-string input.
    if not isinstance(pages, str):
        return None

    # partition() never raises, unlike unpacking the result of split('-')
    # when the string holds more than one hyphen.
    start, hyphen, end = pages.partition('-')
    if not hyphen or '-' in end:
        return None

    start = start.split(':')[-1].strip()  # 29:1-29:6 -> 1
    end = end.split(':')[-1].strip()      # 29:1-29:6 -> 6

    if not (start.isdigit() and end.isdigit()):
        return None

    count = int(end) - int(start) + 1
    # A reversed range would give a zero or negative count; treat it as unknown.
    return count if count > 0 else None

# Derive a page count per publication (stored on df) and keep only the rows
# where one could be computed.
df['Page Count'] = df['Pages'].apply(page_count)
df_page = df.dropna(subset=['Page Count'])

# Mean page count for each publication type.
avg_page_count = (
    df_page.groupby('Type')['Page Count']
    .mean()
    .reset_index()
)

fig = px.bar(avg_page_count, x='Type', y='Page Count', title="Average Page Count by Type")
fig.show()

In [None]:
# Cast the second author's years to int (in place on df2).
df2['Year'] = df2['Year'].astype(int)

# Publications per year for each author, in chronological order.
pubs1 = (
    df.groupby('Year')
    .size()
    .reset_index(name='Publications')
    .sort_values(by='Year')
)
pubs2 = (
    df2.groupby('Year')
    .size()
    .reset_index(name='Publications')
    .sort_values(by='Year')
)

# One bar trace per author.
fig = go.Figure(data=[
    go.Bar(
        x=pubs1['Year'],
        y=pubs1['Publications'],
        name=author1,
        marker_color='green',
    ),
    go.Bar(
        x=pubs2['Year'],
        y=pubs2['Publications'],
        name=author2,
        marker_color='red',
    ),
])

# Bars side by side, one tick per year starting at the earliest year of either author.
layout = go.Layout(
    barmode='group',
    title="Publications by Year and Author",
    xaxis_title='Year',
    yaxis_title='Publications',
    legend_title_text='Authors',
    xaxis={
        'tickmode': 'linear',
        'tick0': min(pubs1['Year'].min(), pubs2['Year'].min()),
        'dtick': 1,
    },
)

fig.update_layout(layout)
fig.show()

In [None]:
# Number of publications per publication type for the second author
# (types1, the first author's counts, comes from an earlier cell).
types2 = (
    df2.groupby('Type')
    .size()
    .reset_index(name='Publications')
)

# One bar trace per author, stacked on top of each other per type.
fig = go.Figure(data=[
    go.Bar(
        x=types1['Type'],
        y=types1['Publications'],
        name=author1,
        marker_color='green',
    ),
    go.Bar(
        x=types2['Type'],
        y=types2['Publications'],
        name=author2,
        marker_color='red',
    ),
])

layout = go.Layout(
    barmode='stack',
    title="Publication Types by Author",
    xaxis_title='Type',
    yaxis_title='Publications',
    legend_title_text='Authors',
)

fig.update_layout(layout)
fig.show()

In [None]:
# Keep only rows whose venue is a single value; a list is unhashable and
# therefore cannot serve as a groupby key.
df_venue = df[df['Venue'].apply(lambda x: not isinstance(x, list))]
df2_venue = df2[df2['Venue'].apply(lambda x: not isinstance(x, list))]

# Number of publications per venue for each author.
venues1 = df_venue.groupby('Venue').size().reset_index(name='Publications')
venues2 = df2_venue.groupby('Venue').size().reset_index(name='Publications')

# Reference: https://plotly.com/python/bubble-charts/
# A single sizeref is shared by both traces so that equal bubble areas mean
# equal publication counts for either author. The formula is the scaling
# recommended in the reference above, with a maximum marker size of 40.
all_counts = pd.concat([venues1['Publications'], venues2['Publications']])
# Fall back to 1 when neither author has a usable venue, so the division
# below stays well defined instead of failing on an empty series.
max_publications = all_counts.max() if len(all_counts) else 1
bubble_sizeref = 2. * max_publications / (40. ** 2)

fig = go.Figure()

for venues, bubble_color, author_name in (
    (venues1, 'blue', author1),
    (venues2, 'orange', author2),
):
    fig.add_trace(go.Scatter(
        x=venues['Venue'],
        y=venues['Publications'],
        mode='markers',
        marker=dict(
            color=bubble_color,
            size=venues['Publications'],
            sizemode='area',
            sizeref=bubble_sizeref,
            sizemin=4,
        ),
        name=author_name
    ))

layout = go.Layout(
    title_text='Publications by Venue and Author',
    xaxis_title='Venue',
    yaxis_title='Publications',
    legend_title_text = 'Authors',
    legend_itemsizing = 'constant'
)

fig.update_layout(layout).show()

In [None]:
# Stack both authors' publications into one frame.
df_combined = pd.concat([df, df2])

# Number of publications per (year, type, author).
final = df_combined.groupby(['Year', 'Type', 'Author']).size().reset_index(name='Publications')

# Label used to colour one line per author/type combination.
final['Author and Type'] = final['Author'] + " - " + final['Type']

# px.line connects points in row order, so Year is an explicit sort key:
# this guarantees each line runs chronologically instead of relying on the
# multi-key sort happening to preserve the groupby order.
final_sorted = final.sort_values(['Type', 'Author', 'Year'])

fig = px.line(
    final_sorted,
    x='Year',
    y='Publications',
    color='Author and Type',
    markers=True,
    title='Publications by Type, Year and Author'
)

# One tick per year, starting at the earliest year present.
layout = go.Layout(
    xaxis_title='Year',
    yaxis_title='Publications',
    legend_title_text='Author and Type',
    xaxis=dict(
        tickmode='linear',
        tick0=final_sorted['Year'].min(),
        dtick=1
    )
)

fig.update_layout(layout).show()

In [None]:
# Drop publications whose access status is 'unavailable'.
df_access2 = df_combined.loc[df_combined['Access'] != 'unavailable']

# Number of publications per (author, year, access).
df_count = (
    df_access2.groupby(['Author', 'Year', 'Access'])
    .size()
    .reset_index(name='Publications')
)

# Colour distinguishes the author, dash pattern distinguishes the access type.
# Reference: https://plotly.com/python/line-charts/
fig = px.line(
    df_count,
    x='Year',
    y='Publications',
    color='Author',
    line_dash='Access',
    title='Open vs Closed Access Publications by Year and Author',
)
fig.show()

In [None]:
# Compute the page count for every publication of both authors (stored on
# df_combined) and keep only the rows where one could be derived.
df_combined['Page Count'] = df_combined['Pages'].apply(page_count)
combined_df = df_combined.dropna(subset=['Page Count'])

# Mean page count per (type, author).
final_average_counts = (
    combined_df.groupby(['Type', 'Author'])['Page Count']
    .mean()
    .reset_index()
)

# Grouped bars: one bar per author within each publication type.
fig = px.bar(
    final_average_counts,
    x='Type',
    y='Page Count',
    color='Author',
    barmode='group',
    title='Average Page Counts by Type and Author',
)
fig.show()