In [1]:
import pandas as pd


In [2]:
table_freq = pd.read_csv('trigram_frequency_table.csv', sep=',')
table_rank = pd.read_csv('trigram_rank_table.csv', sep=',')

In [5]:
import plotly.express as px

fig = px.histogram(table_freq[:15].melt(id_vars='Trigram', var_name='year', value_name='frequency'), 
                   x='Trigram', y='frequency', color='year',
                   color_discrete_sequence=px.colors.qualitative.Dark24,
                   nbins=len(table_freq['Trigram'].unique()),
                   animation_frame='year')

fig.update_layout(title='Frequency of Trigrams in arxiv.org CS articles from 2018 to 2022',
                  xaxis_title='Trigram',
                  yaxis_title='Frequency',
                  xaxis_tickangle=-45,
                  xaxis_tickformat = '%',
                  yaxis=dict(range=[0, 0.003]),
                  height=600,
                  width=1000)

#fig.update_traces(texttemplate='%{y:.2%}', textposition='outside')

fig.show()
py.plot(fig, filename = 'trigram_frequency', auto_open=True)

'https://plotly.com/~tapetrova/10/'

In [6]:
import pandas as pd
import plotly.graph_objects as go

# Extract the data for the first 10 trigrams and the years 2018-2022
trigrams = table_freq['Trigram'][:5]
years = ['2019', '2020', '2021', '2022']
data = table_freq.set_index('Trigram').loc[trigrams, years]

# Define the colors for each trigram
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A']

# Create a go.Figure() object
fig = go.Figure()

# Create a loop to add a trace for each trigram
for i, trigram in enumerate(trigrams):
    color = colors[i]
    fillcolor = f"rgba({str(int(color[1:3], 16))}, {str(int(color[3:5], 16))}, {str(int(color[5:7], 16))}, 0.07)"
    fig.add_trace(go.Scatterpolar(
        r=data.loc[trigram],
        theta=years,
        fill='toself',
        fillcolor=fillcolor,
        line=dict(color=color, width=2),
        name=trigram
    ))

# Set the layout of the figure
fig.update_layout(
    title='Radar Chart of Trigram Frequency in arXiv.org CS Articles from 2019-2022',
    polar=dict(
        radialaxis=dict(
            tickfont=dict(size=10)
        ),
        angularaxis=dict(
            tickfont=dict(size=10)
        )
    )
)

fig.show()
py.plot(fig, filename = 'radar_chart_frequency_2019_2022', auto_open=True)


'https://plotly.com/~tapetrova/62/'

In [7]:
data.head()

Unnamed: 0_level_0,2019,2020,2021,2022
Trigram,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
graph neural network,0.000605,0.001234,0.001583,0.001693
deep neural network,0.002236,0.00187,0.001614,0.001225
deep reinforcement learning,0.001438,0.001255,0.001128,0.00108
convolutional neural network,0.00263,0.001995,0.001676,0.001072
multiple input multiple (output),0.000901,0.000736,0.000963,0.00089


In [21]:
import plotly.graph_objects as go
import pandas as pd

# Filter the rank table to include only the top 15 trigrams of 2022
table_filtered = table_rank[table_rank['Trigram'].isin(table_rank['Trigram'][:20].tolist())]

# Sort the trigrams in descending order
table_filtered = table_filtered.sort_values(by='2022', ascending=False)

# Prepare the data for the Plotly heatmap
heatmap_data = table_filtered.set_index('Trigram').T
heatmap_data.reset_index(inplace=True)
heatmap_data.rename(columns={'index': 'Year'}, inplace=True)

# Custom color scale to emphasize the difference between lower ranks
custom_colorscale = [
    [0.0, 'rgb(30, 30, 255)'],
    [0.1, 'rgb(30, 144, 255)'],
    [0.2, 'rgb(30, 240, 255)'],
    [0.3, 'rgb(30, 255, 170)'],
    [0.4, 'rgb(30, 255, 50)'],
    [0.5, 'rgb(140, 255, 30)'],
    [0.6, 'rgb(255, 230, 30)'],
    [0.7, 'rgb(255, 160, 30)'],
    [0.8, 'rgb(255, 80, 30)'],
    [0.9, 'rgb(255, 20, 30)'],
    [1.0, 'rgb(255, 0, 0)']
]

# Create the heatmap using Plotly
fig = go.Figure(data=go.Heatmap(
    x=heatmap_data['Year'],
    y=list(heatmap_data.columns[1:]),
    z=heatmap_data.iloc[:, 1:].values.T,
    colorscale=custom_colorscale,
    colorbar=dict(title='Rank'),
    text=heatmap_data.iloc[:, 1:].values.T,  # Add rank numbers as text
    hoverinfo='skip'  # Disable hover info
))

fig.update_traces(textfont=dict(size=14, color='black'))  # Set text font size and color

fig.update_layout(
    title='Heatmap of Top 20 Trigrams of 2022 in 2018-2022',
    xaxis_title='Year',
    yaxis_title='Trigram',
    height=600,
    width=1000
)

fig.show()
py.plot(fig, filename = 'heatmap_frequency_2019_2022', auto_open=True)



'https://plotly.com/~tapetrova/38/'

In [23]:
import plotly.express as px

def add_line(fig, x0, y0, x1, y1, color, dash=None):
    fig.add_shape(
        type='line',
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color=color, dash=dash)
    )

# Calculate the rank difference and filter the DataFrame
table_rank['Diff'] = table_rank['2018'] - table_rank['2022']
table_filtered = table_rank

# Create the scatterplot
fig = px.scatter(
    table_filtered,
    x='2022',
    y='2018',
    hover_name='Trigram'
)

# Add diagonal lines for reference
add_line(fig, 0, 0, 99, 99, 'gray', 'dot')
add_line(fig, 0, 0, 67, 201, 'red')
add_line(fig, 0, 0, 99, 33, 'blue')

# Color the points based on the difference between the 2022 and 2018 ranks
fig.update_traces(
    marker=dict(
        size=12,
        color=table_filtered['Diff'],
        colorscale='RdBu_r',
        colorbar=dict(title='Rank 2018 vs. 2022')
    )
)
fig.update_layout(
    title='Scatterplot of Trigrams Rank in 2022 vs. 2018',
    xaxis_title='Rank in 2022',
    yaxis_title='Rank in 2018'
)

fig.update_traces(text=table_filtered.index)
fig.show()
py.plot(fig, filename = 'scatterplot_2D', auto_open=True)

'https://plotly.com/~tapetrova/40/'

In [24]:
import plotly.express as px

# Calculate the rank difference and filter the DataFrame to include only the top 100 trigrams
table_rank['Rank 2018 vs. 2022'] = table_rank['2018'] - table_rank['2022']
table_filtered = table_rank.head(100)

# Create the 3D scatterplot
fig = px.scatter_3d(
    table_filtered,
    x='2018',
    y='2020',
    z='2022',
    color='Rank 2018 vs. 2022',
    hover_name='Trigram',
    color_continuous_scale='RdBu_r',
    width=1000,
    height=800
)

# Set the axis labels and title
fig.update_traces(
    marker=dict(
        size=12,
        color=table_filtered['Rank 2018 vs. 2022'],
        colorscale='RdBu_r',
        colorbar=dict(title='Rank Difference'),
        line=dict(width=0.5, color='Black')
    ),
    text=table_filtered['Trigram'],
    textposition='top center'
)

fig.update_layout(
    scene=dict(
        xaxis_title='Rank in 2018',
        yaxis_title='Rank in 2020',
        zaxis_title='Rank in 2022'
    ),
    title='3D Scatterplot of Top-100 Trigrams in 2022 vs. 2020 vs. 2018'
)

# Show the scatterplot
fig.show()
py.plot(fig, filename = 'scatterplot_2D', auto_open=True)

'https://plotly.com/~tapetrova/40/'