In [1]:
import pandas as pd
import plotly.express as px
from raceplotly.plots import barplot

In [2]:
# Read the sampled posts and explode the "Tags" column so that each
# element of a post's tag collection ends up on its own row.
df = (
    pd.read_feather("sample2012.feather")
    .explode(column="Tags")
)

# Last expression of the cell: display the column index.
df.columns

Index(['index', 'CreationDate', 'Title', 'Tags', 'ViewCount', 'AnswerCount',
       'CommentCount', 'FavoriteCount'],
      dtype='object')

In [3]:
# Aggregate per (calendar month of CreationDate, tag):
#   ViewCount   -> mean
#   AnswerCount -> mean
#   index       -> count of non-null values (renamed to a post count later)
# reset_index() turns the two grouping keys back into ordinary columns.
tags = (
    df.groupby([df["CreationDate"].dt.to_period("M"), "Tags"])
    .agg(
        ViewCount=("ViewCount", "mean"),
        AnswerCount=("AnswerCount", "mean"),
        index=("index", "count"),
    )
    .reset_index()
)

In [4]:
# The aggregated "index" column holds a per-group row count; give it a
# descriptive name. Re-running is harmless: rename ignores missing labels.
tags = tags.rename({"index": "PostCount"}, axis="columns")

# Tag explorer

In [5]:
# Disabled experiment: if re-enabled, the two lines below would map every
# distinct tag to the same colour string "rgb(0,0,0)" and store the result
# in a new "color" column of `tags`.
# colors = dict(zip(tags.Tags.unique(), ["rgb(0,0,0)"] * len(tags.Tags.unique())))
# tags["color"] = tags.Tags.map(colors)


In [9]:
# Bar chart race over the aggregated tag table: bars are tags, bar length
# is PostCount, and the animation steps through the CreationDate values.
# top_entries=5 presumably limits each frame to the five largest tags
# (confirm against the raceplotly documentation).
race = barplot(
    tags,
    item_column="Tags",
    value_column="PostCount",
    time_column="CreationDate",
    top_entries=5,
    # item_color="color",  # left disabled; would need a "color" column in `tags`
)

bar_race = race.plot(
    title="Popular tags on Cooking StackOverflow",
    item_label="Tags",
    value_label="Post count (total)",
    frame_duration=800,  # presumably milliseconds per frame -- TODO confirm
)
bar_race.show()

In [10]:
# Save the bar chart race to bar.html in the current working directory.
bar_race.write_html('bar.html')

# Only baking and bread

In [32]:
# Tags to focus on for the rest of this section.
mylist = ["baking", "bread"]

# Keep only the monthly aggregates of those tags and truncate the mean
# view count to an integer. Boolean indexing followed by assign() yields
# a new frame, so `tags` itself is left untouched.
mytags = tags.loc[tags["Tags"].isin(mylist)].assign(
    ViewCount=lambda frame: frame["ViewCount"].astype(int)
)

In [16]:

# Monthly post count per tag, one marker per (month, tag) row.
# The monthly periods are converted to timestamps for the x axis, marker
# size follows the (integer) mean view count, and an OLS trendline is
# fitted per colour group.
fig = px.scatter(
    mytags,
    x=mytags["CreationDate"].dt.to_timestamp(),
    y="PostCount",
    color="Tags",
    size="ViewCount",
    trendline="ols",
    title="Post count of Baking and Bread tags on StackOverflow (size = number of views)",
)

fig.show()

In [17]:
# Save the baking/bread scatter plot to tags.html in the current working directory.
fig.write_html('tags.html')

# Pandemic

In [31]:
# Restrict the baking/bread aggregates to the window around the pandemic.
# Both bounds are exclusive, exactly like a strict ">" / "<" pair.
# NOTE(review): CreationDate holds monthly periods here, so the date strings
# are compared at month granularity -- confirm that is the intent.
pandemic = mytags.loc[
    mytags["CreationDate"].between("2018-01-01", "2022-07-30", inclusive="neither")
].copy()


In [24]:
# Line chart of monthly post counts for the two tags inside the window.
covid = px.line(
    pandemic,
    x=pandemic["CreationDate"].dt.to_timestamp(),
    y="PostCount",
    color="Tags",
    title="Baking and Bread",
)

# Dashed grey vertical marker at 2020-03-11.
covid.add_vline(
    x="2020-03-11",
    line_color="gray",
    line_dash="dash",
    line_width=2,
)

# Text label for the vertical marker, placed at the same x position.
# y=35 is a hand-picked height in PostCount units.
covid.add_annotation(
    x="2020-03-11",
    y=35,
    text="COVID-19",
    align="right",
    showarrow=False,
    font={
        "family": "Courier New, monospace",
        "size": 16,
        "color": "black",
    },
)


In [None]:
# Save the annotated line chart to covid.html in the current working directory.
covid.write_html('covid.html')

# Post explorer

In [27]:
# Tags to focus on in the post-level explorer.
mylist = ["baking", "bread"]

# Keep only the exploded post rows carrying one of those tags and cast the
# count columns to int.
#
# DataFrame.astype returns a new frame with the requested dtypes. The
# earlier `mydf.loc[:, cols] = mydf.loc[:, cols].astype(int)` form emitted a
# FutureWarning, and on pandas versions where `.loc` assignment writes into
# the existing columns in place the original dtypes are kept, so the cast
# is silently lost.
mydf = df[df.Tags.isin(mylist)].astype(
    {"ViewCount": int, "AnswerCount": int, "CommentCount": int}
)



In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [28]:
# Derive the calendar year of each post from its creation timestamp and
# store it in a "Year" column.
mydf = mydf.assign(Year=lambda posts: posts["CreationDate"].dt.year)

In [29]:
# Post-level bubble chart for the baking/bread posts: comments on x, views on
# a log-scaled y, bubble size from the answer count, one animation frame per
# year.
#
# Plotly Express orders animation frames by the first appearance of each
# value in the data, so with rows that are not sorted by date the slider
# would step through the years out of order. Pinning the order via
# category_orders keeps the frames chronological without reordering the
# rows (and therefore without changing the legend/colour order of the tags).
fig = px.scatter(
    mydf,
    x="CommentCount",
    y="ViewCount",
    color="Tags",
    size="AnswerCount",
    hover_name="Title",
    log_y=True,
    animation_frame="Year",
    category_orders={"Year": sorted(mydf["Year"].unique().tolist())},
)

fig.show()

In [30]:
# Save the animated post scatter plot to scatter.html in the current working directory.
fig.write_html('scatter.html')