In [1]:
import pandas as pd

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns

from data_utils import *

import os
import pickle

Site statistics from [Area51](https://area51.stackexchange.com/proposals/125174). The web page describes the following metrics for a healthy site:

- **questions per day:** 10 questions per day on average is a healthy beta, 5 questions or fewer per day needs some work. A healthy site generates lots of good content to make sure users keep coming back.
- **percentage answered:** 90% answered is a healthy beta, 80% answered needs some work. In the beta it's especially important that when new visitors ask questions they usually get a good answer.
- **avid users, total users:** Every site needs a solid group of core users to assist in moderating the site. We recommend:
    - 150 users with 200+ rep (currently `reputation[0]` users with 200+ rep)
    - 10 users with 2,000+ rep (currently `reputation[1]` users with 2,000+ rep)
    - 5 users with 3,000+ rep (currently `reputation[2]` user with 3,000+ rep)
- **answer ratio:** 2.5 answers per question is good, only 1 answer per question needs some work. On a healthy site, questions receive multiple answers and the best answer is voted to the top.
- **visits/day:** 1,500 visits per day is good, 500 visits per day needs some work. A great site benefits people outside the community. Eventually, 90% of a site's traffic should come from search engines.

In [2]:
# load pickle
# Load the previously saved snapshots from ../data/CSE_stats.pickle.
# CSE_stats0 keeps what was read from disk; CSE_stats is the working copy
# that later cells add to and save.

data_file = os.path.join("..", "data")
fname = "CSE_stats.pickle"
data_path = os.path.join(data_file, fname)

# NOTE(security): pickle.load can execute arbitrary code, so only load a
# file that this notebook wrote itself.
try:
    with open(data_path, 'rb') as handle:
        CSE_stats0 = pickle.load(handle)
except FileNotFoundError:
    # No data file yet: start from an empty history. Only this case is
    # handled; a corrupt or unreadable file still raises, so the save cell
    # cannot silently overwrite existing data with a near-empty dict.
    print("Unable to load: " + fname)
    print("creating empty dict: CSE_stats")
    CSE_stats0 = {}
else:
    print(fname + " loaded succesfully!")

# Shallow copy, so adding new dates does not modify CSE_stats0.
CSE_stats = CSE_stats0.copy()

CSE_stats.pickle loaded succesfully!


In [3]:
# Date key (YYYY-MM-DD) under which this snapshot is stored.
strdate = "2022-02-06"

# Record the figures for `strdate`. The values are typed in by hand.
# NOTE(review): convert_datetime(..., now=True) presumably stamps the date
# with the current time of day -- confirm in data_utils.
CSE_stats.update({
    strdate: {
        "date_time": convert_datetime(strdate, now=True),
        # activity
        "questions_per_day": 9.7,
        "answered": 79,
        "answer_ratio": 1.8,
        "visits_day": 995,
        # users
        "avid_users": 133,
        "total_users": 5393,
        "reputation>200": 133,
        "reputation>2000": 3,
        "reputation>3000": 1,
        # profile URLs of the three top beta users
        "top_beta_user01": "https://cardano.stackexchange.com/users/28/marek-mahut-stakenuts",
        "top_beta_user02": "https://cardano.stackexchange.com/users/382/samuel-leathers",
        "top_beta_user03": "https://cardano.stackexchange.com/users/1130/mitchell-turner",
        # Alternative URLs used for these slots on other dates:
        #"top_beta_user03" : "https://cardano.stackexchange.com/users/4302/nomad0"
        #"top_beta_user03" : "https://cardano.stackexchange.com/users/4023/andy-jazz",
        #"top_beta_user02" : "https://cardano.stackexchange.com/users/1142/eddex",
    }
})

In [4]:
# Show the date keys currently stored, as a quick check that the new date is present.
CSE_stats.keys()

dict_keys(['2021-08-06', '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12', '2021-08-13', '2021-08-16', '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-21', '2021-08-24', '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-31', '2021-09-01', '2021-09-02', '2021-09-03', '2021-09-06', '2021-09-07', '2021-09-08', '2021-09-09', '2021-09-10', '2021-09-13', '2021-09-14', '2021-09-15', '2021-09-16', '2021-09-17', '2021-09-20', '2021-09-21', '2021-09-22', '2021-09-24', '2021-09-27', '2021-09-28', '2021-09-29', '2021-09-30', '2021-10-01', '2021-10-04', '2021-10-05', '2021-10-06', '2021-10-07', '2021-10-08', '2021-10-11', '2021-10-12', '2021-10-13', '2021-10-14', '2021-10-15', '2021-10-18', '2021-10-19', '2021-10-20', '2021-10-21', '2021-10-22', '2021-11-08', '2021-11-09', '2021-11-10', '2021-11-11', '2021-11-15', '2021-11-19', '2021-11-22', '2021-11-26', '2021-11-29', '2021-12-03', '2021-12-06', '2021-12-10', '2021-12-13', '2022-01-07', '2022-01-10', '2022-01-11', '2022-01-

In [5]:
# save as pickle
# data_path is the only copy of the collected history, so do not overwrite
# it in place: dump to a sibling temp file and swap it in once the dump has
# completed. An interrupted write then leaves the previous file intact.
tmp_path = data_path + ".tmp"
with open(tmp_path, 'wb') as handle:
    pickle.dump(CSE_stats, handle, protocol=pickle.HIGHEST_PROTOCOL)
os.replace(tmp_path, data_path)

In [6]:
# Convert the snapshot dict into a DataFrame and display it.
# NOTE(review): to_df is not defined in this notebook; presumably it comes
# from the `data_utils` star import -- confirm.
df = to_df(CSE_stats)
df

Unnamed: 0,date_time,questions_per_day,answered,avid_users,total_users,rep>200,rep>2000,rep>3000,answer_ratio,visits_day,top_beta_user01,top_beta_user02,top_beta_user03,reputation>200,reputation>2000,reputation>3000
0,2021-08-06 00:00:00.000000,1.8,94,78,2400,78.0,1.0,1.0,1.6,296,https://cardano.stackexchange.com/users/28/mar...,https://cardano.stackexchange.com/users/1142/e...,https://cardano.stackexchange.com/users/281/gr...,,,
1,2021-08-09 00:00:00.000000,1.4,95,80,2424,80.0,1.0,1.0,1.6,306,https://cardano.stackexchange.com/users/28/mar...,https://cardano.stackexchange.com/users/1142/e...,https://cardano.stackexchange.com/users/281/gr...,,,
2,2021-08-10 00:00:00.000000,1.5,94,80,2429,80.0,1.0,1.0,1.6,306,https://cardano.stackexchange.com/users/28/mar...,https://cardano.stackexchange.com/users/1142/e...,https://cardano.stackexchange.com/users/281/gr...,,,
3,2021-08-11 00:00:00.000000,1.4,94,80,2436,80.0,1.0,1.0,1.6,313,https://cardano.stackexchange.com/users/28/mar...,https://cardano.stackexchange.com/users/1142/e...,https://cardano.stackexchange.com/users/281/gr...,,,
4,2021-08-12 00:00:00.000000,1.5,94,80,2459,80.0,1.0,1.0,1.6,316,https://cardano.stackexchange.com/users/28/mar...,https://cardano.stackexchange.com/users/1142/e...,https://cardano.stackexchange.com/users/281/gr...,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,2022-01-31 06:30:56.936898,12.4,79,132,5361,,,,1.8,1181,https://cardano.stackexchange.com/users/28/mar...,,https://cardano.stackexchange.com/users/4302/n...,132.0,3.0,1.0
88,2022-02-01 06:38:27.142166,10.5,79,132,5370,,,,1.8,1167,https://cardano.stackexchange.com/users/28/mar...,,https://cardano.stackexchange.com/users/4302/n...,132.0,3.0,1.0
89,2022-02-03 15:52:41.837240,9.7,79,133,5393,,,,1.8,995,https://cardano.stackexchange.com/users/28/mar...,,https://cardano.stackexchange.com/users/4302/n...,133.0,3.0,1.0
90,2022-02-04 12:26:20.992868,9.7,79,133,5393,,,,1.8,995,https://cardano.stackexchange.com/users/28/mar...,,https://cardano.stackexchange.com/users/4302/n...,133.0,3.0,1.0


# Plot

In [7]:
# Timestamp of the last row in df (used as the end date in the plots below).
df["date_time"].iloc[-1]

Timestamp('2022-02-06 17:44:02.597156')

In [21]:
# grab initial and final date
# t0/tf bound the horizontal reference lines; the moderators' figure further
# down reuses them.
t0 = df["date_time"].iloc[0]
tf = df["date_time"].iloc[-1]

# One entry per panel, in the order the traces are added:
#   (df column, trace name, subplot row, subplot col, reference value or None)
# Reference values are the healthy-site figures quoted from Area51 at the top
# of the notebook; total_users has none.
# Each trace and its reference line take their position from the same tuple.
# Previously the 2.5 answer-ratio line was drawn on the visits panel
# (row 2, col 1) instead of the answer-ratio panel (row 2, col 2).
panels = [
    ("questions_per_day", "questions/day", 1, 1, 10),
    ("answered", "ans", 1, 2, 90),
    ("avid_users", "avid_users", 3, 1, 150),
    ("total_users", "total_users", 3, 2, None),
    ("answer_ratio", "answer_ratio", 2, 2, 2.5),
    ("visits_day", "visits/day", 2, 1, 1500),
]

# subplot_titles are assigned row by row: (1,1), (1,2), (2,1), (2,2), (3,1), (3,2).
fig = make_subplots(rows=3, cols=2, subplot_titles=("Questions per Day", "% of questions answered", "Visits per Day", "Answer Ratio (# of answers per question)", "Avid Users", "Total Users"))

for column, label, row, col, target in panels:
    fig.add_trace(go.Scatter(x=df["date_time"], y=df[column], name=label), row=row, col=col)
    if target is not None:
        # Dash-dotted horizontal line at the reference value, spanning the full date range.
        fig.add_shape(type="line", x0=t0, y0=target, x1=tf, y1=target, line=dict(dash="dashdot"), row=row, col=col)

# The title carries the date part (first 10 characters) of the last timestamp.
# update_layout returns the figure, so this last expression also displays it.
fig.update_layout(showlegend=False, width=900, height=700, title="CSE metrics (YYYY-MM-DD): "+str(tf)[:10])
#fig.write_image("../images/mCSE-2022Feb06.png")

In [11]:
# Scratch calculation.
# NOTE(review): presumably related to the 150 avid-user target -- confirm or remove.
1/150

0.006666666666666667

In [9]:
# Share of all registered users that count as "avid", plotted over time.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df["date_time"],
            y=df["avid_users"].div(df["total_users"]),
            name="avid/total users ratio",
        )
    ]
)
# update_layout returns the figure, so this last expression also displays it.
fig.update_layout(title_text="Avid/Total Users Ratio")
#fig.write_image("../images/avidTotalRatio-2022Jan17.png")

In [9]:
# for CSE cardano moderators
# Three-panel summary. Each panel shows one metric plus a dash-dotted
# reference line at the value given below. t0/tf (first and last timestamp)
# come from the overview-figure cell above.
moderator_panels = (
    ("questions_per_day", "questions/day", 10),
    ("answered", "ans", 90),
    ("visits_day", "visits/day", 1500),
)

fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Questions per Day", "% of questions answered", "Visits per Day"),
)

for panel_col, (metric, trace_name, target) in enumerate(moderator_panels, start=1):
    fig.add_trace(
        go.Scatter(x=df["date_time"], y=df[metric], name=trace_name),
        row=1, col=panel_col,
    )
    fig.add_shape(
        type="line", x0=t0, y0=target, x1=tf, y1=target,
        line=dict(dash="dashdot"),
        row=1, col=panel_col,
    )

# Bare expression so the cell still displays the figure.
fig

In [1]:
# Scratch calculation.
# NOTE(review): presumably a per-day average over 30 days -- confirm what 281 counts, or remove.
281/30


9.366666666666667