# <center>VA REPORTING</center>

### Metric 1 - Registered users

In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, HTML

# Load the registered-users export and count the rows per country.
df1 = pd.read_csv("path/to/registered/users/csv")
country_counts = df1["country"].value_counts()  # computed once, reused for labels and counts
names = country_counts.index.values
values = country_counts.values

# Two-column table (country, number of users); the appendix renders it later.
columnsTitles = ['Country', 'Users']
data = dict(zip(columnsTitles, (names, values)))
df_users = pd.DataFrame(data, columns=columnsTitles)

# Grand total over all counted countries.
total = values.sum()
print("Total number of registered users: " + str(total))

#### Users distribution

In [None]:
# Pie chart of registered users per country: the first countries in
# `names`/`values` get their own slice, everything after them is folded into
# a single "Other" slice.
# Relies on `names`, `values` and `total` from the registered-users cell.
top_n = min(6, len(values))  # tolerate exports with fewer than six countries

labels = ["%s" % name for name in names[:top_n]]
labels.append("Other")

sizes = list(values[:top_n])
sizes.append(np.sum(values[top_n:]))

fig1, ax1 = plt.subplots(figsize=(6,5))
fig1.subplots_adjust(0.3,0,1,1)  # axes start at 30% of the figure width; the legend is anchored at the left edge
theme = plt.get_cmap('Spectral')

# One evenly spaced colour of the colormap per slice.
ax1.set_prop_cycle("color", [theme(1. * i / len(sizes)) for i in range(len(sizes))])

# autopct receives each slice's percentage; convert it back to an absolute count.
ax1.pie(sizes, shadow=True, autopct=lambda p: '{:.0f}'.format(p * total / 100), startangle=90)

ax1.axis('equal')

# Legend entries read "<label>, <share of total>%".
plt.legend(
    loc='upper left',
    labels=['%s, %1.1f%% ' % (
        l ,(float(s) / total) * 100) for l, s in zip(labels, sizes)],
    prop={'size': 11},
    bbox_to_anchor=(0.0, 1),
    bbox_transform=fig1.transFigure
)

plt.title("Total users per country")
plt.show()

### Metric 2 - Number of active users

---

Active users: <see query: Total number of active users>

---

### Metric 3 - Total number of executed tasks

---

Tasks executed: <see query: Total number of executed tasks>

---

### Metric 4 - Most used operators

The following bar chart shows the 10 most used Ophidia operators. The whole list is provided in the Appendix.

In [None]:
# Bar chart of operator execution counts, with the full ranking kept for the
# appendix table.
df1 = pd.read_csv("path/to/csv/N most used operators/query")

# Rank operators by execution count, highest first (sorted once, reused below).
ranked = df1.sort_values(by=['count'], ascending=False)

# NOTE(review): the first ranked row is skipped both in the chart (rows 1..10)
# and in the appendix table (rows 1..end) — presumably it is not a real
# operator entry; confirm against the query output.
names = list(ranked['operator'].iloc[1:11])
values = list(ranked['count'].iloc[1:11])

df_op = ranked.iloc[1:]

x = np.arange(len(names))
width = 0.35

fig, ax =plt.subplots(figsize=(10,7))
# Single series: centre each bar on its tick so the rotated labels line up
# with the bars (an x - width/2 offset only makes sense for grouped bars).
rects1 = ax.bar(x, values, width, label = "count")

ax.set_ylabel('count')
ax.set_title('10 most used operators')
ax.set_xticks(x)
ax.set_xticklabels(names,rotation=90)
ax.legend()

def autolabel(rects):
    """Annotate every bar in `rects` with its height, just above the bar.

    Draws on the module-level `ax` created above.
    """
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # shift the text 3 points upwards
                    textcoords="offset points",
                    ha='center', va='bottom')
autolabel(rects1)
fig.tight_layout()
plt.show()

### Metric 5 - Total number of cores/hour used

---

Total cores/hour: <see query: Total number of cores/hour used>

---

### Metric 6 - Cumulative execution time per operator

The following bar chart shows the cumulative execution time for each Ophidia operator. Due to an issue that occurred during the logging process, a small percentage of entries have been discarded because their *duration* field contained an invalid value.

In [None]:
# Horizontal bar chart (log scale) of cumulative execution time per operator,
# with the ranking kept for the appendix table.
df1 = pd.read_csv("path/to/csv/Cumulative execution time per operator/query")

# Rank operators by cumulative time, highest first (sorted once, reused below).
ranked = df1.sort_values(by=['sum'], ascending=False)

# `operators`/`values` hold ranked rows 1..14; the chart below drops one more
# entry from each, so it shows ranked rows 2..14, while the appendix table
# (`df_cum_exec`) keeps everything from row 1 onward.
# NOTE(review): confirm that skipping the leading rows is intentional.
operators = list(ranked['operator'].iloc[1:15])
values = list(ranked['sum'].iloc[1:15])

df_cum_exec = ranked.iloc[1:]

fig, ax =plt.subplots(figsize=(10,7))
y = np.arange(len(operators)-1)
height = 0.5

# Centre each bar on its tick so the operator labels line up with the bars.
ax.barh(y, values[1:len(values)], height, align='center')
ax.set_xscale(value="log")
ax.set_yticks(y)
ax.set_yticklabels(operators[1:len(operators)])
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('time (s)')
# NOTE(review): the trailing '?' in the title looks like a draft leftover — confirm.
ax.set_title('Cumulative execution time?')

plt.show()

### Metric 7 - Total number of workflows and operators per user

In [None]:
# Stacked bar chart: workflows (bottom) and operators (top) per user.
df1 = pd.read_csv("path/to/csv/Total number of workflows, operators and cores per user/query")
df_wf_op_core = df1  # unsorted table, rendered in the appendix

# Users ranked by operator count; rows 1..10 of the ranking are plotted
# (the top-ranked row is left out).
shown = df1.sort_values(by=['operators'], ascending=False).iloc[1:11]
names = list(shown['username'])
workflows = list(shown['workflows'])
operators = list(shown['operators'])

n = len(names)
x = np.arange(n)
width = 0.35

fig, ax = plt.subplots(figsize=(10,7))

# Second series is stacked on top of the first via `bottom`.
p1 = ax.bar(x, workflows, width)
p2 = ax.bar(x, operators, width, bottom=workflows)

ax.set_ylabel('count')
ax.set_title('Total number of workflows and operators per user')
ax.set_xticks(x)
ax.set_xticklabels(names, rotation=90)

ax.legend([p1[0], p2[0]], ['Workflows', 'Operators'])
plt.show()

### Metric 7.A - Total number of operators per user

In [None]:
# Pie chart of executed operators per user: the users with the most operators
# get their own (anonymised) slice, all remaining users are folded into "Other".
df1 = pd.read_csv("path/to/csv/Total number of workflows, operators and cores per user/query")

# Rank users by operator count, highest first (sorted once, reused below).
ranked = df1.sort_values(by=['operators'], ascending=False)
names = list(ranked['username'])
workflows = list(ranked['workflows'])
operators = list(ranked['operators'])

top_n = min(6, len(operators))  # tolerate exports with fewer than six users

# Slices are labelled "User 1".."User N" rather than with the usernames.
labels = ["User %s" % i for i in range(1, top_n + 1)]
labels.append("Other")

sizes = operators[:top_n] + [np.sum(operators[top_n:])]
total = sum(sizes)

fig1, ax1 = plt.subplots(figsize=(6,5))
fig1.subplots_adjust(0.3,0,1,1)  # axes start at 30% of the figure width; the legend is anchored at the left edge
theme = plt.get_cmap('Spectral')

# One evenly spaced colour of the colormap per slice.
ax1.set_prop_cycle("color", [theme(1. * i / len(sizes)) for i in range(len(sizes))])

# autopct receives each slice's percentage; convert it back to an absolute count.
ax1.pie(sizes, shadow=True, autopct=lambda p: '{:.0f}'.format(p * total / 100), startangle=90)
ax1.axis('equal')

# Legend entries read "<label>, <share of total>%".
plt.legend(
    loc='upper left',
    labels=['%s, %1.1f%% ' % (
        l ,(float(s) / total) * 100) for l, s in zip(labels, sizes)],
    prop={'size': 11},
    bbox_to_anchor=(0.0, 1),
    bbox_transform=fig1.transFigure
)

plt.title("Total number of executed operators per user")
plt.show()

### Metric 7.B - Total number of workflows per user

In [None]:
# Pie chart of executed workflows per user: the users with the most workflows
# get their own (anonymised) slice, all remaining users are folded into "Other".
df1 = pd.read_csv("path/to/csv/Total number of workflows, operators and cores per user/query")

# Rank users by workflow count, highest first (sorted once, reused below).
ranked = df1.sort_values(by=['workflows'], ascending=False)
names = list(ranked['username'])
workflows = list(ranked['workflows'])
operators = list(ranked['operators'])

top_n = min(6, len(workflows))  # tolerate exports with fewer than six users

# Slices are labelled "User 1".."User N" rather than with the usernames.
labels = ["User %s" % i for i in range(1, top_n + 1)]
labels.append("Other")

# Individual slices are entries 0..top_n-1 and "Other" sums the rest. Slicing
# keeps the two ranges complementary, so no user is skipped or counted twice
# (the sixth slice previously read index 6 instead of 5).
sizes = workflows[:top_n] + [np.sum(workflows[top_n:])]
total = sum(sizes)

fig1, ax1 = plt.subplots(figsize=(6,5))
fig1.subplots_adjust(0.3,0,1,1)  # axes start at 30% of the figure width; the legend is anchored at the left edge
theme = plt.get_cmap('Spectral')

# One evenly spaced colour of the colormap per slice.
ax1.set_prop_cycle("color", [theme(1. * i / len(sizes)) for i in range(len(sizes))])

# autopct receives each slice's percentage; convert it back to an absolute count.
ax1.pie(sizes, autopct=lambda p: '{:.0f}'.format(p * total / 100), shadow=True, startangle=90)
ax1.axis('equal')

# Legend entries read "<label>, <share of total>%".
plt.legend(
    loc='upper left',
    labels=['%s, %1.1f%%' % (
        l, (float(s) / total) * 100) for l, s in zip(labels, sizes)],
    prop={'size': 11},
    bbox_to_anchor=(0.0, 1),
    bbox_transform=fig1.transFigure
)

plt.title("Total number of executed workflows per user")
plt.show()

### Metric 7.C - Total number of cores per user

In [None]:
# Pie chart of used cores per user: the users with the most cores get their
# own (anonymised) slice, all remaining users are folded into "Other".
df1 = pd.read_csv("path/to/csv/Total number of workflows, operators and cores per user/query")

# Cores/hour table; only loaded here so the appendix can render it.
df2 = pd.read_csv("path/to/csv/Total number of workflows, operators and cores/hour per user/query")
df_wf_op_corehour = df2

# Rank users by total cores, highest first (sorted once, reused below).
ranked = df1.sort_values(by=['tot_cores'], ascending=False)
names = list(ranked['username'])
tot_cores = list(ranked['tot_cores'])

top_n = min(6, len(tot_cores))  # tolerate exports with fewer than six users

# Slices are labelled "User 1".."User N" rather than with the usernames.
labels = ["User %s" % i for i in range(1, top_n + 1)]
labels.append("Other")

# Individual slices are entries 0..top_n-1 and "Other" sums the rest. Slicing
# keeps the two ranges complementary, so no user is skipped or counted twice
# (the sixth slice previously read index 6 instead of 5).
sizes = tot_cores[:top_n] + [np.sum(tot_cores[top_n:])]
total = sum(sizes)

fig1, ax1 = plt.subplots(figsize=(6,5))
fig1.subplots_adjust(0.3,0,1,1)  # axes start at 30% of the figure width; the legend is anchored at the left edge
theme = plt.get_cmap('Spectral')

# One evenly spaced colour of the colormap per slice.
ax1.set_prop_cycle("color", [theme(1. * i / len(sizes)) for i in range(len(sizes))])

# autopct receives each slice's percentage; convert it back to an absolute count.
ax1.pie(sizes, autopct=lambda p: '{:.0f}'.format(p * total / 100), shadow=True, startangle=90)
ax1.axis('equal')

# Legend entries read "<label>, <share of total>%".
plt.legend(
    loc='upper left',
    labels=['%s, %1.1f%%' % (
        l, (float(s) / total) * 100) for l, s in zip(labels, sizes)],
    prop={'size': 11},
    bbox_to_anchor=(0.0, 1),
    bbox_transform=fig1.transFigure
)

plt.title("Total number of used cores per user")
plt.show()

### Metric 8 - Total number of workflows, operators and cores

In [None]:
# Load the overall totals and print them as plain text, without the index column.
df = pd.read_csv("path/to/csv/Total number of users, workflows, operators and cores/query")
totals_text = df.to_string(index=False)
print(totals_text)

### Metric 9 - Total number of workflows, operators and cores/hour

Since March 2018, the logging process has also tracked the number of cores used to run each task (operator).
The previous metric has therefore been revised to report total cores/hour instead of total cores.

In [None]:
# Load the cores/hour totals and print them as plain text, without the index column.
df = pd.read_csv("path/to/csv/Total number of users, workflows, operators and cores/hour/query")
corehour_totals_text = df.to_string(index=False)
print(corehour_totals_text)

## APPENDIX

#### TOTAL USERS PER COUNTRY

In [None]:
# Render the per-country user table as HTML, without the index column.
users_table_html = df_users.to_html(index=False)
display(HTML(users_table_html))

#### Ophidia operators: total number of executions

In [None]:
# Render the operator execution-count table as HTML, without the index column.
operators_table_html = df_op.to_html(index=False)
display(HTML(operators_table_html))

#### Ophidia operators: cumulative execution time

In [None]:
# Render the cumulative execution-time table as HTML, without the index column.
cum_exec_table_html = df_cum_exec.to_html(index=False)
display(HTML(cum_exec_table_html))

#### User statistics: number of workflows and operators executed, total number of cores used

In [None]:
# Render the per-user workflows/operators/cores table as HTML, without the index column.
user_core_table_html = df_wf_op_core.to_html(index=False)
display(HTML(user_core_table_html))

#### User statistics: number of workflows and operators executed, total number of cores/hour used (from March 2018)

In [None]:
# Render the per-user workflows/operators/cores-per-hour table as HTML, without the index column.
user_corehour_table_html = df_wf_op_corehour.to_html(index=False)
display(HTML(user_corehour_table_html))