In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from collections import defaultdict
# Root directory of the local data dump: the ratings CSV and the dialog text dumps
# read by the cells below are all located under this path.
DATA_DIR = '/home/yurakuratov/data/dp-agent-alexa-data'

In [None]:
# Load the ratings table from the data dump.
ratings_csv_path = DATA_DIR + '/ratings/ratings.csv'
data = pd.read_csv(ratings_csv_path)

In [None]:
def _parse_rating(raw):
    """Convert one raw rating cell to float after removing every '*' character."""
    return float(str(raw).replace('*', ''))


data['Rating'] = data['Rating'].map(_parse_rating)

In [None]:
# Parse the start timestamps into timezone-aware (UTC) datetimes.
start_time_col = 'Approximate Start Time'
data[start_time_col] = pd.to_datetime(data[start_time_col], utc=True)

In [None]:
from datetime import datetime, timedelta
from dateutil import tz

# Current UTC time, and the same instant truncated to the start of the hour.
now = datetime.now(tz=tz.gettz('UTC'))
end = now.replace(minute=0, second=0, microsecond=0)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two stacked panels built from the ratings table `data`:
#   row 1 - number of rated dialogs per hour,
#   row 2 - mean rating per hour plus the mean rating over 24h windows.
fig = make_subplots(rows=2, cols=1, subplot_titles=('Number of dialogs with ratings, hourly', 'Avg dialog rating, hourly'))

# Window widths are explicit Timedeltas. The previous `dt - dt.freq` relied on
# Timestamp.freq, which was deprecated in pandas 1.x and removed in pandas 2.0.
# Passing the Timedelta as `freq` also avoids the deprecated 'H' alias.
one_hour = pd.Timedelta(hours=1)
one_day = pd.Timedelta(days=1)
start_times = data['Approximate Start Time']

# A single reference time for the whole cell, so all ranges below agree with each other.
now = datetime.now(tz=tz.gettz('UTC'))

# --- hourly series: last 14 days, ending at the start of the current hour ---
end = datetime(year=now.year, month=now.month, day=now.day, hour=now.hour, tzinfo=now.tzinfo)
start = end - timedelta(days=14)

x = []
counts = []
ratings = []
for dt in pd.date_range(start=start, end=end, freq=one_hour):
    x += [dt]
    # Half-open window (dt - 1h, dt]: a dialog stamped exactly on the hour is counted
    # once (it used to be dropped by both neighbouring windows); same convention as
    # the 24h series below.
    hourly_dialogs = data[(start_times <= dt) & (start_times > dt - one_hour)]
    counts += [len(hourly_dialogs)]
    # hours without any rated dialog are drawn as 0
    ratings += [0 if len(hourly_dialogs) == 0 else hourly_dialogs['Rating'].mean()]

fig.add_trace(go.Scatter(x=x, y=counts, fill='tozeroy', name='count', ), row=1, col=1)
fig.add_trace(go.Scatter(x=x, y=ratings, fill='tozeroy', name='rating', ), row=2, col=1)

# --- 24h series: windows ending at 08:00 UTC of each of the last 14 days ---
end = datetime(year=now.year, month=now.month, day=now.day, hour=8, tzinfo=now.tzinfo)
start = end - timedelta(days=14)
x = []
ratings = []
for dt in pd.date_range(start=start, end=end, freq='D'):
    x += [dt]
    daily_dialogs = data[(start_times <= dt) & (start_times > dt - one_day)]
    ratings += [0 if len(daily_dialogs) == 0 else daily_dialogs['Rating'].mean()]
fig.add_trace(go.Scatter(x=x, y=ratings, name='rating, 24h'), row=2, col=1)

fig.update_layout(height=600, width=1200, showlegend=False)

# Limit the x axis of the rating panel to the range of the hourly series.
end = datetime(year=now.year, month=now.month, day=now.day, hour=now.hour, tzinfo=now.tzinfo)
start = end - timedelta(days=14)
fig['layout']['xaxis2']['range'] = [start, end]

fig['layout']['yaxis2']['range'] = [0, 5.5]
fig.update_layout(hovermode='x')
fig.show()

Dialogs

In [None]:
from copy import deepcopy
import re
from pprint import pprint
from datetime import datetime

In [None]:
# Parse the plain-text dialog dumps into `dialogs`, a list of dicts with the keys
# 'utterances', 'rating', 'feedback', 'first_utt_time', 'last_utt_time', 'conv_id',
# 'version' and, when an 'alexa_handler' response was found, 'alexa_commands'.
#
# Line kinds recognised inside one dialog:
#   * the first line after a separator: `--<conv_id>----<rating>----<feedback>`
#   * a line containing 'first_utt_time--': version plus first/last utterance timestamps
#   * `<speaker>: <text>` utterance lines; non-Human speakers look like `<name>(<active_skill>)`
#   * lines without ': ', which are appended to the text of the previous utterance
#   * the separator line `dialog_end`, which closes the dialog
dialogs = []
dialog_end = '-----------------------'
for dialogs_file in [DATA_DIR + '/amazon_dialogs_all.txt', DATA_DIR + '/amazon_dialogs_staging_all.txt']:
    with open(dialogs_file, 'r') as fin:
        fin_lines = fin.readlines()

    # Parser state is reset for every file, so a dump that does not end with a
    # separator cannot make the first line of the next file be read as an utterance.
    prev_line_end = True
    skip_dialog = False
    dialog = {'utterances': []}
    conv_id = None
    for i, line in enumerate(fin_lines):
        line = line.strip()

        if line == dialog_end:
            if conv_id is not None and not skip_dialog:
                dialog['rating'] = float(rating) if rating != 'no_rating' else rating
                dialog['feedback'] = feedback
                # only the leading 'YYYY-mm-dd HH:MM:SS' part of each timestamp is parsed
                dialog['first_utt_time'] = datetime.strptime(first_utt_time[:19], '%Y-%m-%d %H:%M:%S')
                dialog['last_utt_time'] = datetime.strptime(last_utt_time[:19], '%Y-%m-%d %H:%M:%S')
                dialog['conv_id'] = conv_id
                dialog['version'] = version.replace('v', '')
                dialogs += [deepcopy(dialog)]
            dialog = {'utterances': []}
            skip_dialog = False
            prev_line_end = True
            # Forget the header, so two separators in a row do not emit an empty
            # dialog that repeats the previous header.
            conv_id = None
            continue

        if prev_line_end:
            # dialog header; it must never be the last line of the file
            assert i < len(fin_lines) - 1
            m = re.match(r'--(.*)----(.*)----(.*)', line)
            conv_id, rating, feedback = m.group(1), m.group(2), m.group(3)
        elif 'first_utt_time--' in line:
            m = re.match(r'--(.*)--first_utt_time--(.*)-last_utt_time-(.*)--', line)
            version, first_utt_time, last_utt_time = m.group(1), m.group(2), m.group(3)
        else:
            parts = line.split(': ')
            if len(parts) == 1:
                # continuation of the previous utterance; ignored when there is none yet
                if dialog['utterances']:
                    dialog['utterances'][-1]['text'] += line
            else:
                spk, text = parts[0], ': '.join(parts[1:])
                utt = dict()
                # Reset per line: a value left over from an earlier utterance must not
                # decide whether this line is treated as an Alexa command.
                active_skill = None
                to_add = True
                if spk != 'Human':
                    m = re.match(r'(.*)\((.*)\)', spk)
                    if m is None:
                        # speaker is not of the form `<name>(<skill>)`: the line is dropped
                        to_add = False
                    else:
                        spk, active_skill = m.group(1), m.group(2)
                        utt['active_skill'] = active_skill
                if to_add:
                    utt['spk'] = spk
                    utt['text'] = text
                    dialog['utterances'] += [utt]
                # filter from dialog Alexa commands from lambda: Alexa, stop and so on.
                # and store them in alexa_command
                if spk != 'Human' and active_skill == 'alexa_handler':
                    if len(dialog['utterances']) < 2 or '/alexa' not in dialog['utterances'][-2]['text']:
                        print(f'Warning, possible wrong order of utterances in dump: {conv_id}.')
                        skip_dialog = True
                    dialog['alexa_commands'] = dialog['utterances'][-2:]
                    dialog['utterances'] = dialog['utterances'][:-2]
        prev_line_end = False

In [None]:
# Order the parsed dialogs from newest to oldest by their first utterance time.
dialogs = sorted(dialogs, key=lambda dlg: dlg['first_utt_time'], reverse=True)

In [None]:
# Collect, for rated dialogs only:
#   skills_ratings   - one [date, active_skill, rating, conv_id, version] row per
#                      utterance that carries an 'active_skill'
#   dialog_durations - one [date, duration in seconds, number of utterances] row per dialog
skills_ratings = []
dialog_durations = []
for dialog in dialogs:
    r = dialog['rating']
    if r == 'no_rating':
        continue
    conv_id = dialog['conv_id']
    date = dialog['first_utt_time']
    # total_seconds() instead of `.seconds`: `.seconds` is only the sub-day remainder,
    # so a dialog spanning more than a day (or with last < first) got a wrong duration.
    duration_sec = int((dialog['last_utt_time'] - dialog['first_utt_time']).total_seconds())
    n_utt = len(dialog['utterances'])
    dialog_durations.append([date, duration_sec, n_utt])
    skills_ratings.extend(
        [date, utt['active_skill'], r, conv_id, dialog['version']]
        for utt in dialog['utterances']
        if 'active_skill' in utt
    )

In [None]:
# Table with one row per utterance that has an active skill (rated dialogs only).
skills_ratings = pd.DataFrame(
    skills_ratings,
    columns=['date', 'active_skill', 'rating', 'conv_id', 'version'],
)
skills_ratings['date'] = pd.to_datetime(skills_ratings['date'], utc=True)

# n_turns = number of rows in this table that share the row's conv_id.
n_turns = skills_ratings['conv_id'].value_counts().to_dict()
skills_ratings['n_turns'] = skills_ratings['conv_id'].map(n_turns)

#skills_ratings = skills_ratings[skills_ratings['n_turns'] > 1]

# Table with one row per rated dialog: start date, duration and utterance count.
dialog_durations = pd.DataFrame(
    dialog_durations,
    columns=['date','time','n_utt'],
)
dialog_durations['date'] = pd.to_datetime(dialog_durations['date'], utc=True)

In [None]:
def get_last_skill(dialog, exit_intent=False):
    """Return the 'active_skill' of one of the final utterances of `dialog`.

    With `exit_intent=True` and at least three utterances, the third utterance
    from the end is used; otherwise the last utterance is used.
    """
    utterances = dialog['utterances']
    position = -3 if exit_intent and len(utterances) >= 3 else -1
    return utterances[position]['active_skill']

In [None]:
# Per-dialog table describing how each dialog ended: the Alexa command (if any),
# whether the last utterance contains '#+#exit', the rating, the number of turns
# and the skill considered to be the last one.
dialog_finished_columns = ['date', 'alexa_command', 'bot_goodbye', 'no_command_no_goodbye', 'rating', 'has_rating', 'n_turns', 'last_skill','conv_id', 'version']
dialog_finished_data = []
for dialog in dialogs:
    utterances = dialog['utterances']
    if not utterances:
        # No utterances to inspect; indexing [-1] below would raise IndexError.
        continue
    if utterances[-1]['spk'] == 'Human':
        # just 2 dialogs in whole dump
        continue

    rating = dialog['rating']
    has_rating = rating != 'no_rating'
    if has_rating:
        rating = float(rating)

    alexa_command = 'no_alexa_command'
    last_skill = None
    if 'alexa_commands' in dialog:
        alexa_command = dialog['alexa_commands'][0]['text']
        last_skill = get_last_skill(dialog)

    # When the last utterance contains '#+#exit', the last skill is looked up two
    # utterances earlier; this overrides the value chosen for an Alexa command.
    bot_respond_with_goodbye = '#+#exit' in utterances[-1]['text']
    if bot_respond_with_goodbye:
        last_skill = get_last_skill(dialog, exit_intent=True)

    if last_skill is None:
        last_skill = get_last_skill(dialog)

    no_command_no_goodbye = (alexa_command == 'no_alexa_command') and not bot_respond_with_goodbye

    dialog_finished_data.append([
        dialog['first_utt_time'],
        alexa_command,
        bot_respond_with_goodbye,
        no_command_no_goodbye,
        rating,
        has_rating,
        len(utterances) // 2,
        last_skill,
        dialog['conv_id'],
        dialog['version'],
    ])

dialog_finished_df = pd.DataFrame(dialog_finished_data, columns=dialog_finished_columns)
dialog_finished_df['date'] = pd.to_datetime(dialog_finished_df['date'], utc=True)

In [None]:
# prepare data for weighted skills rating with EMA(alpha)

def get_skills_weights(dialog, alpha):
    """Return exponentially decayed per-skill weights for one dialog.

    The 'Bot' utterances are visited in order. At each one, every weight
    accumulated so far is multiplied by (1 - alpha) and the weight of that
    utterance's active skill is increased by alpha.

    Returns a defaultdict mapping skill name -> weight.
    """
    weights = defaultdict(int)
    decay = 1 - alpha
    bot_skills = (utt['active_skill'] for utt in dialog['utterances'] if utt['spk'] == 'Bot')
    for skill in bot_skills:
        for name in weights:
            weights[name] *= decay
        weights[skill] += alpha
    return weights

def get_skills_active_n(dialog):
    """Count, per skill, how many 'Bot' utterances of `dialog` it produced.

    Returns a defaultdict mapping skill name -> number of utterances.
    """
    counts = defaultdict(int)
    bot_skills = (utt['active_skill'] for utt in dialog['utterances'] if utt['spk'] == 'Bot')
    for skill in bot_skills:
        counts[skill] += 1
    return counts

# prepare dataframes with weighted rating
#
# One row per rated dialog. Besides the fixed columns, each row gets
#   '<skill>_<alpha>_w' - the EMA weight of the skill for every alpha in `ema_alphas`
#   '<skill>_n'         - the number of 'Bot' utterances produced by the skill
# Skills that do not occur in a dialog end up as 0 through the final fillna(0).
# (The former leading call on dialogs[0] was a leftover check: its result was
# discarded and it raised IndexError on an empty dump, so it has been removed.)
ema_alphas = [0.5, 0.2]
dialog_skills_weights_data = []
for dialog in dialogs:
    r = dialog['rating']
    if r == 'no_rating':
        continue
    to_add = {
        'conv_id': dialog['conv_id'],
        'rating': r,
        'version': dialog['version'],
        'date': dialog['first_utt_time'],
        'n_turns': len(dialog['utterances']) // 2,
    }
    for a in ema_alphas:
        for sn, weight in get_skills_weights(dialog, alpha=a).items():
            to_add[f'{sn}_{a}_w'] = weight
    for sn, n_active in get_skills_active_n(dialog).items():
        to_add[f'{sn}_n'] = n_active
    dialog_skills_weights_data.append(to_add)

dialog_skills_weights_data = pd.DataFrame(dialog_skills_weights_data)
dialog_skills_weights_data['date'] = pd.to_datetime(dialog_skills_weights_data['date'], utc=True)
dialog_skills_weights_data = dialog_skills_weights_data.fillna(0)

In [None]:
def find_dialog_by_id(i):
    """Return the first dialog in `dialogs` whose 'conv_id' equals `i`, or None.

    Linear scan from the start of the list: slow, but it is okay if you are
    looking for recent dialogs.
    """
    return next((d for d in dialogs if d['conv_id'] == i), None)

In [None]:
# x = data[(data['no_command_no_goodbye'] == True)]['conv_id']
# print(len(x))
# for i in x.values:
#     d = find_dialog_by_id(i)
#     print(i, d['first_utt_time'], d['rating'])
#     for el in d['utterances']:
#         print(f"{el['spk']}: {el['text']}")
#     print('======================')

In [None]:
# Distinct values of the 'active_skill' column, in order of first appearance.
skill_names = pd.unique(skills_ratings['active_skill'])
skill_names

#### releases

In [None]:
# Release log: the 'date' column is parsed as day.month.year hour:minute (UTC) and
# the 'A/B: ' marker is removed from the release names.
releases = pd.read_csv('releases.txt')
releases['date'] = pd.to_datetime(releases['date'], format='%d.%m.%Y %H:%M', utc=True)
releases['release'] = releases['release'].map(lambda name: name.replace('A/B: ', ''))
releases.head(3)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Mean rating per skill for every release period, plus a '_total' line computed
# over dialogs (one rating per conv_id). Periods with fewer than 50 rated
# dialogs are skipped.
fig_versions_ratings = make_subplots(rows=1, cols=1, subplot_titles=('Skills Ratings by releases',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# only referenced by a disabled filter; kept for parity with the original cell
min_n_active_skill = 10

skill_names = (set(skills_ratings['active_skill'].unique()) - {'no_skill_name'}) | {'_total'}
# per series: x = release labels, skill_r = mean ratings, skill_z = sample sizes
x = {n: [] for n in skill_names}
skill_r = {n: [] for n in skill_names}
skill_z = {n: [] for n in skill_names}

min_r, max_r = 5, 0
releases_reversed = list(reversed(releases.values))
last_release_idx = len(releases) - 1
for i, (d_start, rel) in enumerate(releases_reversed):
    # a period ends where the next entry of releases_reversed starts; the final one ends now
    d_end = now if i == last_release_idx else releases_reversed[i+1][0]
    versions = rel.split('/')
    in_period = (skills_ratings['date'] >= d_start) & (skills_ratings['date'] < d_end)
    release_ratings = skills_ratings[in_period]
    # one row per dialog of the period
    d = release_ratings.groupby('conv_id').first()
    if len(d) < 50:
        continue
#     release_ratings = release_ratings[release_ratings['version'].isin(versions)]
    label = f'{d_end.date()} {rel}'
    per_skill = release_ratings.groupby('active_skill')['rating'].agg(['mean', 'count'])
    for sn, r, c in per_skill.itertuples(name=None):
        if sn not in skill_names:
            continue
        skill_r[sn] += [r]
        x[sn] += [label]
        skill_z[sn] += [c]
    skill_r['_total'] += [d['rating'].mean()]
    x['_total'] += [label]
    skill_z['_total'] += [len(d)]

for n in sorted(skill_names):
    if not skill_r[n]:
        continue
    fig_versions_ratings.add_trace(
        go.Scatter(
            name=n, x=x[n], y=skill_r[n], customdata=skill_z[n],
            hovertemplate='%{y:.2f}: count %{customdata}',
            line={'dash': 'dot'}, marker={'size':8},
        ),
        row=1, col=1,
    )
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

fig_versions_ratings.update_layout(height=500, width=1300, showlegend=True, )
fig_versions_ratings['layout']['yaxis1']['range'] = [min_r-0.1, max_r+0.1]
fig_versions_ratings.update_layout(hovermode='x', xaxis={'type': 'category'})
fig_versions_ratings.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Weighted mean rating per skill for every release period: each dialog's rating
# is weighted by the skill's '<skill>_<ema_alpha>_w' column. '_total' is the plain
# mean over the dialogs of the period. Periods with fewer than 50 dialogs are skipped.
ema_alpha = 0.5
fig_versions_ratings_ema = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings by releases, EMA ({ema_alpha})',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

skill_names = (set(skills_ratings['active_skill'].unique()) - {'no_skill_name'}) | {'_total'}
# per series: x = release labels, skill_r = ratings, skill_z = sample sizes
x = {n: [] for n in skill_names}
skill_r = {n: [] for n in skill_names}
skill_z = {n: [] for n in skill_names}

min_r, max_r = 5, 0
releases_reversed = list(reversed(releases.values))
last_release_idx = len(releases) - 1
dialog_dates = dialog_skills_weights_data['date']
for i, (d_start, rel) in enumerate(releases_reversed):
    # a period ends where the next entry of releases_reversed starts; the final one ends now
    d_end = now if i == last_release_idx else releases_reversed[i+1][0]
    versions = rel.split('/')
    release_ratings = dialog_skills_weights_data[(dialog_dates >= d_start) & (dialog_dates < d_end)]
    if len(release_ratings) < 50:
        continue
#     release_ratings = release_ratings[release_ratings['version'].isin(versions)]

    label = f'{d_end.date()} {rel}'
    for sn in skill_names:
        if sn == '_total':
            skill_r[sn] += [release_ratings['rating'].mean()]
            x[sn] += [label]
            skill_z[sn] += [len(release_ratings)]
            continue
        skill_active_n = release_ratings[f'{sn}_n'].sum()
        weights = release_ratings[f'{sn}_{ema_alpha}_w']
        weights_sum = weights.sum()
        # a skill with zero total weight in the period gets no point
        if weights_sum > 0:
            skill_r[sn] += [(weights * release_ratings['rating']).sum() / weights_sum]
            x[sn] += [label]
            skill_z[sn] += [skill_active_n]

for n in sorted(skill_names):
    if not skill_r[n]:
        continue
    fig_versions_ratings_ema.add_trace(
        go.Scatter(
            name=n, x=x[n], y=skill_r[n], customdata=skill_z[n],
            hovertemplate='%{y:.2f}: count %{customdata}',
            line={'dash': 'dot'}, marker={'size':8},
        ),
        row=1, col=1,
    )
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

fig_versions_ratings_ema.update_layout(height=500, width=1300, showlegend=True, )
fig_versions_ratings_ema['layout']['yaxis1']['range'] = [min_r-0.1, max_r+0.1]
fig_versions_ratings_ema.update_layout(hovermode='x', xaxis={'type': 'category'})
fig_versions_ratings_ema.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Same weighted per-release rating as the EMA figure, restricted to dialogs whose
# 'n_turns' is greater than `n_turns`. The 50-dialog minimum is applied after
# that restriction.
n_turns = 7
ema_alpha = 0.5
fig_versions_ratings_ema_more = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings by releases, EMA ({ema_alpha}), n_turns > {n_turns}',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

skill_names = (set(skills_ratings['active_skill'].unique()) - {'no_skill_name'}) | {'_total'}
# per series: x = release labels, skill_r = ratings, skill_z = sample sizes
x = {n: [] for n in skill_names}
skill_r = {n: [] for n in skill_names}
skill_z = {n: [] for n in skill_names}

min_r, max_r = 5, 0
releases_reversed = list(reversed(releases.values))
last_release_idx = len(releases) - 1
dialog_dates = dialog_skills_weights_data['date']
for i, (d_start, rel) in enumerate(releases_reversed):
    # a period ends where the next entry of releases_reversed starts; the final one ends now
    d_end = now if i == last_release_idx else releases_reversed[i+1][0]
    versions = rel.split('/')
    release_ratings = dialog_skills_weights_data[(dialog_dates >= d_start) & (dialog_dates < d_end)]
    release_ratings = release_ratings[release_ratings['n_turns'] > n_turns]
    if len(release_ratings) < 50:
        continue
#     release_ratings = release_ratings[release_ratings['version'].isin(versions)]

    label = f'{d_end.date()} {rel}'
    for sn in skill_names:
        if sn == '_total':
            skill_r[sn] += [release_ratings['rating'].mean()]
            x[sn] += [label]
            skill_z[sn] += [len(release_ratings)]
            continue
        skill_active_n = release_ratings[f'{sn}_n'].sum()
        weights = release_ratings[f'{sn}_{ema_alpha}_w']
        weights_sum = weights.sum()
        # a skill with zero total weight in the period gets no point
        if weights_sum > 0:
            skill_r[sn] += [(weights * release_ratings['rating']).sum() / weights_sum]
            x[sn] += [label]
            skill_z[sn] += [skill_active_n]

for n in sorted(skill_names):
    if not skill_r[n]:
        continue
    fig_versions_ratings_ema_more.add_trace(
        go.Scatter(
            name=n, x=x[n], y=skill_r[n], customdata=skill_z[n],
            hovertemplate='%{y:.2f}: count %{customdata}',
            line={'dash': 'dot'}, marker={'size':8},
        ),
        row=1, col=1,
    )
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

fig_versions_ratings_ema_more.update_layout(height=500, width=1300, showlegend=True, )
fig_versions_ratings_ema_more['layout']['yaxis1']['range'] = [min_r-0.1, max_r+0.1]
fig_versions_ratings_ema_more.update_layout(hovermode='x', xaxis={'type': 'category'})
fig_versions_ratings_ema_more.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Same weighted per-release rating as the EMA figure, restricted to dialogs whose
# 'n_turns' is at most `n_turns`. The 50-dialog minimum is applied after that
# restriction.
n_turns = 7
ema_alpha = 0.5
fig_versions_ratings_ema_less = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings by releases, EMA ({ema_alpha}), n_turns <= {n_turns}',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

skill_names = (set(skills_ratings['active_skill'].unique()) - {'no_skill_name'}) | {'_total'}
# per series: x = release labels, skill_r = ratings, skill_z = sample sizes
x = {n: [] for n in skill_names}
skill_r = {n: [] for n in skill_names}
skill_z = {n: [] for n in skill_names}

min_r, max_r = 5, 0
releases_reversed = list(reversed(releases.values))
last_release_idx = len(releases) - 1
dialog_dates = dialog_skills_weights_data['date']
for i, (d_start, rel) in enumerate(releases_reversed):
    # a period ends where the next entry of releases_reversed starts; the final one ends now
    d_end = now if i == last_release_idx else releases_reversed[i+1][0]
    # note: a later cell displays `versions`, so the assignment is kept
    versions = rel.split('/')
    release_ratings = dialog_skills_weights_data[(dialog_dates >= d_start) & (dialog_dates < d_end)]
    release_ratings = release_ratings[release_ratings['n_turns'] <= n_turns]
    if len(release_ratings) < 50:
        continue
#     release_ratings = release_ratings[release_ratings['version'].isin(versions)]

    label = f'{d_end.date()} {rel}'
    for sn in skill_names:
        if sn == '_total':
            skill_r[sn] += [release_ratings['rating'].mean()]
            x[sn] += [label]
            skill_z[sn] += [len(release_ratings)]
            continue
        skill_active_n = release_ratings[f'{sn}_n'].sum()
        weights = release_ratings[f'{sn}_{ema_alpha}_w']
        weights_sum = weights.sum()
        # a skill with zero total weight in the period gets no point
        if weights_sum > 0:
            skill_r[sn] += [(weights * release_ratings['rating']).sum() / weights_sum]
            x[sn] += [label]
            skill_z[sn] += [skill_active_n]

for n in sorted(skill_names):
    if not skill_r[n]:
        continue
    fig_versions_ratings_ema_less.add_trace(
        go.Scatter(
            name=n, x=x[n], y=skill_r[n], customdata=skill_z[n],
            hovertemplate='%{y:.2f}: count %{customdata}',
            line={'dash': 'dot'}, marker={'size':8},
        ),
        row=1, col=1,
    )
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

fig_versions_ratings_ema_less.update_layout(height=500, width=1300, showlegend=True, )
fig_versions_ratings_ema_less['layout']['yaxis1']['range'] = [min_r-0.1, max_r+0.1]
fig_versions_ratings_ema_less.update_layout(hovermode='x', xaxis={'type': 'category'})
fig_versions_ratings_ema_less.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Stacked bars: for each 24h window of the last 14 days, the share of rated
# dialogs per rounded rating value (0..5). One rating per dialog (conv_id).
fig_daily_hist_ratings = make_subplots(rows=1, cols=1, subplot_titles=('Ratings, hist, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Explicit window width. The previous `dt.freq * 1` relied on Timestamp.freq,
# which was deprecated in pandas 1.x and removed in pandas 2.0.
one_day = pd.Timedelta(days=1)

ratings_values = list(range(6))
# per rounded rating: x = window end, skill_r = share of dialogs, skill_c = dialog count
x = {n: [] for n in ratings_values}
skill_r = {n: [] for n in ratings_values}
skill_c = {n: [] for n in ratings_values}

for dt in pd.date_range(start=start, end=end, freq='D'):
    # half-open window [dt - 1 day, dt)
    daily_ratings = skills_ratings[(skills_ratings['date'] < dt) & (skills_ratings['date'] >= dt - one_day)]
    d = daily_ratings.groupby('conv_id').first()
    d['rating_round'] = d['rating'].apply(round)
    rating_counts = d.groupby('rating_round')['rating'].count()
    # rating_counts is empty for a window without dialogs, so len(d) is never 0 below
    for r, v in rating_counts.items():
        skill_r[r] += [v / len(d)]
        skill_c[r] += [v]
        x[r] += [dt]

for r in ratings_values:
    fig_daily_hist_ratings.add_bar(name=r,x=x[r],y=skill_r[r], customdata=skill_c[r], hovertemplate = '%{y:.2f}: count: %{customdata}')

fig_daily_hist_ratings.update_layout(height=500, width=1300, showlegend=True)
fig_daily_hist_ratings['layout']['yaxis1']['range'] = [0, 1]
fig_daily_hist_ratings.update_layout(hovermode='x', barmode='stack')
fig_daily_hist_ratings.show()

In [None]:
# NOTE(review): `versions` is not assigned in this cell. On a top-to-bottom run it
# holds whatever the last executed `versions = rel.split('/')` in the release-plot
# cells above left behind, so this output depends on state from earlier cells.
versions

In [None]:
def make_comparable(v):
    """Turn a dotted version string into a list of ints usable as a sort key.

    Each '.'-separated component that is an integer contributes that integer.
    Any other component (e.g. '0-exp') contributes the integer before its
    first '-' followed by a 0.
    """
    key = []
    for part in v.split('.'):
        try:
            key.append(int(part))
        except ValueError:
            key.extend([int(part.partition('-')[0]), 0])
    return key
make_comparable('8.15.0-exp.2')

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Stacked bars: for each bot version with at least 50 rated dialogs, the share of
# dialogs per rounded rating value (0..5). The hover text also shows the dialog
# count and the mean rating of the version.
fig_version_ratings = make_subplots(rows=1, cols=1, subplot_titles=('Ratings by version',))

versions = set(skills_ratings['version']) - {'no_info'}

ratings_values = list(range(6))
# per rounded rating: x = versions, version_r = shares, version_c = [count, mean rating]
x = {n: [] for n in ratings_values}
version_r = {n: [] for n in ratings_values}
version_c = {n: [] for n in ratings_values}

for ver in sorted(versions, key=make_comparable):
    # one row (first rating) per dialog of this version
    d = skills_ratings[skills_ratings['version'] == ver].groupby('conv_id').first()
    rounded = d['rating'].apply(round)
    n_dialogs = len(d)
    if n_dialogs < 50:
        continue
    avg_r = d['rating'].mean()
    rating_counts = d['rating'].groupby(rounded).count()
    for r, v in rating_counts.items():
        version_r[r] += [v / n_dialogs]
        version_c[r] += [[v, avg_r]]
        x[r] += [ver]

for r in ratings_values:
    fig_version_ratings.add_bar(
        name=r, x=x[r], y=version_r[r], customdata=version_c[r],
        hovertemplate='%{y:.2f}: count: %{customdata[0]} avg_rating: %{customdata[1]:.2f}',
    )

fig_version_ratings.update_layout(height=500, width=1300, showlegend=True)
fig_version_ratings.update_layout(hovermode='x', barmode='stack', xaxis={'type': 'category'})
fig_version_ratings.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two figures over 24h windows of the last 50 days (rated dialogs only):
#   dialog_time  - mean dialog duration (row 1) and mean number of utterances (row 2)
#   shares_n_utt - share of dialogs with at most N utterances, for each N in max_lens
# Release dates are drawn as vertical lines on both figures.
dialog_time = make_subplots(rows=2, cols=1, subplot_titles=('Dialog time(sec), Daily', 'Avg number of utterances, Daily',))
shares_n_utt = make_subplots(rows=1, cols=1, subplot_titles=(['Number of utterances, distribution, Daily']))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=50)

# Explicit window width. The previous `dt.freq * 1` relied on Timestamp.freq,
# which was deprecated in pandas 1.x and removed in pandas 2.0.
one_day = pd.Timedelta(days=1)

max_lens = [2,4,8,12,16,24,32,48,64]
time_ = []
n_utt = []
x = []
utt_shares = [[] for len_ in max_lens]
for dt in pd.date_range(start=start, end=end, freq='1D'):
    # half-open window [dt - 1 day, dt); windows without dialogs produce no point
    daily_times = dialog_durations[(dialog_durations['date'] < dt) & (dialog_durations['date'] >= dt - one_day)]
    if len(daily_times) > 0:
        time_.append(sum(daily_times['time'])/len(daily_times['time']))
        n_utt.append(sum(daily_times['n_utt'])/len(daily_times['n_utt']))
        for i, len_ in enumerate(max_lens):
            utt_shares[i].append((len(daily_times[daily_times['n_utt']<=len_])+0.0)/(len(daily_times)))
        x.append(dt)
dialog_time.add_trace(go.Scatter(x=x, y=time_,
                      name='Average dialog time(sec)', line={'dash': 'dot'}, marker={'size':8}), row=1, col=1)

dialog_time.add_trace(go.Scatter(x=x, y=n_utt,
                      name='Average number of utterances', line={'dash': 'dot'}, marker={'size':8}), row=2, col=1)

# release markers; their vertical extent follows the fixed y ranges set below
for d, r in releases.values:
    dialog_time.add_shape(dict(type="line", x0=d, y0=0, x1=d, y1=200, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
    dialog_time.add_annotation(x=d,y=200,text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)
    dialog_time.add_shape(dict(type="line", x0=d, y0=10, x1=d, y1=35, line=dict(color="RoyalBlue", width=1)), row=2, col=1)
    dialog_time.add_annotation(x=d,y=35,text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=2, col=1)

dialog_time.update_layout(height=500, width=1300, showlegend=True)
dialog_time['layout']['yaxis1']['range'] = [50, 200]
dialog_time['layout']['yaxis2']['range'] = [10, 35]
dialog_time.update_layout(hovermode='x')
dialog_time.show()
for i in range(len(utt_shares)):
    shares_n_utt.add_trace(go.Scatter(x=x, y=utt_shares[i],
                      name='n_utts<='+str(max_lens[i]), line={'dash': 'dot'}, marker={'size':8}), row=1, col=1)
for d, r in releases.values:
    shares_n_utt.add_shape(dict(type="line", x0=d, y0=0, x1=d, y1=1, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
    shares_n_utt.add_annotation(x=d,y=1,text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)


shares_n_utt.update_layout(height=500, width=1300, showlegend=True)
shares_n_utt['layout']['yaxis1']['range'] = [0, 1]
shares_n_utt.update_layout(hovermode='x')
shares_n_utt.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# For each 24h window of the last 14 days: the share of rows of `skills_ratings`
# (utterances of rated dialogs) that belong to each skill. Releases newer than
# `start` are drawn as vertical lines.
fig_daily_counts_relative = make_subplots(rows=1, cols=1, subplot_titles=('Skill was selected, relative, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Explicit window width. The previous `dt.freq * 1` relied on Timestamp.freq,
# which was deprecated in pandas 1.x and removed in pandas 2.0.
one_day = pd.Timedelta(days=1)

# NOTE: `skill_names` is taken from an earlier cell.
skill_names = set(skill_names)
# per skill: x = window end, skill_c = share of utterances, skill_z = utterance count
x = {n: [] for n in skill_names}
skill_c = {n: [] for n in skill_names}
skill_z = {n: [] for n in skill_names}

for dt in pd.date_range(start=start, end=end, freq='D'):
    # half-open window [dt - 1 day, dt)
    daily_ratings = skills_ratings[(skills_ratings['date'] < dt) & (skills_ratings['date'] >= dt - one_day)]
    for sn, c in daily_ratings.groupby('active_skill')['rating'].count().items():
        if sn in skill_names:
            skill_c[sn] += [c / len(daily_ratings)]
            x[sn] += [dt]
            skill_z[sn] += [c]

# smallest / largest plotted share, used as the vertical extent of the release markers
min_x, max_x = 1e10, 0
for n in sorted(list(skill_names)):
    if len(skill_c[n]) > 0:
        fig_daily_counts_relative.add_trace(
            go.Scatter(x=x[n], y=skill_c[n], customdata=skill_z[n], name=n, line={'dash': 'dot'}, marker={'size':8},
                       hovertemplate = '%{y:.3f}: count %{customdata}'),
            row=1, col=1)
        min_x = min(min_x, min(skill_c[n]))
        max_x = max(max_x, max(skill_c[n]))

for d, r in releases.values:
    if d > start:
        fig_daily_counts_relative.add_shape(dict(type="line", x0=d, y0=min_x, x1=d, y1=max_x, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_daily_counts_relative.add_annotation(x=d,y=max_x, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_daily_counts_relative.update_layout(height=500, width=1300, showlegend=True)
fig_daily_counts_relative.update_layout(hovermode='x')
fig_daily_counts_relative.show()

In [None]:
from tqdm import tqdm as tqdm

# Rolling rating per skill and in total, over windows of `avg_n_dialogs`
# consecutive dialogs with more than `n_turns` turns, for the last 35 days.
avg_n_dialogs = 200
n_turns = 7

fig_moving_avg = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings, moving average over last {avg_n_dialogs} dialogs with number of turns > {n_turns}',))

# NOTE: `skill_names` is taken from an earlier cell and may already contain '_total'.
skill_names = set(skill_names)
# Every series is listed exactly once. Building this list as
# `list(skill_names) + ['_total']` drew the '_total' trace twice whenever
# skill_names already contained '_total'.
series_names = sorted(skill_names | {'_total'})
# per series: x = dialog dates, skill_r = rolling ratings, skill_c = rolling utterance counts
x = {n: [] for n in series_names}
skill_c = {n: [] for n in series_names}
skill_r = {n: [] for n in series_names}

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=35)

# rows are taken in reverse order of the table before the rolling windows are applied
in_range = (skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)
skills_ratings_by_range = skills_ratings[in_range][::-1]
skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] >  n_turns]

min_r = 5
max_r = 0


def _moving_average_series(rows):
    """Return (dates, ratings, counts) of the rolling rating over `rows`.

    `rows` is grouped by 'conv_id' in order of appearance. Each dialog
    contributes its rating multiplied by its number of rows. Sums are taken
    over `avg_n_dialogs` consecutive dialogs; positions where the rolling
    value is NaN are left out.
    """
    sr_gb = rows.groupby('conv_id', sort=False)
    per_dialog = sr_gb.first()
    dates_by_id = per_dialog['date'].to_dict()
    per_dialog['cnt'] = sr_gb['rating'].count()
    per_dialog['r*cnt'] = per_dialog['cnt'] * per_dialog['rating']
    s_count = per_dialog['cnt'].rolling(avg_n_dialogs).sum()
    moving_avg = per_dialog['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
    #moving_avg = per_dialog['rating'].rolling(avg_n_dialogs).mean()

    dates, values, counts = [], [], []
    for (cid, v), (_, c) in zip(moving_avg.items(), s_count.items()):
        if pd.isna(v):
            continue
        dates.append(pd.to_datetime(dates_by_id[cid], utc=True))
        values.append(v)
        counts.append(c)
    return dates, values, counts


x['_total'], skill_r['_total'], skill_c['_total'] = _moving_average_series(skills_ratings_by_range)

for sn in tqdm(sorted(skill_names - {'_total'})):
    skill_rows = skills_ratings_by_range[skills_ratings_by_range['active_skill'] == sn]
    x[sn], skill_r[sn], skill_c[sn] = _moving_average_series(skill_rows)

for n in series_names:
    if len(skill_r[n]) == 0:
        continue
    fig_moving_avg.add_trace(go.Scatter(x=x[n], y=skill_r[n], name=n, line={'dash': 'dot'}, marker={'size':8},
                                        customdata=skill_c[n], hovertemplate = '%{y:.2f}: selected: %{customdata}',), row=1, col=1)
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

# releases newer than `start` are drawn as vertical lines spanning the plotted range
for d, r in releases.values:
    if d > start:
        fig_moving_avg.add_shape(dict(type="line", x0=d, y0=min_r, x1=d, y1=max_r, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_moving_avg.add_annotation(x=d,y=max_r, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_moving_avg.update_layout(height=500, width=1300, showlegend=True)
fig_moving_avg.update_layout(hovermode='x')
fig_moving_avg['layout']['yaxis1']['range'] = [min_r, max_r]
fig_moving_avg.show()

In [None]:
# from tqdm import tqdm as tqdm

# avg_n_dialogs = 200
# n_turns = 7

# fig_moving_avg_less = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings, moving average over last {avg_n_dialogs} dialogs with number of turns <= {n_turns}',))

# x = dict()
# skill_c = dict()
# skill_r = dict()
# skill_names = set(skill_names)
# for n in skill_names:
#     skill_c[n] = []
#     skill_r[n] = []
#     x[n] = []
# skill_c['_total'] = []
# skill_r['_total'] = []
# x['_total'] = []

    
# now = datetime.now(tz=tz.gettz('UTC'))
# end = now
# start = end - timedelta(days=35)

# skills_ratings_by_range = skills_ratings[(skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)][::-1]
# skills_ratings_by_range = skills_ratings_by_range[(skills_ratings_by_range['n_turns'] <= n_turns) & (skills_ratings_by_range['n_turns'] >1)]

# min_r, max_r = 5, 0

# sr_gb = skills_ratings_by_range.groupby('conv_id', sort=False)
# d = sr_gb.first()
# dates_by_id = d['date'].to_dict()
# d['cnt'] = sr_gb['rating'].count()
# d['r*cnt'] = d['cnt'] * d['rating']
# s_count = d['cnt'].rolling(avg_n_dialogs).sum()
# moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
# for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
#     date = dates_by_id[i]
#     if not pd.isna(v):
#         x['_total'] += [pd.to_datetime(date, utc=True)]
#         skill_r['_total'] += [v]
#         skill_c['_total'] += [c]
    
# for sn in tqdm(list(skill_names)):
#     sr_gb = skills_ratings_by_range[skills_ratings_by_range['active_skill']==sn].groupby('conv_id', sort=False)
#     d = sr_gb.first()
#     dates_by_id = d['date'].to_dict()
#     d['cnt'] = sr_gb['rating'].count()
#     d['r*cnt'] = d['cnt'] * d['rating']
#     s_count = d['cnt'].rolling(avg_n_dialogs).sum()
#     moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
#     for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
#         date = dates_by_id[i]
#         if not pd.isna(v):
#             x[sn] += [pd.to_datetime(date, utc=True)]
#             skill_r[sn] += [v]
#             skill_c[sn] += [c]
            
            
# for n in sorted(list(skill_names) + ['_total']):
#     if len(skill_r[n]) == 0:
#         continue
#     fig_moving_avg_less.add_trace(go.Scatter(x=x[n], y=skill_r[n], name=n, line={'dash': 'dot'}, marker={'size':8}, 
#                                         customdata=skill_c[n], hovertemplate = '%{y:.2f}: selected: %{customdata}',), row=1, col=1)
#     min_r = min(min_r, min(skill_r[n]))
#     max_r = max(max_r, max(skill_r[n]))
    
# for d, r in releases.values:
#     if d > start:
#         fig_moving_avg_less.add_shape(dict(type="line", x0=d, y0=min_r, x1=d, y1=max_r, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
#         fig_moving_avg_less.add_annotation(x=d,y=max_r, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

# fig_moving_avg_less.update_layout(height=500, width=1300, showlegend=True)
# fig_moving_avg_less.update_layout(hovermode='x')
# fig_moving_avg_less['layout']['yaxis1']['range'] = [min_r, max_r]
# fig_moving_avg_less.show()

In [None]:
from tqdm import tqdm as tqdm

# Rolling window size (in dialogs) and the minimum dialog length to include.
avg_n_dialogs = 200
n_turns = 7

fig_moving_avg_d_total = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings, -_total, moving average over last {avg_n_dialogs} dialogs with number of turns > {n_turns}',))

# Per-series accumulators: x -> timestamps, skill_r -> rolling rating
# (minus the overall rolling rating for skills), skill_c -> rolling row count.
x = dict()
skill_c = dict()
skill_r = dict()
skill_names = set(skills_ratings['active_skill'].unique()) - set(['no_skill_name'])
for n in skill_names:
    skill_c[n] = []
    skill_r[n] = []
    x[n] = []
skill_c['_total'] = []
skill_r['_total'] = []
x['_total'] = []


now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=35)

# Last 35 days, row order reversed, long dialogs only.
skills_ratings_by_range = skills_ratings[(skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)][::-1]
skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] >  n_turns]

min_r = 5
max_r = 0

# Overall rolling rating: one row per conversation, weighted by its rated-row count.
sr_gb = skills_ratings_by_range.groupby('conv_id', sort=False)
d = sr_gb.first()
dates_by_id = d['date'].to_dict()
d['cnt'] = sr_gb['rating'].count()
d['r*cnt'] = d['cnt'] * d['rating']
s_count = d['cnt'].rolling(avg_n_dialogs).sum()
moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
total = dict()             # timestamp -> overall rolling rating (kept for compatibility)
total_by_conv_id = dict()  # conv_id   -> overall rolling rating (used as the baseline)

for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
    date = dates_by_id[i]
    if not pd.isna(v):
        ts = pd.to_datetime(date, utc=True)
        x['_total'] += [ts]
        skill_r['_total'] += [v]
        skill_c['_total'] += [c]
        total[ts] = v
        total_by_conv_id[i] = v

for sn in tqdm(list(skill_names)):
    sr_gb = skills_ratings_by_range[skills_ratings_by_range['active_skill']==sn].groupby('conv_id', sort=False)
    d = sr_gb.first()
    dates_by_id = d['date'].to_dict()
    d['cnt'] = sr_gb['rating'].count()
    d['r*cnt'] = d['cnt'] * d['rating']
    s_count = d['cnt'].rolling(avg_n_dialogs).sum()
    moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
    for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
        date = dates_by_id[i]
        # The baseline is looked up by conversation id rather than by timestamp:
        # timestamps are not guaranteed unique per conversation, and a point whose
        # overall rolling value is undefined is skipped instead of raising KeyError.
        if pd.isna(v) or i not in total_by_conv_id:
            continue
        x[sn] += [pd.to_datetime(date, utc=True)]
        skill_r[sn] += [v - total_by_conv_id[i]]
        skill_c[sn] += [c]


# Plot every non-empty skill series ('_total' itself is not drawn here).
for n in sorted(list(skill_names)):
    if len(skill_r[n]) == 0:
        continue
    fig_moving_avg_d_total.add_trace(go.Scatter(x=x[n], y=skill_r[n], name=n, line={'dash': 'dot'}, marker={'size':8},
                                        customdata=skill_c[n], hovertemplate = '%{y:.2f}: selected: %{customdata}',), row=1, col=1)
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

# Vertical line + rotated label for each release dated after `start`.
for d, r in releases.values:
    if d > start:
        fig_moving_avg_d_total.add_shape(dict(type="line", x0=d, y0=min_r, x1=d, y1=max_r, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_moving_avg_d_total.add_annotation(x=d,y=max_r, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_moving_avg_d_total.update_layout(height=500, width=1300, showlegend=True)
fig_moving_avg_d_total.update_layout(hovermode='x')
fig_moving_avg_d_total['layout']['yaxis1']['range'] = [min_r, max_r]
fig_moving_avg_d_total.show()

In [None]:
from tqdm import tqdm as tqdm

# Rolling window size (in dialogs) and the maximum dialog length to include.
avg_n_dialogs = 200
n_turns = 7

fig_moving_avg_d_total_less = make_subplots(rows=1, cols=1, subplot_titles=(f'Skills Ratings, -_total, moving average over last {avg_n_dialogs} dialogs with number of turns <= {n_turns}',))

# Per-series accumulators: x -> timestamps, skill_r -> rolling rating
# (minus the overall rolling rating for skills), skill_c -> rolling row count.
x = dict()
skill_c = dict()
skill_r = dict()
skill_names = set(skills_ratings['active_skill'].unique()) - set(['no_skill_name'])
for n in skill_names:
    skill_c[n] = []
    skill_r[n] = []
    x[n] = []
skill_c['_total'] = []
skill_r['_total'] = []
x['_total'] = []


now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=35)

# Last 35 days, row order reversed, short dialogs only (1 < n_turns <= threshold).
skills_ratings_by_range = skills_ratings[(skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)][::-1]
skills_ratings_by_range = skills_ratings_by_range[(skills_ratings_by_range['n_turns'] <= n_turns) & (skills_ratings_by_range['n_turns'] >1)]

min_r, max_r = 5, 0

# Overall rolling rating: one row per conversation, weighted by its rated-row count.
sr_gb = skills_ratings_by_range.groupby('conv_id', sort=False)
d = sr_gb.first()
dates_by_id = d['date'].to_dict()
d['cnt'] = sr_gb['rating'].count()
d['r*cnt'] = d['cnt'] * d['rating']
s_count = d['cnt'].rolling(avg_n_dialogs).sum()
moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
total = dict()             # timestamp -> overall rolling rating (kept for compatibility)
total_by_conv_id = dict()  # conv_id   -> overall rolling rating (used as the baseline)
for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
    date = dates_by_id[i]
    if not pd.isna(v):
        ts = pd.to_datetime(date, utc=True)
        x['_total'] += [ts]
        skill_r['_total'] += [v]
        skill_c['_total'] += [c]
        total[ts] = v
        total_by_conv_id[i] = v

for sn in tqdm(list(skill_names)):
    sr_gb = skills_ratings_by_range[skills_ratings_by_range['active_skill']==sn].groupby('conv_id', sort=False)
    d = sr_gb.first()
    dates_by_id = d['date'].to_dict()
    d['cnt'] = sr_gb['rating'].count()
    d['r*cnt'] = d['cnt'] * d['rating']
    s_count = d['cnt'].rolling(avg_n_dialogs).sum()
    moving_avg = d['r*cnt'].rolling(avg_n_dialogs).sum() / s_count
    for (i, v), (_, c) in zip(moving_avg.items(), s_count.items()):
        date = dates_by_id[i]
        # The baseline is looked up by conversation id rather than by timestamp:
        # timestamps are not guaranteed unique per conversation, and a point whose
        # overall rolling value is undefined is skipped instead of raising KeyError.
        if pd.isna(v) or i not in total_by_conv_id:
            continue
        x[sn] += [pd.to_datetime(date, utc=True)]
        skill_r[sn] += [v - total_by_conv_id[i]]
        skill_c[sn] += [c]


# Plot every non-empty skill series ('_total' itself is not drawn here).
for n in sorted(list(skill_names)):
    if len(skill_r[n]) == 0:
        continue
    fig_moving_avg_d_total_less.add_trace(go.Scatter(x=x[n], y=skill_r[n], name=n, line={'dash': 'dot'}, marker={'size':8},
                                        customdata=skill_c[n], hovertemplate = '%{y:.2f}: selected: %{customdata}',), row=1, col=1)
    min_r = min(min_r, min(skill_r[n]))
    max_r = max(max_r, max(skill_r[n]))

# Vertical line + rotated label for each release dated after `start`.
for d, r in releases.values:
    if d > start:
        fig_moving_avg_d_total_less.add_shape(dict(type="line", x0=d, y0=min_r, x1=d, y1=max_r, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_moving_avg_d_total_less.add_annotation(x=d,y=max_r, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_moving_avg_d_total_less.update_layout(height=500, width=1300, showlegend=True)
fig_moving_avg_d_total_less.update_layout(hovermode='x')
fig_moving_avg_d_total_less['layout']['yaxis1']['range'] = [min_r, max_r]
fig_moving_avg_d_total_less.show()

In [None]:
from collections import Counter
# Histogram of the text of the first Alexa command recorded in each dialog.
# Dialogs with no 'alexa_commands' entry, an empty/None one, or a first command
# without 'text' are counted under '_empty' (`or [{}]` guards the [0] lookup).
alexa_commands = Counter((dialog.get('alexa_commands') or [{}])[0].get('text', '_empty') for dialog in dialogs)
alexa_commands

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_dialog_finished_day = make_subplots(rows=1, cols=1, subplot_titles=('Dialog finished reason, with rating, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Per finish-reason accumulators: x -> bin end timestamps, value_v -> share of
# rated dialogs in the bin, value_c -> [count, avg rating, avg n_turns] for hover.
x = dict()
value_v = dict()
value_c = dict()
finished_values = set(dialog_finished_df['alexa_command'].unique()) - {'no_alexa_command'}
finished_values |= {'no_command_no_goodbye', 'bot_goodbye'}
# because of some bugged dialog
finished_values -= {'alexa handler: command logged'}
for n in finished_values:
    value_v[n] = []
    value_c[n] = []
    x[n] = []

# Width of one bin. An explicit Timedelta replaces `dt.freq`: Timestamp.freq was
# deprecated in pandas 1.4 and removed in 2.0.
bin_width = pd.Timedelta(days=1)
for dt in pd.date_range(start=start, end=end, freq='D'):
    daily_data = dialog_finished_df[(dialog_finished_df['date'] < dt) & (dialog_finished_df['date'] >= dt - bin_width)]
    daily_data = daily_data[daily_data['has_rating']==True]
    for v in finished_values:
        # Values starting with '/' are matched against the 'alexa_command' column;
        # the others are used as names of columns of `dialog_finished_df` that
        # serve directly as the row mask.
        if v.startswith('/'):
            mask = daily_data['alexa_command'] == v
        else:
            mask = daily_data[v]
        v_count = mask.sum()
        selected = daily_data[mask]
        avg_rating = selected['rating'].mean()
        avg_n_turns = selected['n_turns'].mean()
        if v_count > 0:
            # v_count > 0 implies daily_data is non-empty, so the division is safe
            value_v[v] += [v_count / len(daily_data)]
            value_c[v] += [[v_count, avg_rating, avg_n_turns]]
            x[v] += [dt]

for r in sorted(list(finished_values), reverse=True):
    fig_dialog_finished_day.add_bar(name=r,x=x[r],y=value_v[r], customdata=value_c[r], hovertemplate = '%{y:.2f}: count: %{customdata[0]} rating: %{customdata[1]:.2f} n_turns: %{customdata[2]:.2f}', row=1, col=1)

fig_dialog_finished_day.update_layout(height=500, width=1300, showlegend=True)
fig_dialog_finished_day['layout']['yaxis1']['range'] = [0, 1]
fig_dialog_finished_day.update_layout(hovermode='x', barmode='stack')
fig_dialog_finished_day.show()


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_dialog_finished_skill_day = make_subplots(rows=1, cols=1, subplot_titles=('Last skill in dialog, with rating, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Per-skill accumulators: x -> bin end timestamps, value_v -> share of rated
# dialogs that ended on the skill, value_c -> [count, avg rating, avg n_turns].
x = dict()
value_v = dict()
value_c = dict()
skill_names = set(skill_names)
for n in skill_names:
    value_c[n] = []
    value_v[n] = []
    x[n] = []

# Width of one bin. An explicit Timedelta replaces `dt.freq`: Timestamp.freq was
# deprecated in pandas 1.4 and removed in 2.0.
bin_width = pd.Timedelta(days=1)
for dt in pd.date_range(start=start, end=end, freq='1D'):
    daily_data = dialog_finished_df[(dialog_finished_df['date'] < dt) & (dialog_finished_df['date'] >= dt - bin_width)]
    daily_data = daily_data[(daily_data['has_rating'])]

    for sn in skill_names:
        d = daily_data[daily_data['last_skill'] == sn]
        # only plot bins with more than two dialogs for the skill
        if len(d) > 2:
            value_v[sn] += [len(d) / len(daily_data)]
            value_c[sn] += [[len(d), d['rating'].mean(), d['n_turns'].mean()]]
            x[sn] += [dt]

# Sentinels for the running min/max (was `10*10`, a typo for `10**10`).
min_v, max_v = 10**10, - 10**10
for sn in sorted(list(skill_names)):
    if len(value_v[sn]) > 0:
        fig_dialog_finished_skill_day.add_scatter(name=sn,x=x[sn],y=value_v[sn], customdata=value_c[sn], line={'dash': 'dot'}, hovertemplate = '%{y:.2f}: count: %{customdata[0]} rating: %{customdata[1]:.2f} n_turns: %{customdata[2]:.2f}', row=1, col=1)
        min_v = min(min_v, min(value_v[sn]))
        max_v = max(max_v, max(value_v[sn]))

# Vertical line + rotated label for each release dated after `start`.
for d, r in releases.values:
    if d > start:
        fig_dialog_finished_skill_day.add_shape(dict(type="line", x0=d, y0=min_v, x1=d, y1=max_v, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_dialog_finished_skill_day.add_annotation(x=d,y=max_v, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_dialog_finished_skill_day.update_layout(height=500, width=1300, showlegend=True)
#fig_dialog_finished_skill_day['layout']['yaxis1']['range'] = [0, 0.5]
fig_dialog_finished_skill_day.update_layout(hovermode='x')
fig_dialog_finished_skill_day.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_dialog_finished_stop_skill_day = make_subplots(rows=1, cols=1, subplot_titles=('Last skill in dialog, "Alexa, stop", "Alexa, exit", "Alexa, quit", with rating, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Per-skill accumulators: x -> window end timestamps, value_v -> share of the
# selected dialogs that ended on the skill, value_c -> [count, avg rating, avg n_turns].
x = dict()
value_v = dict()
value_c = dict()
skill_names = set(skill_names)
for n in skill_names:
    value_c[n] = []
    value_v[n] = []
    x[n] = []

# Points are 12 hours apart, each covering the preceding 24 hours (two steps).
# An explicit Timedelta replaces `dt.freq * 2`: Timestamp.freq was deprecated in
# pandas 1.4 and removed in 2.0.
window = pd.Timedelta(hours=24)
for dt in pd.date_range(start=start, end=end, freq='12H'):
    daily_data = dialog_finished_df[(dialog_finished_df['date'] < dt) & (dialog_finished_df['date'] >= dt - window)]
    # rated dialogs that ended with a stop handler / user-initiated command
    daily_data = daily_data[(daily_data['has_rating']==True) & ((daily_data['alexa_command'] == '/alexa_stop_handler') | (daily_data['alexa_command'] =='/alexa_USER_INITIATED'))]

    for sn in skill_names:
        d = daily_data[daily_data['last_skill'] == sn]
        # only plot windows with more than two dialogs for the skill
        if len(d) > 2:
            value_v[sn] += [len(d) / len(daily_data)]
            value_c[sn] += [[len(d), d['rating'].mean(), d['n_turns'].mean()]]
            x[sn] += [dt]

min_v, max_v = 10**10, - 10**10
for sn in sorted(list(skill_names)):
    if len(value_v[sn]) > 0:
        fig_dialog_finished_stop_skill_day.add_scatter(name=sn,x=x[sn],y=value_v[sn], customdata=value_c[sn], line={'dash': 'dot'}, hovertemplate = '%{y:.2f}: count: %{customdata[0]} rating: %{customdata[1]:.2f} n_turns: %{customdata[2]:.2f}', row=1, col=1)
        min_v = min(min_v, min(value_v[sn]))
        max_v = max(max_v, max(value_v[sn]))

# Vertical line + rotated label for each release dated after `start`.
for d, r in releases.values:
    if d > start:
        fig_dialog_finished_stop_skill_day.add_shape(dict(type="line", x0=d, y0=min_v, x1=d, y1=max_v, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_dialog_finished_stop_skill_day.add_annotation(x=d,y=max_v, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_dialog_finished_stop_skill_day.update_layout(height=500, width=1300, showlegend=True)
#fig_dialog_finished_skill_day['layout']['yaxis1']['range'] = [0, 0.5]
fig_dialog_finished_stop_skill_day.update_layout(hovermode='x')
fig_dialog_finished_stop_skill_day.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_dialog_finished_skill_all_day = make_subplots(rows=1, cols=1, subplot_titles=('Last skill in dialog, all, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Per-skill accumulators: x -> bin end timestamps, value_v -> share of all
# dialogs that ended on the skill, value_c -> [count, avg n_turns] for hover.
x = dict()
value_v = dict()
value_c = dict()
skill_names = set(skill_names)
for n in skill_names:
    value_c[n] = []
    value_v[n] = []
    x[n] = []

# Width of one bin. An explicit Timedelta replaces `dt.freq`: Timestamp.freq was
# deprecated in pandas 1.4 and removed in 2.0.
bin_width = pd.Timedelta(days=1)
for dt in pd.date_range(start=start, end=end, freq='D'):
    daily_data = dialog_finished_df[(dialog_finished_df['date'] < dt) & (dialog_finished_df['date'] >= dt - bin_width)]
    #daily_data = daily_data[daily_data['alexa_command'] == '/alexa_stop_handler']
    for sn in skill_names:
        d = daily_data[daily_data['last_skill'] == sn]
        # only plot bins with more than two dialogs for the skill
        if len(d) > 2:
            value_v[sn] += [len(d) / len(daily_data)]
            #value_c[sn] += [[len(d), d['rating'].mean(), d['n_turns'].mean()]]
            value_c[sn] += [[len(d), d['n_turns'].mean()]]
            x[sn] += [dt]

min_v, max_v = 10**10, - 10 ** 10
for sn in sorted(list(skill_names)):
    if len(value_v[sn]) > 0:
        fig_dialog_finished_skill_all_day.add_scatter(name=sn,x=x[sn],y=value_v[sn], customdata=value_c[sn], line={'dash': 'dot'}, hovertemplate = '%{y:.2f}: count: %{customdata[0]} n_turns: %{customdata[1]:.2f}', row=1, col=1)
        min_v = min(min_v, min(value_v[sn]))
        max_v = max(max_v, max(value_v[sn]))

# Vertical line + rotated label for each release dated after `start`.
for d, r in releases.values:
    if d > start:
        fig_dialog_finished_skill_all_day.add_shape(dict(type="line", x0=d, y0=min_v, x1=d, y1=max_v, line=dict(color="RoyalBlue", width=1)), row=1, col=1)
        fig_dialog_finished_skill_all_day.add_annotation(x=d,y=max_v, text=r, textangle=-90, showarrow=True, font=dict(color="black",size=10), opacity=0.7, row=1, col=1)

fig_dialog_finished_skill_all_day.update_layout(height=500, width=1300, showlegend=True)
#fig_dialog_finished_skill_day['layout']['yaxis1']['range'] = [0, 0.5]
fig_dialog_finished_skill_all_day.update_layout(hovermode='x')
fig_dialog_finished_skill_all_day.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_dialog_finished_all_day = make_subplots(rows=1, cols=1, subplot_titles=('Dialog finished reason, all, Last 24h',))

now = datetime.now(tz=tz.gettz('UTC'))
end = now
start = end - timedelta(days=14)

# Per finish-reason accumulators: x -> bin end timestamps, value_v -> share of
# all dialogs in the bin, value_c -> [count, avg n_turns] for the hover text.
x = dict()
value_v = dict()
value_c = dict()
finished_values = set(dialog_finished_df['alexa_command'].unique()) - {'no_alexa_command'}
finished_values |= {'no_command_no_goodbye', 'bot_goodbye'}
finished_values -= {'alexa handler: command logged'}
for n in finished_values:
    value_v[n] = []
    value_c[n] = []
    x[n] = []

# Width of one bin. An explicit Timedelta replaces `dt.freq`: Timestamp.freq was
# deprecated in pandas 1.4 and removed in 2.0.
bin_width = pd.Timedelta(days=1)
for dt in pd.date_range(start=start, end=end, freq='D'):
    daily_data = dialog_finished_df[(dialog_finished_df['date'] < dt) & (dialog_finished_df['date'] >= dt - bin_width)]
    for v in finished_values:
        # Values starting with '/' are matched against the 'alexa_command' column;
        # the others are used as names of columns of `dialog_finished_df` that
        # serve directly as the row mask.
        if v.startswith('/'):
            mask = daily_data['alexa_command'] == v
        else:
            mask = daily_data[v]
        v_count = mask.sum()
        avg_n_turns = daily_data[mask]['n_turns'].mean()

        if v_count > 0:
            # v_count > 0 implies daily_data is non-empty, so the division is safe
            value_v[v] += [v_count / len(daily_data)]
            value_c[v] += [[v_count, avg_n_turns]]
            x[v] += [dt]

for r in sorted(list(finished_values), reverse=True):
    fig_dialog_finished_all_day.add_bar(name=r,x=x[r],y=value_v[r], customdata=value_c[r], hovertemplate = '%{y:.2f}: count: %{customdata[0]} n_turns:  %{customdata[1]:.2f}', row=1, col=1)

fig_dialog_finished_all_day.update_layout(height=500, width=1300, showlegend=True)
fig_dialog_finished_all_day['layout']['yaxis1']['range'] = [0, 1]
fig_dialog_finished_all_day.update_layout(hovermode='x', barmode='stack')
fig_dialog_finished_all_day.show()

In [None]:
# Inspect the dialogs finished with '/alexa_USER_INITIATED' during the 24-hour
# window that began six days ago.
now = datetime.now(tz=tz.gettz('UTC'))
start = now - timedelta(days=6)
end = start + timedelta(days=1)
dialog_finished_df.loc[
    (dialog_finished_df['date'] >= start)
    & (dialog_finished_df['date'] < end)
    & (dialog_finished_df['alexa_command'] == '/alexa_USER_INITIATED')
]

In [None]:
# Bubble chart: x = number of turns, y = rating, bubble size ~ number of dialogs.
max_n = 30
x=[]
y=[]
z=[]
n_days = 7
now = datetime.now(tz=tz.gettz('UTC'))
start_date = (now - timedelta(days = n_days))
start_date = pd.Timestamp(start_date)
daily_ratings = skills_ratings[skills_ratings['date'] >= start_date]
# Series indexed by (n_turns, rating) holding the number of rows in each group.
count = daily_ratings.groupby(['n_turns','rating']).count()['date']
# n_turns values that actually occur in the window; `count[i]` raises KeyError
# for any other i, so those are skipped.
observed_n_turns = set(count.index.get_level_values(0))
for i in range(1,max_n):
    if i not in observed_n_turns:
        continue
    for j in (count[i].keys()):
#        if count[i][j] // i > 0:
        x.append(i)
        y.append(j)
        # NOTE(review): the row count is divided by n_turns, presumably because
        # skills_ratings holds one row per turn — confirm.
        z.append(count[i][j]//i)
rating_by_n_turns_fig = go.Figure(data=[go.Scatter(
    x=x,
    y=y,
    mode='markers',
    marker=dict(
        size=[j / 1.5 for j in z],
        ),
    customdata=z,
    hovertemplate = '%{y:.2f}: count: %{customdata}',
    name='Rating by n_utt'
)])
rating_by_n_turns_fig.update_layout(title='Rating by n_turns for last {:d} days'.format(n_days), showlegend=False, height=500, width=1300)
rating_by_n_turns_fig['layout']['yaxis']['range'] = [0.1, 5.9]
rating_by_n_turns_fig['layout']['xaxis']['title'] = {'text': 'n_turns'}
rating_by_n_turns_fig['layout']['yaxis']['title'] = {'text': 'rating'}
rating_by_n_turns_fig.show()

In [None]:
# todo: make it faster, compute on db side
import os
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import text
from tqdm import tqdm as tqdm
from collections import defaultdict

# run ssh tunnel: ssh alexa-tool -NL 5432:localhost:5432
# Credentials can be overridden through the environment; the defaults are the
# values that used to be hardcoded here.
user = os.environ.get('ALEXA_DB_USER', 'alexa')
password = os.environ.get('ALEXA_DB_PASSWORD', 'alexa')
host='127.0.0.1'
dbname='alexaprize'

db_uri = f'postgresql://{user}:{password}@{host}/{dbname}'
engine = create_engine(db_uri, pool_timeout=60)
session_maker = sessionmaker(bind=engine)


returned_users_fig = make_subplots(rows=1, cols=1, subplot_titles=(f'Dialogs with returned users, daily',))
now = datetime.now(tz=tz.gettz('UTC'))
end = datetime(year=now.year, month=now.month, day=now.day, hour=8, tzinfo=now.tzinfo)
n_days = 14
start = end - timedelta(days=n_days)

# x -> day, y -> percentage of dialogs from users with more than one dialog that
# day, z -> [k, t] (dialogs of such users, distinct dialogs) for the hover text.
x = []
y = []
z = []
for i in tqdm(range(n_days)):
    day_start = start
    end = day_start + timedelta(days=1)
    # Advance the window unconditionally so that a failing day is skipped
    # rather than re-queried on every remaining iteration.
    start = end

    session = session_maker()
    try:
        t = session.execute(text(f"select COUNT(DISTINCT amazon_conv_id) as user_id from conversation WHERE date_start BETWEEN '{day_start}' and '{end}'"))
        t = t.fetchone()[0]
        k = session.execute(text(f"SELECT SUM(n_count) FROM (select human->'user_telegram_id', COUNT(amazon_conv_id) as n_count "
                                 f"FROM conversation "
                                 f"WHERE date_start BETWEEN '{day_start}' and '{end}' and human->>'user_telegram_id' LIKE 'amzn1%' "
                                 f"GROUP BY human->'user_telegram_id') as tmp WHERE tmp.n_count > 1"))
        k = k.fetchone()[0]
    except Exception as err:
        # best effort: report the failure and leave this day out of the plot
        tqdm.write(f'{day_start.date()}: skipped, query failed: {err!r}')
        continue
    finally:
        session.close()

    # SUM() yields NULL when no user had more than one dialog that day.
    k = int(k or 0)
    t = int(t or 0)
    if t == 0:
        # no dialogs at all: the percentage is undefined, leave the day out
        continue

    x += [day_start.date()]
    y += [k/t*100]
    z += [[k, t]]
    #print(f'{day_start.date()}: number of dialogs with returned users: {k/t*100:.2f}% = {k}/{t}')

returned_users_fig.add_bar(name='Dialogs with returned users',x=x,y=y, customdata=z, hovertemplate = '%{y:.2f}% = %{customdata[0]}/%{customdata[1]}',
            text=[f'{el:.2f}%' for el in y], textposition='outside',
            row=1, col=1)

returned_users_fig.update_layout(height=500, width=1000, showlegend=False)
returned_users_fig.update_layout(hovermode='x', barmode='stack', xaxis={'type': 'category'})

### A/B tests plots

In [None]:
# Collect the releases whose name contains '/' (treated here as A/B tests, with
# the version names separated by '/'). Each test ends at the date stored in the
# previous row of `releases`, or now for the first row.
# NOTE(review): this presumably relies on `releases` being sorted newest first — confirm.
ab_tests = []
for i, (d, r) in enumerate(releases.values):
    if '/' not in r:
        continue
    date_start = d
    date_end = releases.iloc[i-1, 0] if i > 0 else datetime.now(tz=tz.gettz('UTC'))
    ab_tests.append({'date_start': date_start, 'date_end': date_end, 'name': r})

ab_test = ab_tests[0]

In [None]:
# Rows with known version info; the skill name is suffixed with the version so
# that each (skill, version) pair becomes its own series.
# .copy() makes this an independent frame, so the column assignment below does
# not write into a slice of `skills_ratings` (SettingWithCopyWarning).
versioned_ratings = skills_ratings[skills_ratings['version'] != 'no_info'].copy()
versioned_ratings['active_skill'] = versioned_ratings['active_skill'] + '_' + versioned_ratings['version']

In [None]:
# Per-version ratings and utterance counts for the first A/B test in `ab_tests`.
ab_test = ab_tests[0]
start = ab_test['date_start']
end = ab_test['date_end']
versions = ab_test['name'].split('/')

# One row per conversation (first row of each conv_id group).
only_dialogs = skills_ratings.groupby('conv_id').first()
ab_test_dialogs = only_dialogs[(only_dialogs['date'] <= end) & (only_dialogs['date'] >= start)][::-1]
# The mask is built from the frame it is applied to; a mask taken from
# `only_dialogs` has a different index and only works through implicit reindexing.
ab_test_dialogs = ab_test_dialogs[ab_test_dialogs['version'].isin(versions)]

versions_data = {el: {'ratings': [], 'n_utt': []} for el in versions}
for v in versions:
    ab_test_version_dialogs = ab_test_dialogs[ab_test_dialogs['version'] == v]
    versions_data[v]['ratings'] = ab_test_version_dialogs['rating'].values.tolist()
    # n_utt is taken as twice the number of turns
    versions_data[v]['n_utt'] = (ab_test_version_dialogs['n_turns'].values * 2).tolist()

In [None]:
# Dialogs (one row per conversation) that fall inside the first A/B test's date
# range, in reversed row order and without any version filter.
ab_test = ab_tests[0]
start, end = ab_test['date_start'], ab_test['date_end']
versions = ab_test['name'].split('/')

only_dialogs = skills_ratings.groupby('conv_id').first()
ab_test_dialogs = only_dialogs.loc[(only_dialogs['date'] >= start) & (only_dialogs['date'] <= end)].iloc[::-1]

In [None]:
import itertools
from scipy import stats as sts
from tqdm import tqdm as tqdm

In [None]:
class TextHtml:
    """Wrap a plain-text report so it prints as text and can be exported as HTML."""

    def __init__(self, text):
        # Raw report text; lines are separated by '\n'.
        self.text = text

    def to_html(self):
        """Return a minimal HTML page with every text line followed by <br>."""
        body = ''.join(f'{line}<br>\n' for line in self.text.split('\n'))
        return '<html>\n<head></head>\n<body>\n<hr>\n' + body + '</body>\n</html>'

    def __str__(self):
        return self.text

    def __repr__(self):
        # Same as str() so that a notebook cell displays the raw report.
        return self.text

def get_report(versions_data):
    """Build a text report comparing A/B test versions.

    Args:
        versions_data: mapping version name -> mapping metric name -> list of
            numbers. Every version must have a 'ratings' entry, which is used
            for the pairwise tests.

    Returns:
        TextHtml with one summary line (median, mean, std, count) per version
        and metric, followed by one two-sided Mann-Whitney U test line per pair
        of versions.
    """
    summary_lines = []
    for v in versions_data:
        for s in versions_data[v]:
            d = versions_data[v][s]
            summary_lines.append(f'{v:10s} {s:>7s}: median: {np.median(d):5.2f} mean: {np.mean(d):5.2f} std: {np.std(d):5.2f} count: {len(d)}\n')
    test_lines = []
    for v1, v2 in itertools.combinations(versions_data.keys(), r=2):
        _, p_val = sts.mannwhitneyu(versions_data[v1]['ratings'], versions_data[v2]['ratings'], alternative='two-sided')
        test_lines.append(f'Mann-Whitney test: {v1} vs {v2} ratings: {p_val:.3f} p-value')
    # Test lines are newline-separated so that reports with more than two
    # versions do not run together; with a single pair the text is unchanged.
    return TextHtml(''.join(summary_lines) + '\n'.join(test_lines))

In [None]:
# Display the report for the `versions_data` currently held in the kernel.
get_report(versions_data)

In [None]:
# Display the current `ab_test_dialogs` frame ordered by date (the frame itself is not modified).
ab_test_dialogs.sort_values(by='date')

In [None]:
avg_n_dialogs = 20
# (test name, start date) -> list of objects exposing .to_html(): the text report first, then figures
ab_tests_plots = {}
from bisect import bisect_left

# One row per conversation; it does not depend on the A/B test, so compute it once.
only_dialogs = skills_ratings.groupby('conv_id').first()

for ab_test in tqdm(ab_tests):
    n_turns = 0
    end = ab_test['date_end']
    start = ab_test['date_start']
    versions = ab_test['name'].split('/')

    # Dialogs inside the inclusive [start, end] window, rows in reversed order.
    ab_test_dialogs = only_dialogs[(only_dialogs['date'] <= end) & (only_dialogs['date'] >= start)][::-1]
    # Build the mask from the frame being filtered; a mask indexed like `only_dialogs`
    # would have to be reindexed by pandas (UserWarning).
    ab_test_dialogs = ab_test_dialogs[ab_test_dialogs['version'].isin(versions)]
    #ab_test_dialogs = ab_test_dialogs[only_dialogs['n_turn'] > n_turns]
    if len(ab_test_dialogs) == 0:
        # nothing to plot for this test: it gets neither a page nor a 'report' entry
        continue

    ab_tests_plots[(ab_test['name'], ab_test['date_start'])] = []

    versions_data = {el: {'ratings': [], 'n_utt': []} for el in versions}
    for v in versions:
        ab_test_version_dialogs = ab_test_dialogs[ab_test_dialogs['version'] == v]
        versions_data[v]['ratings'] = ab_test_version_dialogs['rating'].values.tolist()
        versions_data[v]['n_utt'] = (ab_test_version_dialogs['n_turns'].values * 2).tolist()

    # Build the report once and share it between the page and the Slack notification.
    report = get_report(versions_data)
    ab_tests_plots[(ab_test['name'], ab_test['date_start'])] += [report]
    ab_test['report'] = report
    
    ##########################
    # Bar chart: how often every skill was selected relative to the number of turns, per version.
    ema_alpha = 0.5
    weight_suffix = f'_{ema_alpha}_w'
    ab_test_skill_counts_relative_bar_fig = make_subplots(
        rows=1, cols=1,
        subplot_titles=(f'A/B tests: {ab_test["name"]}: Skills was selected, relative, by version',))

    in_window = (dialog_skills_weights_data['date'] <= end) & (dialog_skills_weights_data['date'] >= start)
    skills_ratings_by_range = dialog_skills_weights_data[in_window][::-1]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] > n_turns]

    # Skill names are recovered from the columns that carry the EMA-weight suffix.
    skill_names = {
        c[:-len(weight_suffix)] for c in skills_ratings_by_range.columns if c.endswith(weight_suffix)
    } - {'no_skill_name'}
    # drop skills whose activation counter is zero over the whole window
    to_remove = {sn for sn in skill_names if skills_ratings_by_range[f'{sn}_n'].sum() == 0}
    skill_names = skill_names - to_remove

    x = {v: [] for v in versions}
    y = {v: [] for v in versions}
    z = {v: [] for v in versions}

    skill_ratings_values = {sn: {v: [] for v in versions} for sn in skill_names}

    for v in versions:
        version_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'] == v]
        d = version_ratings_by_range
        total_turns = d['n_turns'].sum()
        for sn in sorted(skill_names):
            skill_active_n = d[f'{sn}_n'].sum()
            weights = d[f'{sn}{weight_suffix}']
            weight_sum = weights.sum()
            # weighted mean of the dialog rating, 0 when the skill carries no weight
            r = (weights * d['rating']).sum() / weight_sum if weight_sum > 0 else 0
            x[v].append(sn)
            y[v].append(skill_active_n / total_turns)
            z[v].append([sn, skill_active_n, r])

    for v in versions:
        ab_test_skill_counts_relative_bar_fig.add_bar(
            name=v, x=x[v], y=y[v], customdata=z[v],
            hovertemplate=v + ': %{y:.2f}: count: %{customdata[1]}; rating_ema: %{customdata[2]:.2f}')
    ab_test_skill_counts_relative_bar_fig.update_layout(height=500, width=1300, showlegend=True, hovermode='x')
    ab_tests_plots[(ab_test['name'], ab_test['date_start'])].append(ab_test_skill_counts_relative_bar_fig)
    ##########################
    # Bar chart: dialog rating attributed to every active skill, per version, with a
    # one-sided Mann-Whitney p-value against the other version in the hover text.
    min_n_active_skill = 10
    ab_test_ratings_bar_fig = make_subplots(rows=1, cols=1, subplot_titles=(f'A/B tests: {ab_test["name"]}: Skills Ratings by version, skill activated > {min_n_active_skill}',))


    skills_ratings_by_range = skills_ratings[(skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)][::-1]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] > n_turns]

    # two pseudo-skills that aggregate over all skills
    aggr_names = set(['_avg_by_dialog', '_total'])
    skill_names = set(skills_ratings_by_range['active_skill']) - set(['no_skill_name']) | aggr_names

    x, y, z = {v: [] for v in versions}, {v: [] for v in versions}, {v: [] for v in versions}

    # skill -> version -> list of ratings used as the sample for the significance test
    skill_ratings_values = {sn: {v: [] for v in versions} for sn in skill_names}

    for v in versions:
        version_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version']==v]
        for sn in sorted(skill_names):
            if sn in aggr_names:
                sr_gb = version_ratings_by_range.groupby('conv_id', sort=False)
                d = sr_gb.first()
                x[v] += [sn]
                if sn == '_total':
                    # every dialog weighted by its number of turns
                    y[v] += [(d['rating'] * d['n_turns']).sum() / d['n_turns'].sum()]
                    ratings_unfold = []
                    for r, c in d[['rating', 'n_turns']].values:
                        ratings_unfold += [r] * int(c)
                    skill_ratings_values[sn][v] = ratings_unfold[:]
                    z[v] += [[sn, d['n_turns'].sum()]]
                elif sn == '_avg_by_dialog':
                    # plain mean, one value per dialog
                    y[v] += [d['rating'].mean()]
                    skill_ratings_values[sn][v] = d['rating'].values.tolist()
                    z[v] += [[sn, len(d)]]
            else:
                sn_version_ratings = version_ratings_by_range[version_ratings_by_range['active_skill']==sn]
                if len(sn_version_ratings) > 0:
                    sr_gb = sn_version_ratings.groupby('conv_id', sort=False)
                    d = sr_gb.first()
                    # number of rows of this skill inside each dialog
                    d['cnt'] = sr_gb['rating'].count()
                    if d['cnt'].sum() > min_n_active_skill:
                        d['r*cnt'] = d['cnt'] * d['rating']
                        x[v] += [sn]
                        y[v] += [d['r*cnt'].sum() / d['cnt'].sum()]
                        # repeat each dialog rating once per activation of the skill
                        ratings_unfold = []
                        for r, c in d[['rating', 'cnt']].values:
                            ratings_unfold += [r] * int(c)
                        skill_ratings_values[sn][v] = ratings_unfold[:]
                        z[v] += [[sn, d['cnt'].sum()]]

    for v in versions:
        # Opponent = first other version in declaration order. A set difference is empty for a
        # single-version test (IndexError on [0]) and unordered when more than two versions exist.
        other_versions = [o for o in versions if o != v]
        for i, el in enumerate(z[v]):
            sn = el[0]
            s_ratings = skill_ratings_values[sn][v]
            other_ratings = skill_ratings_values[sn][other_versions[0]] if other_versions else []
            if len(s_ratings) > 0 and len(other_ratings) > 0:
                _, p_val = sts.mannwhitneyu(s_ratings, other_ratings, alternative='greater')
                z[v][i] += [p_val]
            else:
                # 0 is only a placeholder for "no comparison possible", not a real p-value
                z[v][i] += [0]


    for v in versions:
        ab_test_ratings_bar_fig.add_bar(name=v, x=x[v], y=y[v], customdata=z[v], hovertemplate = v + ': %{y:.2f}: pval: %{customdata[2]:.3f} count: %{customdata[1]}')
    ab_test_ratings_bar_fig.update_layout(height=500, width=1300, showlegend=True)
    ab_test_ratings_bar_fig.update_layout(hovermode='x')
    ab_tests_plots[(ab_test['name'], ab_test['date_start'])] += [ab_test_ratings_bar_fig]
    ##########################
    
    # One bar chart per EMA alpha in `ema_alphas`: weighted rating of every skill, per version.
    # NOTE: `skill_names` and `ema_alpha` keep the values of the last iteration after this loop;
    # the last-skill section below reads `skill_names`.
    for ema_alpha in ema_alphas:
        min_n_active_skill = 10
        ab_test_ratings_ema_bar_fig = make_subplots(rows=1, cols=1, subplot_titles=(f'A/B tests: {ab_test["name"]}: Skills Ratings by version, EMA ({ema_alpha}), skill activated > {min_n_active_skill}',))


        # rows inside the inclusive [start, end] window, restricted to the tested versions
        skills_ratings_by_range = dialog_skills_weights_data[(dialog_skills_weights_data['date'] <= end) & (dialog_skills_weights_data['date'] >= start)][::-1]
        skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]
        skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] > n_turns]

        # skill names come from the columns ending in `_{ema_alpha}_w`, plus two aggregate pseudo-skills
        aggr_names = set(['_avg_by_dialog', '_total'])
        skill_names = set([c[:-len(f'_{ema_alpha}_w')] for c in skills_ratings_by_range.columns if c.endswith(f'_{ema_alpha}_w')]) - set(['no_skill_name']) | aggr_names

        x, y, z = {v: [] for v in versions}, {v: [] for v in versions}, {v: [] for v in versions}

        # not read anywhere in this loop
        skill_ratings_values = {sn: {v: [] for v in versions} for sn in skill_names}

        for v in versions:
            version_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version']==v]
            d = version_ratings_by_range
            for sn in sorted(skill_names):
                if sn in aggr_names:
                    x[v] += [sn]
                    if sn == '_total':
                        # rating weighted by the number of turns of each row
                        y[v] += [(d['rating'] * d['n_turns']).sum() / d['n_turns'].sum()]
                        z[v] += [[sn, d['n_turns'].sum()]]
                    elif sn == '_avg_by_dialog':
                        # unweighted mean over the rows
                        y[v] += [d['rating'].mean()]
                        z[v] += [[sn, len(d)]]
                else:
                    skill_active_n = d[f'{sn}_n'].sum()
                    # skip skills without weight or with at most `min_n_active_skill` activations
                    if d[f'{sn}_{ema_alpha}_w'].sum() > 0 and skill_active_n > min_n_active_skill:
                        # mean of the rating weighted by the skill's `_{ema_alpha}_w` column
                        r = (d[f'{sn}_{ema_alpha}_w'] * d['rating']).sum() / d[f'{sn}_{ema_alpha}_w'].sum()
                        x[v] += [sn]
                        y[v] += [r]
                        z[v] += [[sn, skill_active_n]]

        for v in versions:
            ab_test_ratings_ema_bar_fig.add_bar(name=v, x=x[v], y=y[v], customdata=z[v], hovertemplate = v + ': %{y:.2f}: count: %{customdata[1]}')
        ab_test_ratings_ema_bar_fig.update_layout(height=500, width=1300, showlegend=True)
        ab_test_ratings_ema_bar_fig.update_layout(hovermode='x')
        ab_tests_plots[(ab_test['name'], ab_test['date_start'])] += [ab_test_ratings_ema_bar_fig]
    ##########################
    
    # Bar chart: mean rating of finished, rated dialogs grouped by their `last_skill`, per version.
    end = ab_test['date_end']
    start = ab_test['date_start']
    versions = ab_test['name'].split('/')
    n_turns = 1  # from here on only dialogs with more than one turn are considered

    dialog_finished_by_range = dialog_finished_df[(dialog_finished_df['date'] <= end) & (dialog_finished_df['date'] >= start) & dialog_finished_df['has_rating']][::-1]
    dialog_finished_by_range = dialog_finished_by_range[dialog_finished_by_range['version'].isin(versions)]
    dialog_finished_by_range = dialog_finished_by_range[dialog_finished_by_range['n_turns'] > n_turns]
    # .assign returns a new frame, so the cast never writes into a slice of dialog_finished_df
    dialog_finished_by_range = dialog_finished_by_range.assign(rating=dialog_finished_by_range['rating'].apply(float))

    # (version, last_skill) -> number of dialogs / mean rating of those dialogs
    last_skill_count = dialog_finished_by_range.groupby('version')['last_skill'].value_counts().to_dict()
    last_skill_rating = dialog_finished_by_range.groupby(['version', 'last_skill'])['rating'].mean().to_dict()

    # Both halves of the date mask must come from `skills_ratings`; taking the lower bound from
    # another frame (`versioned_ratings`) combines masks with unrelated indexes.
    skills_ratings_by_range = skills_ratings[(skills_ratings['date'] <= end) & (skills_ratings['date'] >= start)][::-1]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] > n_turns]
    # (version, active_skill) -> number of rated rows in which the skill was active
    skill_count = skills_ratings_by_range.groupby(['version','active_skill'])['rating'].count().to_dict()

    ab_tests_last_skill_fig = make_subplots(rows=1, cols=1, subplot_titles=(f'A/B tests: {ab_test["name"]}: Last skill rating, n_turns > {n_turns}',))
    for v in versions:
        x = []
        y = []
        c = []
        # NOTE: `skill_names` is whatever the preceding sections of this loop left behind
        for sn in sorted(skill_names):
            if (v, sn) in skill_count:
                sc = skill_count[(v, sn)]
                if (v,sn) in last_skill_count:
                    lc = last_skill_count[(v, sn)]
                    r = last_skill_rating[(v, sn)]
                else:
                    lc = 0
                    r = 0
                # show only skills that ended at least 2 dialogs and were active more than 10 times
                if lc >= 2 and sc > 10:
                    x += [sn]
                    #y += [lc/sc]
                    y += [r]
                    c += [[lc, sc, lc/sc]]
        ab_tests_last_skill_fig.add_bar(name=v, x=x, y=y, customdata=c, hovertemplate = v + ': %{y:.2f}: last/total = %{customdata[0]}/%{customdata[1]} = %{customdata[2]:.2f}')
    ab_tests_last_skill_fig.update_layout(height=500, width=1300, showlegend=True)
    ab_tests_last_skill_fig.update_layout(hovermode='x')
    ab_tests_plots[(ab_test['name'], ab_test['date_start'])] += [ab_tests_last_skill_fig]
    ##########################
    
    # Stacked bar chart: share of dialogs per rounded rating value, one bar per version.
    ab_test_hist_ratings = make_subplots(
        rows=1, cols=1, subplot_titles=(f'A/B tests: {ab_test["name"]}: Ratings',))
    ratings_values = list(range(6))
    # rating value -> versions that have it / share of dialogs / absolute count
    x = {n: [] for n in ratings_values}
    skill_r = {n: [] for n in ratings_values}
    skill_c = {n: [] for n in ratings_values}

    in_window = (versioned_ratings['date'] <= end) & (versioned_ratings['date'] >= start)
    skills_ratings_by_range = versioned_ratings[in_window][::-1]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]

    for ver in versions:
        # one row per conversation of this version
        d = skills_ratings_by_range[skills_ratings_by_range['version'] == ver].groupby('conv_id').first()
        d['rating_round'] = d['rating'].apply(round)
        rating_counts = d.groupby('rating_round').count()['rating']
        n_dialogs = len(d)
        for r, v in rating_counts.items():
            x[r].append(ver)
            skill_r[r].append(v / n_dialogs)
            skill_c[r].append(v)

    # one trace per rating value so that the bars stack by rating
    for r in ratings_values:
        ab_test_hist_ratings.add_bar(
            name=r, x=x[r], y=skill_r[r], customdata=skill_c[r],
            hovertemplate='%{y:.2f}: count: %{customdata}')

    #ab_test_hist_ratings['layout']['yaxis1']['range'] = [0, 1]
    ab_test_hist_ratings.update_layout(height=500, width=500, showlegend=True, hovermode='x', barmode='stack')

    ab_tests_plots[(ab_test['name'], ab_test['date_start'])].append(ab_test_hist_ratings)
    
    
    ##########################
    # Bubble chart: rounded dialog rating against the n_turns bucket, per version;
    # the marker size encodes the share of the version's dialogs in that cell.
    ab_tests_rating_by_n_turns = make_subplots(
        rows=1, cols=1, subplot_titles=(f'A/B tests: {ab_test["name"]}: Rating by n_turns',))
    max_n = 1000

    in_window = (versioned_ratings['date'] <= end) & (versioned_ratings['date'] >= start)
    skills_ratings_by_range = versioned_ratings[in_window][::-1]
    skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['version'].isin(versions)]
    # skills_ratings_by_range = skills_ratings_by_range[skills_ratings_by_range['n_turns'] > n_turns]
    skills_ratings_by_range = skills_ratings_by_range.groupby('conv_id', sort=False).first()
    skills_ratings_by_range['rating_round'] = skills_ratings_by_range['rating'].apply(round)
    # bucket edges: a dialog falls into (lower, upper]
    bins = [0, 1, 2, 5, 10, 15, 20, 30, 40, 50, 75, 100, 200, 350, 500, 700, 1000]
    x = {v: [] for v in versions}
    y = {v: [] for v in versions}
    z = {v: [] for v in versions}

    max_size = 75
    max_dialogs = 0
    max_cd = 0
    for ver in versions:
        d = skills_ratings_by_range[skills_ratings_by_range['version'] == ver]
        if len(d) == 0:
            continue
        max_dialogs = max(max_dialogs, len(d))
        for lower, upper in zip(bins[:-1], bins[1:]):
            in_bucket = d[(d['n_turns'] <= upper) & (d['n_turns'] > lower)]
            for v, c in in_bucket['rating_round'].value_counts().items():
                x[ver].append(f'<={upper}')
                y[ver].append(v)
                z[ver].append(c)
                # largest share over all versions, used to normalise the marker sizes
                max_cd = max(max_cd, c / len(d))

    for ver in versions:
        d = skills_ratings_by_range[skills_ratings_by_range['version'] == ver]
        if len(d) == 0:
            continue
        marker_sizes = [j / len(d) / max_cd * max_size for j in z[ver]]
        ab_tests_rating_by_n_turns = ab_tests_rating_by_n_turns.add_scatter(
            x=x[ver],
            y=y[ver],
            mode='markers',
            marker=dict(size=marker_sizes),
            customdata=z[ver],
            hovertemplate=ver + ': %{y:.2f}: count: %{customdata}',
            name=ver)

    ab_tests_rating_by_n_turns['layout']['yaxis']['range'] = [0.1, 5.9]
    ab_tests_rating_by_n_turns['layout']['xaxis']['title'] = {'text': 'n_turns'}
    ab_tests_rating_by_n_turns['layout']['yaxis']['title'] = {'text': 'rating'}
    ab_tests_rating_by_n_turns.update_layout(height=500, width=1300, showlegend=True)
    ab_tests_plots[(ab_test['name'], ab_test['date_start'])].append(ab_tests_rating_by_n_turns)

### Make html

In [None]:
# Figures of the main page, in the order they appear on it.
plots = [
    fig,
    fig_versions_ratings,
    fig_versions_ratings_ema,
    fig_versions_ratings_ema_more,
    fig_versions_ratings_ema_less,
    fig_daily_hist_ratings,
    fig_version_ratings,
    rating_by_n_turns_fig,
    fig_daily_counts_relative,
    fig_moving_avg,
    fig_moving_avg_d_total,
    fig_moving_avg_d_total_less,
    dialog_time,
    shares_n_utt,
    fig_dialog_finished_day,
    fig_dialog_finished_all_day,
    fig_dialog_finished_skill_day,
    fig_dialog_finished_stop_skill_day,
    fig_dialog_finished_skill_all_day,
    returned_users_fig,
]

# One standalone HTML document per figure.
htmls = [figure.to_html() for figure in plots]

In [None]:
def add_plots_titles(html_base, plots):
    """Insert a bullet list with the titles of ``plots`` after the third line of ``html_base``.

    ``TextHtml`` entries are skipped. A plot with layout annotations contributes the text of
    every annotation whose ``showarrow`` is falsy; a plot without annotations contributes
    its layout title text.
    """
    html_lines = html_base.split('\n')
    items = []
    for plot in plots:
        if isinstance(plot, TextHtml):
            continue
        annotations = plot['layout']['annotations']
        if len(annotations) > 0:
            items.extend(f"<li>{el['text']}</li>\n" for el in annotations if not el['showarrow'])
        else:
            items.append(f"<li>{plot['layout']['title']['text']}</li>\n")
    titles_list = '\n<br>Plots:\n<ul>\n' + ''.join(items) + '</ul>\n'
    return '\n'.join(html_lines[:3]) + titles_list + '\n'.join(html_lines[3:])

In [None]:
def append_plots(html_base, new_html):
    """Append the content of ``new_html`` to the end of the body of ``html_base``.

    Both documents are treated as lists of lines: the first three lines and the last two
    lines of ``new_html`` are taken to be its wrapper and dropped, and the remainder is
    inserted in front of the last two lines of ``html_base``.

    Args:
        html_base: page built so far, or ``None`` when ``new_html`` is the first document.
        new_html: HTML document to merge in.

    Returns:
        The merged page; ``new_html`` itself when ``html_base`` is ``None``.
    """
    if html_base is None:
        return new_html
    base_lines = html_base.split('\n')
    plot_to_add = new_html.split('\n')[3:-2]
    return '\n'.join(base_lines[:-2] + plot_to_add + base_lines[-2:])

In [None]:
def add_link(html_base, link, name):
    # adds link to the top of page
    head_part = html_base.split('\n')[:3]
    main_part = html_base.split('\n')[3:]
    return '\n'.join(head_part) + f'\n<div><a href="{link}">{name}</a></div>\n' + '\n'.join(main_part)

In [None]:
def add_br(html_base):
    """Return ``html_base`` with a ``<br/>`` line inserted after its third line."""
    lines = html_base.split('\n')
    head_part, main_part = lines[:3], lines[3:]
    return '\n'.join(head_part) + '\n<br/>\n' + '\n'.join(main_part)

In [None]:
# a/b tests pages
import os

# The pages are written into ./ab_tests; create the directory so that a fresh working
# directory does not fail on open().
os.makedirs('ab_tests', exist_ok=True)

for v, d in tqdm(ab_tests_plots):
    # merge the report and every figure of this test into a single document
    merged_html = None
    for plot in ab_tests_plots[(v, d)]:
        merged_html = append_plots(merged_html, plot.to_html())
    
    # merged_html = add_plots_titles(merged_html, ab_tests_plots[(v, d)])
    
    # add_link inserts at the top of the page, so everything below is added bottom-up:
    # release links in reversed order keep the order of the versions in the test name
    for r in reversed(v.split('/')):
        merged_html = add_link(merged_html, f'https://github.com/sld/dp-agent-alexa/releases/tag/v{r}', f'Release v{r}')
    merged_html = add_br(merged_html)
    merged_html = add_link(merged_html, 'https://7052.lnsigo.mipt.ru/alexa_prize_plots.html', 'Main page')
    merged_html = add_link(merged_html, 'https://7052.lnsigo.mipt.ru/ab_tests/ab_tests_main.html', 'A/B tests')
    merged_html = add_link(merged_html, 'http://ec2-3-90-214-142.compute-1.amazonaws.com/admin/', 'Dialogs analysis tool')
    merged_html = add_link(merged_html, 'https://7052.lnsigo.mipt.ru/autoindex/#/dp_agent_alexa_data/dialogs_dumps_txt', 'Dialogs hourly dumps')
    # file name: versions joined by '_' plus the start date of the test
    with open(f'ab_tests/{v.replace("/", "_")}_{d.date()}.html', 'w') as fout:
        fout.write(merged_html)

In [None]:
# a/b tests main page
# add_link/add_br insert after the third line of the page, so the skeleton keeps <head></head>
# on one line: the third line is then <body> and the links land inside the body instead of
# between </head> and <body>.
merged_html = '<html>\n<head></head>\n<body>\n</body>\n</html>'

# one link per generated A/B test page; each add_link call puts its link above the previous ones
for v, d in list(ab_tests_plots.keys())[::-1]:
    merged_html = add_link(merged_html, f'https://7052.lnsigo.mipt.ru/ab_tests/{v.replace("/", "_")}_{d.date()}.html', f'{d.date()} {v}')

merged_html = add_br(merged_html)
    
# navigation links, shown above the list of tests
merged_html = add_link(merged_html, 'https://7052.lnsigo.mipt.ru/alexa_prize_plots.html', 'Main page')
merged_html = add_link(merged_html, 'http://ec2-3-90-214-142.compute-1.amazonaws.com/admin/', 'Dialogs analysis tool')
merged_html = add_link(merged_html, 'https://7052.lnsigo.mipt.ru/autoindex/#/dp_agent_alexa_data/dialogs_dumps_txt', 'Dialogs hourly dumps')
with open('ab_tests/ab_tests_main.html', 'w') as fout:
    fout.write(merged_html)

In [None]:
# main page
# Merge all figure documents into one page, then put the title list and the links on top.
merged_html = None
for plot_html in htmls:
    merged_html = append_plots(merged_html, plot_html)

merged_html = add_plots_titles(merged_html, plots)
# add_link inserts at the top, so the last entry of this list becomes the first link on the page
for link, name in [
    ('https://7052.lnsigo.mipt.ru/ab_tests/ab_tests_main.html', 'A/B tests'),
    ('http://ec2-3-90-214-142.compute-1.amazonaws.com/admin/', 'Dialogs analysis tool'),
    ('https://7052.lnsigo.mipt.ru/autoindex/#/dp_agent_alexa_data/dialogs_dumps_txt', 'Dialogs hourly dumps'),
]:
    merged_html = add_link(merged_html, link, name)
with open('alexa_prize_plots.html', 'w') as fout:
    fout.write(merged_html)

#### send notification to slack

In [None]:
import os
import requests
import json
# NOTE(security): a webhook URL is a credential and should not be stored in the notebook.
# The SLACK_WEBHOOK environment variable takes precedence; the literal is kept only so that
# existing runs keep working and should be rotated and removed.
SLACK_WEBHOOK = os.environ.get('SLACK_WEBHOOK', 'https://hooks.slack.com/services/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')

# File holding the number of ratings that was seen by the previous run.
send_rating_datafile = DATA_DIR + '/send_rating.data'
new_ratings_arrived = False

if not os.path.isfile(send_rating_datafile):
    new_ratings_arrived = True
else:
    with open(send_rating_datafile, 'r') as fin:
        try:
            v = int(fin.readline().strip())
        except ValueError:
            # empty or corrupted counter file: behave as if nothing had been seen yet
            v = -1
        if v < len(data):
            new_ratings_arrived = True

with open(send_rating_datafile, 'w') as fout:
    fout.write(f'{len(data)}')


if new_ratings_arrived:
    # First A/B test that received a report in the plotting loop; tests without dialogs
    # have no 'report' key, and there may be no such test at all.
    ab_test = next((t for t in ab_tests if 'report' in t), None)

    if ab_test is not None:
        url = f'https://7052.lnsigo.mipt.ru/ab_tests/{ab_test["name"].replace("/", "_")}_{ab_test["date_start"].date()}.html'
        text = f"results for the latest A/B test:\n{ab_test['report']}\n{url}"
        payload={"text": text}
        # without a timeout a stalled connection would block the notebook forever
        response = requests.request(url=SLACK_WEBHOOK, data=json.dumps(payload), method='POST', timeout=30)
        if not response.ok:
            print(f'Slack notification failed with HTTP status {response.status_code}')