In [None]:
from uuid import uuid4
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

In [None]:
def get_dataset(size: int, days: int, seed=None) -> pd.DataFrame:
    """Generate a synthetic A/B-test dataset.

    Every row is one user, randomly assigned to ``'old_version'`` or
    ``'new_version'``. Timestamps are spread evenly over the ``days`` days
    that end at the moment of the call.

    Parameters
    ----------
    size : int
        Number of rows (users) to generate.
    days : int
        Length of the observation window in days; the window ends "now".
    seed : int, optional
        Seed for the NumPy random generator. When given, the ``group``,
        ``converted`` and ``avg_check`` columns are reproducible between
        calls. ``user_id`` (uuid4) and ``timestamp`` (current time) are not.

    Returns
    -------
    pd.DataFrame
        Columns, in order: ``user_id`` (str), ``group`` (str), ``timestamp``
        (datetime), ``converted`` (int, 0 or 1), ``avg_check`` (float).
    """
    rng = np.random.default_rng(seed)

    end = datetime.today()
    # Derive the window from `end` itself so the function does not depend on
    # any notebook-level variable and start/end share the same reference time.
    start = end - timedelta(days=days)

    data = pd.DataFrame(data={
        'user_id': [str(uuid4()) for _ in range(size)],
        'group': rng.choice(['old_version', 'new_version'], size=size),
        'timestamp': pd.date_range(start=start, end=end, periods=size)
    })

    old_version_index = data.index[data['group'] == 'old_version']
    new_version_index = data.index[data['group'] == 'new_version']

    # Create the column with its final integer dtype up front, so the partial
    # assignments below never pass through a float column holding NaN.
    data['converted'] = 0

    # Old version converts with probability 0.2, new version with 0.25.
    data.loc[old_version_index, 'converted'] = rng.choice(
        [0, 1],
        size=len(old_version_index),
        p=[0.8, 0.2]
    )
    data.loc[new_version_index, 'converted'] = rng.choice(
        [0, 1],
        size=len(new_version_index),
        p=[0.75, 0.25]
    )

    # Average check is normally distributed with group-specific mean / std.
    data['avg_check'] = np.nan
    data.loc[old_version_index, 'avg_check'] = rng.normal(
        loc=15,
        scale=7,
        size=len(old_version_index)
    )
    data.loc[new_version_index, 'avg_check'] = rng.normal(
        loc=17,
        scale=6.4,
        size=len(new_version_index)
    )

    return data

In [None]:
# Reference window shown to the reader: the 30 days ending at the current moment.
# NOTE(review): the dataset below is generated with a 15-day window, so the
# bounds printed here will not match its timestamps — confirm which is intended.
today = datetime.today()
start = today - timedelta(days=30)

# Bare last expression so the notebook renders the formatted window bounds.
f'start: {start.strftime("%Y-%m-%d %H:%M:%S")} ||| end: {today.strftime("%Y-%m-%d %H:%M:%S")}'

In [None]:
# Build 1000 synthetic users over a 15-day window and make sure the
# timestamp column is datetime-typed, in a single non-mutating expression.
data = get_dataset(1000, 15).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

In [None]:
# Show the earliest and latest timestamp present in `data`.
f"min: {data['timestamp'].min()} ||| max: {data['timestamp'].max()}"

In [None]:
# Overlay the distribution of the `converted` flag for both site versions.
fig = go.Figure()

# One histogram trace per experiment group, added in a fixed order.
for version in ('old_version', 'new_version'):
    flags = data.loc[data['group'] == version, 'converted']
    fig.add_trace(go.Histogram(x=flags, name=version))

fig.update_traces(hovertemplate="Сконвертирован: %{x}<br>"
                                "Количество: %{y}")

# Figure title and axis captions.
fig.update_layout(title='Распределение конверсий в новой и старой версии сайта')
fig.update_xaxes(title='Сконвертирован')
fig.update_yaxes(title='Количество')

fig.show()

In [None]:
# Number of rows per calendar day, keyed by the 'YYYY-MM-DD' string of each
# timestamp. The grouping key is passed as values: a string key on a Series
# is looked up as an index level, so grouping the formatted Series by the
# label 'timestamp' raises KeyError.
data.groupby(data['timestamp'].dt.strftime('%Y-%m-%d'))['timestamp'].agg('count')

In [None]:
# Daily row counts: bucket rows by calendar day of `timestamp`, count the
# non-null user ids in each bucket and label the resulting Series 'count'.
(
    data
    .groupby(pd.Grouper(key='timestamp', freq='D'))['user_id']
    .count()
    .rename('count')
)

In [None]:
# Daily row counts (one value per calendar day), kept in `res` for plotting.
res = (
    data
    .groupby(pd.Grouper(key='timestamp', freq='D'))['user_id']
    .count()
    .rename('count')
)

# Plot the counts per day, requesting one bin per day present in `res`.
px.histogram(x=res.index, y=res, nbins=len(res.index))