In [3]:
from collections import namedtuple
from datetime import datetime
from datetime import timedelta

import altair as alt
import math
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px

import os
import numpy as np

from utils import chart

from PIL import Image


In [4]:
# Load every file in ./files/ (in sorted name order) and stack them into one
# frame. The frames are collected first and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulated frame on every loop iteration.
csv_frames = [
    pd.read_csv(f"./files/{file_name}")
    for file_name in sorted(os.listdir("./files/"))
]
if not csv_frames:
    # Fail with a clear message instead of a KeyError further down.
    raise FileNotFoundError("no input files found in ./files/")
df = pd.concat(csv_frames, ignore_index=True)

# Reduce dt_create to a "MM-DD" label (the year is dropped here). The .dt
# accessor formats the whole column at once and leaves missing dates as NaN
# instead of raising on NaT.
df["dt_create"] = pd.to_datetime(df["dt_create"], format="%Y-%m-%d").dt.strftime("%m-%d")

# count_mons is an independent copy of count_mon_full_rec; the pivots below
# sum count_mons and average count_mon_full_rec.
df["count_mons"] = df["count_mon_full_rec"].copy()


# Rows where not_in_monita is True, summarised per dt_create as the mean of
# count_cv_full_rec and count_cv_any_rec (empty cells filled with 0).
not_in_monita = df.loc[df["not_in_monita"].eq(True)].copy()
deleted_aggs = {"count_cv_full_rec": np.mean, "count_cv_any_rec": np.mean}
pivot_deleted = not_in_monita.pivot_table(
    index="dt_create",
    values=["count_cv_full_rec", "count_cv_any_rec"],
    aggfunc=deleted_aggs,
    fill_value=0,
)

# Rows where not_in_monita is False, with a per-row recognition percentage:
# the share of count_mons that is not counted in is_manual.
in_monita = df.loc[df["not_in_monita"].eq(False)].assign(
    recognition=lambda rows: rows["count_mons"]
    .sub(rows["is_manual"])
    .div(rows["count_mons"])
    .mul(100)
)

# Per-day summary: row count via "url", sums for the volume columns and
# means for the *_rec columns, rounded to two decimals.
not_deleted_aggs = {
    "url": "count",
    "count_mons": np.sum,
    "is_manual": np.sum,
    "count_mon_full_rec": np.mean,
    "count_cv_full_rec": np.mean,
    "count_cv_any_rec": np.mean,
    "empty_cv": np.sum,
}
not_deleted_table = in_monita.pivot_table(
    index="dt_create",
    values=[
        "url",
        "count_mons",
        "empty_cv",
        "is_manual",
        "count_mon_full_rec",
        "count_cv_full_rec",
        "count_cv_any_rec",
    ],
    aggfunc=not_deleted_aggs,
)
pivot_not_deleted = not_deleted_table.round(2)

# Per-day summary of the in_monita rows: distinct urls, summed volumes and
# mean *_rec / intersection values, with empty cells filled with 0.
days_aggs = {
    "url": pd.Series.nunique,
    "count_mons": np.sum,
    "is_manual": np.sum,
    "count_mon_full_rec": np.mean,
    "count_cv_full_rec": np.mean,
    "count_full_intersection": np.mean,
    "empty_cv": np.sum,
}
days_table = in_monita.pivot_table(
    index="dt_create",
    values=[
        "url",
        "count_mons",
        "empty_cv",
        "is_manual",
        "count_mon_full_rec",
        "count_cv_full_rec",
        "count_full_intersection",
    ],
    aggfunc=days_aggs,
    fill_value=0,
)

# Turn dt_create back into a column and rank days by count_mons, largest first.
pivot_days = (
    days_table.reset_index()
    .sort_values(by="count_mons", ascending=False)
    .reset_index(drop=True)
)

# Day-level recognition percentage, computed from the summed columns.
pivot_days["recognition"] = (
    pivot_days["count_mons"]
    .sub(pivot_days["is_manual"])
    .div(pivot_days["count_mons"])
    .mul(100)
)

# Number the rows from 1 so the index reads as a rank.
pivot_days.index = pivot_days.index + 1

In [117]:
# Rebuild a full timestamp from the "MM-DD" label (the year 2022 is
# hard-coded in the prefix), then round the numeric columns to two decimals.
# The trailing expression displays the resulting table.
pivot_days = pivot_days.assign(
    date=pd.to_datetime("2022-" + pivot_days["dt_create"], format="%Y-%m-%d")
).round(2)
pivot_days

Unnamed: 0,dt_create,count_cv_full_rec,count_full_intersection,count_mon_full_rec,count_mons,empty_cv,is_manual,url,recognition,date
1,03-28,22.27,17.41,20.69,181733,365,31884,8782,82.46,2022-03-28
2,04-04,22.13,16.69,20.1,174890,260,32250,8701,81.56,2022-04-04
3,04-18,22.18,16.08,19.66,169514,319,33344,8622,80.33,2022-04-18
4,03-14,19.15,14.33,18.21,158651,490,36460,8713,77.02,2022-03-14
5,03-21,18.59,13.78,17.79,155061,556,37418,8715,75.87,2022-03-21
6,04-11,21.99,16.56,20.23,133469,242,26177,6594,80.39,2022-04-11


In [26]:
# Day labels in the current row order of pivot_days.
pivot_days["dt_create"].tolist()

['03-28', '04-04', '04-18', '03-14', '03-21', '04-11']

In [None]:
# Bar chart of the per-day mean count_cv_full_rec, with days sorted by
# dt_create.
# x is given as a column of the same sorted frame that supplies y. Passing a
# label list taken from the unsorted pivot_days pairs each bar with the
# wrong day, because array-like arguments are matched to rows by position.
days_by_date = pivot_days.sort_values(by="dt_create")
fig = px.bar(
    days_by_date,
    x="dt_create",
    y="count_cv_full_rec",
    title="",
    width=800,
    height=400,
)
# Treat the "MM-DD" labels as categories rather than parsing them as dates.
fig.update_xaxes(type="category", fixedrange=False, showspikes=True)
fig.show()


In [185]:
# Line chart comparing the three per-day means (count_cv_full_rec,
# count_mon_full_rec, count_full_intersection), with days sorted by dt_create.
days_by_date = pivot_days.sort_values(by="dt_create")
vivid = px.colors.qualitative.Vivid

fig = px.line(
    days_by_date,
    x=days_by_date["dt_create"].unique().tolist(),
    y=["count_cv_full_rec", "count_mon_full_rec", "count_full_intersection"],
    title="",
    labels=None,
    color_discrete_sequence=[vivid[2], vivid[4], vivid[7]],
    width=800,
    height=400,
)

# Blank out the automatic axis titles and keep the day labels categorical.
fig.update_yaxes(visible=True, showticklabels=True, title="")
fig.update_xaxes(
    type="category",
    fixedrange=False,
    showspikes=True,
    showticklabels=True,
    title="",
)
fig.update_traces(
    mode="markers+lines",
    hovertemplate=None,
    textposition="top center",
)

# Horizontal legend placed just above the plot area, left-aligned.
legend_style = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "left",
    "x": 0,
    "title": "",
    "title_font_family": "Verdana",
    "font": {"family": "Sans-serif", "size": 12, "color": "black"},
    "bordercolor": "Black",
    "borderwidth": None,
}
fig.update_layout(
    hovermode="x unified",
    hoverlabel={"namelength": -1, "bordercolor": "White"},
    margin={"l": 10, "r": 10, "t": 20, "b": 0},
    legend=legend_style,
)
fig.show()


In [209]:
# Bar chart of the per-day sums count_mons and is_manual, with days sorted by
# dt_create and the values printed on the bars (text_auto).
days_by_date = pivot_days.sort_values(by="dt_create")
vivid = px.colors.qualitative.Vivid

fig = px.bar(
    days_by_date,
    x=days_by_date["dt_create"].unique().tolist(),
    y=["count_mons", "is_manual"],
    title="",
    labels=None,
    color_discrete_sequence=[vivid[2], vivid[7]],
    width=800,
    height=400,
    text_auto=True,
)

# Fixed y range of 0-250000; axis titles are blanked out.
fig.update_yaxes(visible=True, showticklabels=True, title="", range=[0, 250000])
fig.update_xaxes(
    type="category",
    fixedrange=False,
    showspikes=True,
    showticklabels=True,
    title="",
)

# Horizontal legend placed just above the plot area, left-aligned.
legend_style = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "left",
    "x": 0,
    "title": "",
    "title_font_family": "Verdana",
    "font": {"family": "Sans-serif", "size": 12, "color": "black"},
    "bordercolor": "Black",
    "borderwidth": None,
}
# update_layout returns the figure, so as the last expression it also
# renders the chart.
fig.update_layout(
    hovermode=None,
    hoverlabel={"namelength": -1, "bordercolor": "White"},
    margin={"l": 0, "r": 20, "t": 20, "b": 0},
    legend=legend_style,
)

In [214]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Daily volumes (bars, primary y-axis) against the recognition percentage
# (line, secondary y-axis), with days sorted by dt_create.
days_by_date = pivot_days.sort_values(by="dt_create")
day_labels = days_by_date["dt_create"].tolist()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# A bar trace takes one 1-D y series, so each count column gets its own
# trace instead of passing a two-column DataFrame as y.
for column in ["count_mons", "is_manual"]:
    fig.add_trace(
        go.Bar(x=day_labels, y=days_by_date[column], name=column),
        secondary_y=False,
    )

# go.Line is a deprecated alias; go.Scatter is the supported trace type.
fig.add_trace(
    go.Scatter(
        x=day_labels,
        y=days_by_date["recognition"],
        mode="lines+markers",
        name="recognition",
    ),
    secondary_y=True,
)

fig.update_xaxes(
    type="category",
    fixedrange=False,
    showspikes=True,
    showticklabels=True,
    title="",
)

fig.update_yaxes(visible=True, showticklabels=True, title="")
# The fixed 0-250000 range only suits the count bars. Applied to both axes
# it would flatten the recognition line (values on a percent scale) against
# zero, so it is limited to the primary axis and the secondary one autoscales.
fig.update_yaxes(range=[0, 250000], secondary_y=False)

fig.show()