# In [None]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import seaborn as sns
import io
from io import StringIO
import requests
import pandahouse
import telegram

from airflow.decorators import dag, task
from airflow.operators.python import get_current_context



# Default arguments that are passed down to the tasks
default_args = dict(
    owner='d.melikhov',
    depends_on_past=False,
    retries=2,
    retry_delay=timedelta(minutes=5),
    start_date=datetime(2022, 8, 3),
)

# DAG schedule as a cron expression (every 15 minutes)
schedule_interval = '*/15 * * * *'

# Telegram credentials
# NOTE(review): these values look like placeholders — presumably the real
# token / chat id were redacted; confirm before deploying.
token = '1'
chat_id = 1

# Initialise the bot used by the alerting tasks
bot = telegram.Bot(token=token)

@dag(default_args=default_args, schedule_interval=schedule_interval, catchup=False)
def dag_alert():
    
    @task
    def extract_data():
        """Read per-OS 15-minute feed and messenger metrics from ClickHouse.

        The query aggregates ``feed_actions`` and ``message_actions`` into
        15-minute buckets per OS for dates after ``today() - 30``, joins the
        two aggregates on (day_of_week, d, t_period, os) and numbers the
        buckets inside each day with ``dense_rank()`` (column ``rn``).

        Returns:
            pandas.DataFrame: the query result with two extra columns,
            ``just_t`` (time-of-day part of ``t_period``) and ``dif_in_days``
            (days between the latest date in the data and the row's date),
            and with the numeric columns cast to fixed dtypes.
        """
        # connection settings are redacted
        connection = {}

        query = """
                select *, dense_rank() over(partition by d  order by t_period) rn
                from
                (select toDayOfWeek(time) day_of_week, toDate(time) d, 
                        toStartOfFifteenMinutes(time) t_period, 
                        os, count(distinct user_id) f_users, 
                        countIf(action, action = 'view') views, 
                        countIf(action, action = 'like') likes,
                        likes/views ctr
                from simulator_20220720.feed_actions 
                where toDate(time) > today() - 30
                group by t_period, d, day_of_week, os ) f
                join (
                        select toDayOfWeek(time) day_of_week, toDate(time) d,  
                        toStartOfFifteenMinutes(time) t_period, 
                        os, count(distinct user_id) m_users,
                        count(reciever_id) messages
                        from simulator_20220720.message_actions
                        where toDate(time) > today() - 30
                        group by t_period, d, day_of_week, os
                    ) m using (day_of_week, d, t_period, os)
                order by t_period, day_of_week, os


                    """
        frame = pandahouse.read_clickhouse(query=query, connection=connection)

        # derived columns: time of day of the bucket and the row's age in days
        frame['just_t'] = frame['t_period'].dt.time
        latest_day = pd.to_datetime(frame['d'].max())
        frame['dif_in_days'] = (latest_day - frame['d']).dt.days

        # fix the column dtypes
        frame['day_of_week'] = frame['day_of_week'].astype('int8')
        for column in ('f_users', 'm_users', 'views', 'likes', 'messages', 'rn'):
            frame[column] = frame[column].astype('int64')
        frame['ctr'] = frame['ctr'].round(5)

        return frame

    
    @task
    def today(df):
        """Return the rows of the second-to-latest 15-minute bucket in ``df``.

        The newest ``t_period`` is skipped (presumably because that bucket is
        still being filled when the DAG runs) and the bucket right before it
        is selected.

        Args:
            df: DataFrame with a ``t_period`` column, as produced by
                ``extract_data``.

        Returns:
            pandas.DataFrame: all rows of the selected bucket, re-indexed
            0..n-1.
        """
        newest_period = df['t_period'].max()
        last_full_period = df.loc[df['t_period'] != newest_period, 't_period'].max()
        # reset_index instead of a hard-coded pd.Index([0, 1]): the old code
        # raised ValueError whenever the bucket did not hold exactly two rows
        # (e.g. only one OS had data in that period).
        return df[df['t_period'] == last_full_period].reset_index(drop=True)

    
    @task
    def check_ios(df, last):
        """Send a Telegram alert for every anomalous iOS metric of the latest bucket.

        A metric value is treated as anomalous when it lies outside
        ``[Q1 - a * IQR, Q3 + a * IQR]``, where the quartiles are computed over
        the same-``rn`` buckets of earlier days for the same OS. For each
        anomaly a text message and a line plot of the historical values are
        sent to the chat.

        Args:
            df: full metrics DataFrame (as returned by ``extract_data``).
            last: rows of the bucket to check (as returned by ``today``).

        Raises:
            ValueError: if ``last`` does not contain exactly one iOS row
                (raised by ``Series.item()``).
        """
        last = last[last['os'] == 'iOS']
        # named os_name (not `os`) so the local does not shadow the stdlib module name
        os_name = last['os'].item()
        rn = last['rn'].item()
        # reference sample: same OS, same bucket number, strictly earlier days
        history = df[(df['os'] == os_name) & (df['rn'] == rn) & (df['d'] < last['d'].item())]
        t = last['t_period'].item()
        metrics = ['f_users', 'views', 'likes',
                   'ctr', 'm_users', 'messages']
        a = 2  # width of the tolerance band, in interquartile ranges

        for metric in metrics:
            value = last[metric].item()
            low_bound = history[metric].quantile(0.25)
            high_bound = history[metric].quantile(0.75)
            iqr = high_bound - low_bound

            # Written as a positive test on purpose: with an empty history the
            # bounds are NaN, both comparisons are False and no alert is sent.
            is_anomaly = (value >= high_bound + a * iqr) or (value <= low_bound - a * iqr)
            if not is_anomaly:
                continue

            m = history[metric].mean()
            msg = f'время: {t} ос: {os_name} метрика: {metric} значение: {value} отклонение от среднего месяца: {round((value - m)*100/m, 2)} %'

            # The file never imports matplotlib.pyplot, so the former plt.* calls
            # raised NameError on the first anomaly; the Axes/Figure objects
            # returned by seaborn provide the same operations.
            sns.set(rc={'figure.figsize': (11, 8)})
            ax = sns.lineplot(data=history, y=history[metric], x=history['d'].dt.strftime('%m-%d'))
            fig = ax.get_figure()
            for label in ax.get_xticklabels():
                label.set_rotation(45)
            ax.set_title(f'значения 15-минуток в этом месяце по ос: {os_name} метрика: {metric}')

            plot_object = io.BytesIO()
            fig.savefig(plot_object)
            plot_object.seek(0)
            plot_object.name = 'month_line.png'
            # clear the figure so the next metric is not drawn over this one
            fig.clf()

            bot.sendMessage(chat_id=chat_id, text=msg)
            bot.sendPhoto(chat_id=chat_id, photo=plot_object)
                
                
    @task
    def check_and(df, last):
        """Send a Telegram alert for every anomalous Android metric of the latest bucket.

        A metric value is treated as anomalous when it lies outside
        ``[Q1 - a * IQR, Q3 + a * IQR]``, where the quartiles are computed over
        the same-``rn`` buckets of earlier days for the same OS. For each
        anomaly a text message and a line plot of the historical values are
        sent to the chat.

        Args:
            df: full metrics DataFrame (as returned by ``extract_data``).
            last: rows of the bucket to check (as returned by ``today``).

        Raises:
            ValueError: if ``last`` does not contain exactly one Android row
                (raised by ``Series.item()``).
        """
        last = last[last['os'] == 'Android']
        # named os_name (not `os`) so the local does not shadow the stdlib module name
        os_name = last['os'].item()
        rn = last['rn'].item()
        # reference sample: same OS, same bucket number, strictly earlier days
        history = df[(df['os'] == os_name) & (df['rn'] == rn) & (df['d'] < last['d'].item())]
        t = last['t_period'].item()
        metrics = ['f_users', 'views', 'likes',
                   'ctr', 'm_users', 'messages']
        a = 2  # width of the tolerance band, in interquartile ranges

        for metric in metrics:
            value = last[metric].item()
            low_bound = history[metric].quantile(0.25)
            high_bound = history[metric].quantile(0.75)
            iqr = high_bound - low_bound

            # Written as a positive test on purpose: with an empty history the
            # bounds are NaN, both comparisons are False and no alert is sent.
            is_anomaly = (value >= high_bound + a * iqr) or (value <= low_bound - a * iqr)
            if not is_anomaly:
                continue

            m = history[metric].mean()
            msg = f'время: {t} ос: {os_name} метрика: {metric} значение: {value} отклонение от среднего месяца: {round((value - m)*100/m, 2)} %'

            # The file never imports matplotlib.pyplot, so the former plt.* calls
            # raised NameError on the first anomaly; the Axes/Figure objects
            # returned by seaborn provide the same operations.
            sns.set(rc={'figure.figsize': (11, 8)})
            ax = sns.lineplot(data=history, y=history[metric], x=history['d'].dt.strftime('%m-%d'))
            fig = ax.get_figure()
            for label in ax.get_xticklabels():
                label.set_rotation(45)
            ax.set_title(f'значения 15-минуток в этом месяце по ос: {os_name} метрика: {metric}')

            plot_object = io.BytesIO()
            fig.savefig(plot_object)
            plot_object.seek(0)
            plot_object.name = 'month_line.png'
            # clear the figure so the next metric is not drawn over this one
            fig.clf()

            bot.sendMessage(chat_id=chat_id, text=msg)
            bot.sendPhoto(chat_id=chat_id, photo=plot_object)


    # Task wiring: load the metrics once, pick the rows selected by today(),
    # then run the per-OS checks on them.
    metrics_df = extract_data()
    latest_rows = today(metrics_df)
    check_ios(metrics_df, latest_rows)
    check_and(metrics_df, latest_rows)
    
# Call the @dag-decorated function and keep the result in a module-level name
# (presumably so that Airflow can discover the DAG when it parses this file).
dag_alert_melikhov = dag_alert()