In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import telegram
import pandahouse as ph
import asyncio
import io
from datetime import datetime,  timedelta
from airflow.decorators import dag, task
from airflow.operators.python import get_current_context

# Cron expression used as the DAG schedule: every 15 minutes.
schedule_interval = '*/15 * * * *'

# Connection settings handed to pandahouse when querying ClickHouse
# (the values in this file are placeholders).
connection = dict(
    host='******',
    database='******',
    user='******',
    password='******',
)

# Arguments applied by default to every task of the DAG.
default_args = dict(
    owner='r_muksinov',  # task owner
    depends_on_past=False,  # do not depend on the success of the previous run
    retries=2,  # number of retries
    retry_delay=timedelta(minutes=5),  # pause between retries
    start_date=datetime(2023, 4, 9),  # date the schedule starts from
)

@dag(default_args=default_args, schedule_interval=schedule_interval, catchup=False, tags=['r.muksinov'])
def muksinov_alert_bot():
    
    @task()
    def extract_feed():
        """Load feed metrics aggregated into 15-minute buckets.

        The query covers rows from the start of yesterday up to (but not
        including) the current 15-minute bucket and yields the columns
        ``ts``, ``date``, ``hm``, ``users_feed``, ``view``, ``likes`` and
        ``CTR``, ordered by ``ts``.

        Returns:
            The result of ``ph.read_clickhouse`` for that query.
        """
        feed_sql = '''
                SELECT 
                    toStartOfFifteenMinutes(time) as ts,
                    toDate(time) as date,
                    formatDateTime(ts, '%R') as hm,
                    uniqExact(user_id) as users_feed,
                    countIf(user_id, action='view') as view,
                    countIf(user_id, action='like') as likes, 
                    ROUND(countIf(user_id, action='like') / countIf(user_id, action='view'), 3) AS CTR
                FROM simulator_20230320.feed_actions
                WHERE time >= today() - 1 and time < toStartOfFifteenMinutes(now())
                GROUP BY ts, date, hm
                ORDER BY ts
                '''
        return ph.read_clickhouse(feed_sql, connection=connection)
    
    @task()
    def extract_message():
        """Load messenger metrics aggregated into 15-minute buckets.

        The query covers rows from the start of yesterday up to (but not
        including) the current 15-minute bucket and yields the columns
        ``ts``, ``date``, ``hm``, ``users_messages`` and
        ``unique_receivers``, ordered by ``ts``.

        Returns:
            The result of ``ph.read_clickhouse`` for that query.
        """
        messages_sql = '''
                SELECT 
                    toStartOfFifteenMinutes(time) as ts,
                    toDate(time) as date,
                    formatDateTime(ts, '%R') as hm,
                    uniqExact(user_id) as users_messages,
                    uniqExact(reciever_id) as unique_receivers
                FROM simulator_20230320.message_actions
                WHERE time >= today() - 1 and time < toStartOfFifteenMinutes(now())
                GROUP BY ts, date, hm
                ORDER BY ts
                '''
        return ph.read_clickhouse(messages_sql, connection=connection)

    def check_anomaly(df, metric, a=5, n=5):
        """Flag the latest value of ``metric`` if it leaves a smoothed IQR band.

        For every row the 25th and 75th percentiles are taken over the ``n``
        rows that precede it (the current row is excluded via ``shift(1)``).
        The band is ``q75 + a * iqr`` / ``q25 - a * iqr`` and is then smoothed
        with a centered rolling mean of width ``n``.

        Args:
            df: DataFrame containing the ``metric`` column; it is modified in
                place (columns ``q25``, ``q75``, ``iqr``, ``top`` and
                ``bottom`` are added).
            metric: Name of the column to check.
            a: Multiplier applied to the interquartile range.
            n: Window size for both the quantiles and the smoothing.

        Returns:
            A tuple ``(is_alert, df)`` where ``is_alert`` is 1 when the last
            value lies outside the band and 0 otherwise. For an empty ``df``
            the result is ``(0, df)`` and no columns are added.
        """
        # No rows means there is no "latest" value to test; without this guard
        # the ``iloc[-1]`` lookups below raise IndexError.
        if df.empty:
            return 0, df

        df['q25'] = df[metric].shift(1).rolling(n).quantile(0.25)
        df['q75'] = df[metric].shift(1).rolling(n).quantile(0.75)
        df['iqr'] = df['q75'] - df['q25']  # interquartile range
        df['top'] = df['q75'] + a * df['iqr']
        df['bottom'] = df['q25'] - a * df['iqr']

        # Smooth the band; min_periods=1 keeps a value at the series edges.
        df['top'] = df['top'].rolling(n, center=True, min_periods=1).mean()
        df['bottom'] = df['bottom'].rolling(n, center=True, min_periods=1).mean()

        latest = df[metric].iloc[-1]
        # Comparisons against NaN bounds are False, so short histories
        # never raise an alert.
        if latest < df['bottom'].iloc[-1] or latest > df['top'].iloc[-1]:
            is_alert = 1
        else:
            is_alert = 0

        return is_alert, df

    # Metric column names checked for anomalies in the feed extract.
    feed_metrics = ['users_feed', 'view', 'likes', 'CTR']
    # Metric column names checked for anomalies in the messenger extract.
    message_metrics = ['users_messages', 'unique_receivers']
    

    @task()    
    def run_alerts(data, metrics = None, chat_id=None):
        chat_id = ******
        bot = telegram.Bot(token='*****')
        metrics_list = metrics
        for metric in metrics_list:
            df = data[['ts', 'date', 'hm', metric]].copy()
            is_alert, df = check_anomaly(df, metric)
            if is_alert==1:
                current_val = df[metric].iloc[-1]
                last_val_diff = 1 - (df[metric].iloc[-1] / df[metric].iloc[-2])
                
                users_feed_dashboard = 'https://superset.lab.karpov.courses/superset/dashboard/3336/' 
                messages_dashboard = 'https://superset.lab.karpov.courses/superset/dashboard/3338/'
                if metric in feed_metrics:
                    dashboard = users_feed_dashboard
                else:
                    dashboard = messages_dashboard
                
                if metric=='users_feed':
                    emoji = '📰'
                elif metric=='view':
                    emoji = '👀'
                elif metric=='likes':
                    emoji = '👍'
                elif metric=='CTR':
                    emoji = '〽️'
                elif metric == 'users_messages':
                    emoji = '📨'
                elif metric == 'unique_receivers':
                    emoji = '📩'
                
                if last_val_diff < 0:
                    deviation = '📉'
                else:
                    deviation = '📈'
                    
                    
                msg = f'{emoji} Метрика {metric}:\n⏩ Текущее значение: {current_val:.2f}\n{deviation} Отклонение от предыдущего значения: {last_val_diff: .2%}\n📊 Дашборд по метрикам:{dashboard}\n⚠️@Denmark_19'

                sns.set(rc={'figure.figsize': (16, 10)})
                plt.tight_layout()
                ax = sns.lineplot(x=df['ts'], y=df[metric], label=metric)
                ax = sns.lineplot(x=df['ts'], y=df['top'], label='top')
                ax = sns.lineplot(x=df['ts'], y=df['bottom'], label='bottom')

                for ind, label in enumerate(ax.get_xticklabels()):
                    if ind % 2 == 0:
                        label.set_visible(True)
                    else:
                        label.set_visible(False)

                ax.set(xlabel='time')
                ax.set(ylabel=metric)

                ax.set_title(metric)
                ax.set(ylim=(0, None))

                plot_object = io.BytesIO()
                ax.figure.savefig(plot_object)
                plot_object.seek(0)
                plot_object.name = '{0}.png'
                plt.close()

                bot.sendPhoto(chat_id=chat_id, photo=plot_object, caption=msg)


    # Wire the DAG: each extract task feeds its own alerting task.
    feed_data = extract_feed()
    message_data = extract_message()
    run_alerts(feed_data, metrics=feed_metrics)
    run_alerts(message_data, metrics=message_metrics)
    
# Call the @dag-decorated function to build the DAG; the module-level name is
# rebound to the returned object.
muksinov_alert_bot = muksinov_alert_bot()