In [1]:
import pandas as pd
import os
from openpyxl.utils.dataframe import dataframe_to_rows
import sqlalchemy
import datetime
import dateutil
import json
import smtplib
from basics import mysql_data, postgres_data


def report_window(now_msc):
    """Return the ``(date_edge, report_date)`` bounds of the report.

    ``date_edge`` is the first day of the month that lies five months before
    yesterday (relative to *now_msc*). ``report_date`` is the calendar date of
    *now_msc* itself; the queries use it as an exclusive upper bound.

    Both bounds are derived from the same clock value so that they cannot
    disagree around midnight when the host is not in the Moscow time zone.

    Args:
        now_msc: Naive ``datetime.datetime`` holding the current Moscow
            (UTC+3) wall-clock time.

    Returns:
        Tuple ``(date_edge, report_date)`` of ``YYYY-MM-DD`` strings.
    """
    # Import the submodule explicitly: on older python-dateutil releases a
    # bare ``import dateutil`` does not load ``dateutil.relativedelta``.
    from dateutil.relativedelta import relativedelta

    window_start = now_msc - datetime.timedelta(days=1) - relativedelta(months=5)
    window_start = window_start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    return window_start.strftime("%Y-%m-%d"), now_msc.strftime("%Y-%m-%d")


def flag_referral_fraud(first_orders, senders_with_addresses):
    """Mark first orders whose promo code and address both match a sender.

    Args:
        first_orders: DataFrame with at least the columns ``code`` and
            ``address`` (one row per first order paid with a referral code).
        senders_with_addresses: DataFrame with at least the columns
            ``user_id``, ``promo_code`` and ``address``.

    Returns:
        A copy of *first_orders* without the ``address`` column and with an
        extra integer column ``fraud_flag``: 1 when a sender with the same
        promo code and the same address exists (``user_id > 0``), else 0.
    """
    # A sender may appear several times with the same code and address (one
    # row per message sent). Keep a single row per join key, otherwise the
    # left merge below would multiply the matching first-order rows.
    sender_keys = senders_with_addresses[['promo_code', 'address', 'user_id']].drop_duplicates(
        subset=['promo_code', 'address'])

    flagged = first_orders.merge(
        sender_keys,
        how='left',
        left_on=['code', 'address'],
        right_on=['promo_code', 'address'],
    )
    # Unmatched rows carry NaN in ``user_id``; ``NaN > 0`` is False -> flag 0.
    flagged['fraud_flag'] = flagged['user_id'].gt(0).astype(int)
    return flagged.drop(columns=['address', 'user_id', 'promo_code'])


def build_connection_string(config, suffix="?charset=utf8"):
    """Assemble a SQLAlchemy URL string from a config mapping.

    Args:
        config: Mapping with the keys ``prefix``, ``user``, ``password``,
            ``host``, ``port`` and ``database``.
        suffix: Text appended after the database name (query string).

    Returns:
        ``"{prefix}://{user}:{password}@{host}:{port}/{database}"`` + *suffix*.
    """
    # NOTE(review): the password is inserted verbatim. If it can contain
    # characters such as '@' or '/', it must be URL-escaped -- confirm whether
    # config.json already stores it in escaped form before changing this.
    return ("{prefix}://{user}:{passw}@{host}:{port}/{db}" + suffix).format(
        prefix=config['prefix'],
        host=config['host'],
        user=config['user'],
        passw=config['password'],
        port=config['port'],
        db=config['database'],
    )


if __name__ == '__main__':
    script_start = datetime.datetime.now()
    # Current Moscow wall-clock time (UTC+3), stored as a naive datetime.
    today_msc = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=3))).replace(tzinfo=None)

    date_edge, report_date = report_window(today_msc)

    # First orders that used a promo code of action 'mr00464', with the
    # delivery address of that order.
    query = f'''
        with refferal_pcodes AS 
        (select distinct pav_order_id, code  
        from utkonos_sess.utk_promocode_buyer_using
        where action_id = 'mr00464' )

    select distinct f.buyer_id, first_o, f.pav_order_id, code, address
    from reports.first_order f inner join refferal_pcodes u on f.pav_order_id = u.pav_order_id
    inner join utkonos_sess.zakaz_delivery d on f.pav_order_id = d.pav_order_id
    where first_o >= '{date_edge}' and first_o < '{report_date}'
    '''

    # Delivery address of every buyer's latest order inside the window
    # (row_num == all_num selects the last row per buyer ordered by created).
    query_2 = f'''
    with last_o as
        (select
            pav_order_id, buyer_id
        from (
            select
                pav_order_id,
                buyer_id,
                created,
                row_number() over (partition by buyer_id order by created) as row_num,
                count(1) over (partition by buyer_id) as all_num
            from ZAKAZ
            where created >= '{date_edge}'  and created < '{report_date}'
            ) as o
        where row_num = all_num
        )
    select
        d.buyer_id,
        d.address
    from last_o
    inner join zakaz_delivery d
        using(pav_order_id)
    '''

    # Private messages of type 3 whose text starts with the friends-code
    # phrase, i.e. the users who sent a promo code.
    query_3 = f'''
    select user_id, promo_code, date(created_at) AS created
    from private_messages
    where message_type = 3
            and message LIKE 'Код для друзей%%'
            and date(created_at) >= '{date_edge}'
            and date(created_at) < '{report_date}'
    '''

    print(f'Витрина с {date_edge} по {report_date}')
    # First, pull all first orders made with a referral promo code.
    data_first_o = mysql_data(query)
    # Then fetch the addresses of the users who may have sent those promo
    # codes to the people who redeemed them.
    senders_last_order = mysql_data(query_2)

    # Next, load from Postgres the users who sent the promo codes to friends,
    # i.e. the senders.
    senders = postgres_data(query_3, db="notification")
    # Attach the senders' addresses to the senders of referral promo codes.
    senders_with_addresses = senders.merge(
        senders_last_order, how='inner', left_on='user_id', right_on='buyer_id')
    senders_with_addresses = senders_with_addresses[['user_id', 'promo_code', 'created', 'address']]
    # Join the senders onto the first orders of those who redeemed the code.
    # If both the address and the promo code match, the order is treated as
    # fraud; helper columns are dropped afterwards.
    data_final = flag_referral_fraud(data_first_o, senders_with_addresses)

    end_time = datetime.datetime.now()
    time_1 = round((end_time - script_start).total_seconds() / 60, 2)
    print(f'Data calculated, time consumed: {time_1}')

    with open('config.json', 'r', encoding='utf-8') as f:
        config_file = json.load(f)

    config = config_file['monolit']
    dop_string = "?charset=utf8"
    connection_string = build_connection_string(config, suffix=dop_string)

    engine = sqlalchemy.create_engine(connection_string)

    table_name = 'refferal_pcodes_with_fraud'
    try:
        # NOTE(review): to_sql runs with its default index=True, so the
        # DataFrame index is stored as an extra column; kept as-is because
        # consumers of reports.refferal_pcodes_with_fraud may rely on it.
        data_final.to_sql(table_name, engine, schema='reports', if_exists='replace')
    finally:
        # Release pooled connections even when the upload fails.
        engine.dispose()
    print('Successully created a table')

    end_time = datetime.datetime.now()
    time_2 = round((end_time - script_start).total_seconds() / 60, 2)
    print(f'Total time consumed: {time_2}')

Витрина с 2022-01-01 по 2022-06-08
Connecting to utkonos_sess... -> Quering to utkonos_sess... -> Successfully queried to utkonos_sess
Connecting to utkonos_sess... -> Quering to utkonos_sess... -> Successfully queried to utkonos_sess
Connecting to notification... -> Quering to notification... -> Successfully queried to notification
Data calculated, time consumed: 3.55
Successully created a table
Total time consumed: 3.56
