In [None]:
import io
import json
import os
import re
import time as tm
from datetime import date, timedelta, datetime
from urllib.parse import urlencode

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta
from google.oauth2 import service_account

In [None]:
# Put the Google Drive id of the JSON key file here (the value that follows `id=` in the link)
!gdown 'https://drive.google.com/uc?id={id_file}'

Downloading...
From: https://drive.google.com/uc?id=1QwmLE2ISnBSukCCL0QdlHgCvZvrUmB0g
To: /content/eaptekareklama-bq-ffab31fc6860.json
  0% 0.00/2.31k [00:00<?, ?B/s]100% 2.31k/2.31k [00:00<00:00, 2.26MB/s]


In [None]:
# NOTE(review): placeholder only; avoid committing a real token to the notebook.
API_TOKEN = '____________________________'
# Use the path printed by the download step above.
credentials = service_account.Credentials.from_service_account_file('/content/eaptekareklama-bq-ffab31fc6860.json')
additional_fields = 'device_model'
# Resolve "yesterday" exactly once so every derived date agrees even if the
# cell happens to run across midnight.
_yesterday = date.today() - timedelta(days=1)
# Day whose data is loaded: yesterday.
load_date = _yesterday.strftime('%Y-%m-%d')
# First day of the month that contains yesterday.
from_date = _yesterday.replace(day=1).strftime('%Y-%m-%d')
# Eight days before today (seven days before yesterday).
from_date_det = (_yesterday - timedelta(days=7)).strftime('%Y-%m-%d')

In [None]:
def load_table(app_id, API_TOKEN, additional_fields, load_date):
  """Download one day of in-app events from the AppsFlyer export endpoint and aggregate them.

  Requests the `in_app_events_report` CSV for `app_id`, restricted to the
  `af_first_order` and `af_purchase` events with `from` and `to` both set to
  `load_date`.

  Args:
    app_id: application id inserted into the export URL.
    API_TOKEN: value sent as the `api_token` query parameter.
    additional_fields: value sent as the `additional_fields` query parameter.
    load_date: value sent as both `from` and `to`.

  Returns:
    DataFrame with one row per (install_date, event_date, campaign,
    event_name, media_source, partner). Key columns are cast to str;
    `users` (distinct AppsFlyer IDs) and `revenue` (summed event revenue)
    are cast to float.
  """
  params = {
      'api_token': API_TOKEN,
      'additional_fields': additional_fields,
      'maximum_rows': '1000000',
      'from': load_date,
      'to': load_date,
      'event_name': 'af_first_order,af_purchase',
  }
  # urlencode escapes reserved characters in the values; commas are kept
  # literal because they separate list items in the parameter values.
  query = urlencode(params, safe=',')
  df = pd.read_csv(
      f'https://hq.appsflyer.com/export/{app_id}/in_app_events_report/v5?' + query,
      parse_dates=['Install Time', 'Event Time'],
  )
  source_cols = ['Install Time', 'Event Time', 'Event Name', 'Event Revenue',
                 'Media Source', 'Campaign', 'AppsFlyer ID', 'Partner']
  key_cols = ['install_date', 'event_date', 'campaign', 'event_name', 'media_source', 'partner']
  # rename/assign build a new frame instead of writing into a slice of `df`,
  # which is what triggered SettingWithCopyWarning before.
  df_part = (
      df[source_cols]
      .rename(columns=lambda name: name.replace(' ', '_').lower())
      .assign(
          install_date=lambda d: d.install_time.dt.date,
          event_date=lambda d: d.event_time.dt.date,
      )
  )
  df_agg = (
      df_part.groupby(key_cols, as_index=False)
      .agg({'appsflyer_id': 'nunique', 'event_revenue': 'sum'})
      .rename(columns={'appsflyer_id': 'users', 'event_revenue': 'revenue'})
  )
  df_agg[key_cols] = df_agg[key_cols].astype('str')
  df_agg[['users', 'revenue']] = df_agg[['users', 'revenue']].astype('float')
  return df_agg

In [None]:
def load_fraud(app_id, API_TOKEN, report, additional_fields, field, count_field, load_date):
  """Download one day of an AppsFlyer export report and count distinct installs per group.

  Args:
    app_id: application id inserted into the export URL.
    API_TOKEN: value sent as the `api_token` query parameter.
    report: report name inserted into the export URL path.
    additional_fields: value sent as the `additional_fields` query parameter.
    field: header of the extra CSV column to keep (e.g. 'Device Model').
    count_field: name given to the distinct-AppsFlyer-ID count column.
    load_date: value sent as both `from` and `to`.

  Returns:
    DataFrame grouped by event_date, campaign, media_source, partner and the
    normalised `field` column (spaces replaced by '_', lower-cased), restricted
    to rows whose event name is "install". Key columns are str, the count is int.
  """
  def _normalise(name):
    return name.replace(' ', '_').lower()

  params = {
      'api_token': API_TOKEN,
      'additional_fields': additional_fields,
      'maximum_rows': '1000000',
      'from': load_date,
      'to': load_date,
  }
  # urlencode escapes reserved characters; commas stay literal because they
  # separate list items in the parameter values.
  query = urlencode(params, safe=',')
  df = pd.read_csv(
      f'https://hq.appsflyer.com/export/{app_id}/{report}/v5?' + query,
      parse_dates=['Event Time'],
  )
  # Group by the column that is actually selected (`field`, normalised), not by
  # the raw `additional_fields` request string: the two only coincide when a
  # single field is requested and its header normalises to the same text.
  field_col = _normalise(field)
  key_cols = ['event_date', 'campaign', 'media_source', 'partner', field_col]
  # query/assign return new frames, so nothing is written into a slice.
  df_part = (
      df[['Event Time', 'Event Name', 'Media Source', 'Campaign', 'AppsFlyer ID', 'Partner', field]]
      .rename(columns=_normalise)
      .query('event_name == "install"')
      .assign(event_date=lambda d: d.event_time.dt.date)
  )
  df_agg = (
      df_part.groupby(key_cols, as_index=False)
      .agg({'appsflyer_id': 'nunique'})
      .rename(columns={'appsflyer_id': count_field})
  )
  df_agg[key_cols] = df_agg[key_cols].astype('str')
  df_agg[count_field] = df_agg[count_field].astype('int')
  return df_agg

In [None]:
def load_fraud_detection(app_id, API_TOKEN, from_date_det, load_date):
    """Download the AppsFlyer `detection` export for a date range and count installs per fraud reason.

    Args:
        app_id: application id inserted into the export URL.
        API_TOKEN: value sent as the `api_token` query parameter.
        from_date_det: value sent as the `from` query parameter.
        load_date: value sent as the `to` query parameter.

    Returns:
        DataFrame grouped by event_date, campaign, media_source, partner and
        fraud_reasons (all cast to str) with `count_fraudinst`, the number of
        distinct AppsFlyer IDs among rows whose event name is "install" (int).

    Raises:
        requests.HTTPError: if the export endpoint answers with an error status.
    """
    url = f"https://hq.appsflyer.com/export/{app_id}/detection/v5"
    # Let requests build and escape the query string.
    params = {
        "from": from_date_det,
        "to": load_date,
        "maximum_rows": "1000000",
        "additional_fields": "fraud_reasons",
        "api_token": API_TOKEN,
    }
    headers = {"accept": "text/csv"}
    response = requests.get(url, params=params, headers=headers)
    # Fail here on an HTTP error instead of trying to parse the error body as CSV.
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.content.decode('utf-8')), parse_dates=['Event Time'])
    key_cols = ['event_date', 'campaign', 'media_source', 'partner', 'fraud_reasons']
    # query/assign return new frames, so nothing is written into a slice.
    df_part = (
        df[['Event Time', 'Event Name', 'Media Source', 'Campaign', 'AppsFlyer ID', 'Partner', 'Fraud Reasons']]
        .rename(columns=lambda name: name.replace(' ', '_').lower())
        .query('event_name == "install"')
        .assign(event_date=lambda d: d.event_time.dt.date)
    )
    df_agg = (
        df_part.groupby(key_cols, as_index=False)
        .agg({'appsflyer_id': 'nunique'})
        .rename(columns={'appsflyer_id': 'count_fraudinst'})
    )
    df_agg[key_cols] = df_agg[key_cols].astype('str')
    df_agg['count_fraudinst'] = df_agg['count_fraudinst'].astype('int')
    return df_agg

In [None]:
def load_users(app_id, from_date, load_date, partner="realweb"):
  """Fetch per-day, per-campaign user counts from the AppsFlyer cohort endpoint.

  Posts a cohort query (KPI `users`, grouped by `date`, `c` and `af_prt`) and
  parses the response body as CSV.

  Args:
    app_id: application id inserted into the endpoint URL.
    from_date: value sent as the `from` field of the payload.
    load_date: value sent as the `to` field of the payload.
    partner: only rows whose `af_prt` equals this value are kept
      (defaults to "realweb").

  Returns:
    DataFrame with columns `install_date` (datetime), `campaign` and `users`.

  Raises:
    requests.HTTPError: if the endpoint answers with an error status.
  """
  url = f"https://hq1.appsflyer.com/api/cohorts/v1/data/app/{app_id}"
  payload = {
      "kpis": ["users"],
      "groupings": ["date", "c", "af_prt"],
      "from": from_date,
      "to": load_date,
      "min_cohort_size": 1,
      "aggregation_type": "on_day",
      "cohort_type": "user_acquisition"
  }
  # Insert the API token V2.0 here (the one that is several times longer).
  # NOTE(review): placeholder only; avoid committing a real token to the notebook.
  headers = {
      "accept": "application/json",
      "content-type": "application/json",
      "authorization": "Bearer ___________________________________________________________________________________________________________"
  }

  response = requests.post(url, json=payload, headers=headers)
  # Fail here on an HTTP error instead of trying to parse the error body as CSV.
  response.raise_for_status()
  rawData = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
  df_users = (
      rawData
      .assign(date=lambda d: pd.to_datetime(d['date']))
      .query('af_prt == @partner')[['date', 'c', 'users']]
      .rename(columns={'date': 'install_date', 'c': 'campaign'})
  )
  return df_users

In [None]:
def cohort_func(df, min_date, event, agg_field, df_users, dict_ag, dict_s):
  """Build a cumulative cohort table (one row per campaign and install date) for one event.

  Args:
    df: event rows with at least `install_date`, `event_name`, `campaign`,
      `period` and the `agg_field` column.
    min_date: rows with `install_date` earlier than this are ignored.
    event: value of `event_name` to keep.
    agg_field: name of the numeric column summed per cohort and period.
    df_users: frame merged on `campaign` and `install_date`; its `users`
      column becomes `Users`.
      NOTE(review): the positional reordering below assumes `users` is the
      only extra column in this frame - confirm against the caller.
    dict_ag: lookup frame joined on `code_agency`.
    dict_s: lookup frame joined on `code_source`.
      NOTE(review): the final reordering assumes each lookup contributes
      exactly one extra column - confirm.

  Returns:
    DataFrame whose first two columns are the last and second-to-last columns
    produced by the lookup merges, followed by `Campaign`, `Cohort Day`,
    `Users` and one running-total column per period.
  """
  # Sum agg_field per (install_date, campaign, period) for the requested event.
  df_cohort_event = df.query('install_date >= @min_date and event_name == @event')[['install_date', 'campaign', agg_field, 'period']] \
                          .groupby(['install_date', 'campaign', 'period'], as_index=False).agg({agg_field:'sum'})
  max_df_period_ev = df_cohort_event.period.max()
  # One column per period value, one row per (campaign, install_date).
  df_event_pivot = df_cohort_event.pivot_table(index = ['campaign', 'install_date'],
                                      columns = 'period',
                                      values = agg_field)
  df_event_pivot.columns.name = None
  # Fill missing periods with 0, label the period columns (the label always says
  # "count", whatever agg_field is), then right-merge so every row of df_users
  # is kept; cohorts without events get 0.
  df_event_pivot = df_event_pivot.reset_index().fillna(0) \
                  .rename(columns={i: f'{event} - count - day {i} - partial' for i in range(max_df_period_ev + 1)}) \
                  .merge(df_users, how='right', on=['campaign', 'install_date']).fillna(0) \
                  .rename(columns={'campaign': 'Campaign', 'install_date': 'Cohort Day', 'users': 'Users'}) \
                  .sort_values('Cohort Day')
  # Move the last column to third position, directly after the first two.
  cols = df_event_pivot.columns.tolist()
  df_event_pivot = df_event_pivot[cols[:2] + cols[-1:] + cols[2:-1]]
  cols = df_event_pivot.columns.tolist()
  # Turn the columns from index 4 onwards into running totals: each one is
  # increased by the (already accumulated) column to its left.
  for i in range(4,len(cols)):
    df_event_pivot[cols[i]] += df_event_pivot[cols[i-1]]
  df_event_pivot[cols[3:]] = df_event_pivot[cols[3:]].round().astype('int')
  # Reverse the '-'-separated parts of the date string and join them with '.'
  # (e.g. '2024-01-31' -> '31.01.2024').
  df_event_pivot['Cohort Day'] = df_event_pivot['Cohort Day'].astype('str').apply(lambda x: '.'.join(x.split('-')[::-1]))
  df_event_pivot = df_event_pivot.reset_index(drop=True)
  # Remap one known campaign name, then replace every name that has fewer than
  # four '_'-separated parts with a fixed marker.
  df_event_pivot.Campaign = df_event_pivot.Campaign.apply(lambda x: 'xapads_eapteka_a111_c217' if x == 'xapads-eapteka-xm' else x) \
                                                    .apply(lambda x: 'no_name_of_campaign' if len(x.split('_')) < 4 else x)
  # Agency code = third name part when it starts with 'a';
  # source code = fourth name part when it starts with 'c'; otherwise ''.
  df_event_pivot['code_agency'] = df_event_pivot.Campaign.apply(lambda x: x.split('_')[2] if len(x.split('_'))>1 and x.split('_')[2].startswith('a') else '')
  df_event_pivot['code_source'] = df_event_pivot.Campaign.apply(lambda x: x.split('_')[3] if len(x.split('_'))>1 and x.split('_')[3].startswith('c') else '')
  # Attach the lookup columns and drop the helper code columns.
  df_cohort_final_ev = df_event_pivot.merge(dict_ag, how='left', on='code_agency') \
                                      .merge(dict_s, how='left', on='code_source') \
                                      .drop(columns=['code_agency', 'code_source'])
  # Move the last two columns to the front, last one first.
  cols = df_cohort_final_ev.columns.tolist()
  df_cohort_final_ev = df_cohort_final_ev[cols[-1:] + cols[-2:-1] + cols[:-2]]
  df_cohort_final_ev.Users = df_cohort_final_ev.Users.fillna(0).astype('int')
  return df_cohort_final_ev

In [None]:
# Load the in-app events for `load_date`: Android app first, then the iOS app.
df_load_andr, df_load_ios = (
    load_table(app_id, API_TOKEN, additional_fields, load_date)
    for app_id in ('ru.getpharma.eapteka', 'id570400364')
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_part['install_date'] = df_part.install_time.dt.date
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_part['event_date'] = df_part.event_time.dt.date


In [None]:
# Read back everything already stored in BigQuery from `from_date` onwards,
# one query per platform table.
sql_and_check = (
    "SELECT * FROM `eaptekareklama-bq.Appsflyer.appsflyer_in_app_events_android`\n"
    f"WHERE event_date >= '{from_date}'"
)
sql_ios_check = (
    "SELECT * FROM `eaptekareklama-bq.Appsflyer.appsflyer_in_app_events_ios`\n"
    f"WHERE event_date >= '{from_date}'"
)
all_data_android, all_data_ios = (
    pd.read_gbq(sql_text, credentials=credentials, dialect='standard')
    for sql_text in (sql_and_check, sql_ios_check)
)

In [None]:
# Append the freshly loaded rows only when none of them is already stored
# (an inner merge on every column of the fresh frame finds no match).
for _stored, _fresh, _table in (
    (all_data_android, df_load_andr, 'eaptekareklama-bq.Appsflyer.appsflyer_in_app_events_android'),
    (all_data_ios, df_load_ios, 'eaptekareklama-bq.Appsflyer.appsflyer_in_app_events_ios'),
):
    _overlap = _stored.merge(_fresh, how='inner', on=_fresh.columns.to_list())
    if len(_overlap) == 0:
        _fresh.to_gbq(
            _table,
            project_id='eaptekareklama-bq',
            if_exists='append',
            credentials=credentials,
        )

In [None]:
# Cohort sizes between `from_date` and `load_date`: Android app first, then the iOS app.
andr_users, ios_users = (
    load_users(app_id, from_date, load_date)
    for app_id in ('ru.getpharma.eapteka', 'id570400364')
)

In [None]:
# Lookup tables that translate the agency / source codes embedded in campaign
# names into display names. Every code must be plain ASCII: three source codes
# were previously typed with a Cyrillic first letter and so never matched the
# Latin-'c' codes extracted from campaign names.
_AGENCY_NAMES = {
    'a485': '2leads',
    'a617': 'Borscht',
    'a564': 'Colead',
    'a339': 'GoMobile',
    'a776': 'Gradientt',
    'a999': 'In-house',
    'a499': 'Mediaserfer',
    'a224': 'Mobisharks',
    'a823': 'MobX',
    'a662': 'Rocket10',
    'a799': 'ThinkMobile',
    'a391': 'TopTraffic',
    'a888': 'Mobio',
    'a111': 'xapads',
}
# NOTE(review): 'untiy' looks like a typo for 'unity'; left unchanged because
# the label is written to the output sheets - confirm before renaming.
_SOURCE_NAMES = {
    'c201': 'Apple Search Ads',
    'c202': 'DV360',
    'c203': 'Facebook',
    'c204': 'Google Adwords',
    'c210': 'in-app',
    'c205': 'Mytarget',
    'c206': 'Tik Tok',
    'c209': 'Twitter',
    'c207': 'VK',
    'c208': 'Yandex.Direct',
    'c211': 'Appnext',
    'c212': 'beta vk',
    'c217': 'xapads',
    'c215': 'AdGate',
    'c216': 'Bigo Ads',
    'c218': 'untiy',
    'c219': 'mintegral',
}
dict_ag = pd.DataFrame(list(_AGENCY_NAMES.items()), columns=['code_agency', 'Agency name'])
dict_s = pd.DataFrame(list(_SOURCE_NAMES.items()), columns=['code_source', 'Source name'])

In [None]:
# Keep only the "realweb" partner, convert both date columns to datetimes and
# derive `period` = whole days between install and event. `assign` builds a
# new frame, so nothing is written into a filtered slice.
all_data_android = (
    all_data_android
    .query('partner == "realweb"')
    .assign(
        event_date=lambda d: pd.to_datetime(d.event_date),
        install_date=lambda d: pd.to_datetime(d.install_date),
    )
    .assign(period=lambda d: (d.event_date - d.install_date).dt.days)
)
# The cohort window starts at `from_date`.
min_date_android = from_date

In [None]:
# Android cohort tables: first-order users, purchase users, purchase revenue.
andr_cohort_final_ord, andr_cohort_final_pur, andr_cohort_final_pur_rev = (
    cohort_func(all_data_android, min_date_android, event, metric, andr_users, dict_ag, dict_s)
    for event, metric in (
        ('af_first_order', 'users'),
        ('af_purchase', 'users'),
        ('af_purchase', 'revenue'),
    )
)

In [None]:
# Keep only the "realweb" partner, convert both date columns to datetimes and
# derive `period` = whole days between install and event. `assign` builds a
# new frame, so nothing is written into a filtered slice.
all_data_ios = (
    all_data_ios
    .query('partner == "realweb"')
    .assign(
        event_date=lambda d: pd.to_datetime(d.event_date),
        install_date=lambda d: pd.to_datetime(d.install_date),
    )
    .assign(period=lambda d: (d.event_date - d.install_date).dt.days)
)
# The cohort window starts at `from_date`.
min_date_ios = from_date

In [None]:
# iOS cohort tables: first-order users, purchase users, purchase revenue.
ios_cohort_final_ord, ios_cohort_final_pur, ios_cohort_final_pur_rev = (
    cohort_func(all_data_ios, min_date_ios, event, metric, ios_users, dict_ag, dict_s)
    for event, metric in (
        ('af_first_order', 'users'),
        ('af_purchase', 'users'),
        ('af_purchase', 'revenue'),
    )
)

In [None]:
# Google Sheets client setup: authorise gspread with the service-account key
# file and open the target workbook.
from oauth2client.service_account import ServiceAccountCredentials
import gspread
from gspread_dataframe import set_with_dataframe

# OAuth scope requested for the service account (spreadsheets only).
scope = [
    'https://www.googleapis.com/auth/spreadsheets',
]

# Path of the service-account JSON key file.
GOOGLE_KEY_FILE = '/content/eaptekareklama-bq-ffab31fc6860.json'

credentials_spread = ServiceAccountCredentials.from_json_keyfile_name(GOOGLE_KEY_FILE, scope)
gc = gspread.authorize(credentials_spread)

# Insert the id of the Google Sheets file here (it can be read from the file's link)
workbook_key = '______________________________________________'
workbook = gc.open_by_key(workbook_key)

In [None]:
# Publish each cohort table to its worksheet: fetch the tab, wipe it, then
# write the frame with headers and without the index.
_cohort_tabs = (
    ('andr_coh_order', andr_cohort_final_ord),
    ('andr_coh_pur', andr_cohort_final_pur),
    ('andr_coh_pur_rev', andr_cohort_final_pur_rev),
    ('ios_coh_order', ios_cohort_final_ord),
    ('ios_coh_pur', ios_cohort_final_pur),
    ('ios_coh_pur_rev', ios_cohort_final_pur_rev),
)
_cohort_worksheets = []
for _tab_name, _cohort_frame in _cohort_tabs:
    _tab = workbook.worksheet(_tab_name)
    _tab.clear()
    set_with_dataframe(
        worksheet=_tab,
        dataframe=_cohort_frame,
        include_index=False,
        include_column_header=True,
        resize=True,
    )
    _cohort_worksheets.append(_tab)
# Keep the per-sheet handles available under their original names.
sheet1, sheet2, sheet3, sheet4, sheet5, sheet6 = _cohort_worksheets

In [None]:
# Detection report from `from_date_det` to `load_date`: Android app first, then the iOS app.
df_detection_android, df_detection_ios = (
    load_fraud_detection(app_id, API_TOKEN, from_date_det, load_date)
    for app_id in ('ru.getpharma.eapteka', 'id570400364')
)

In [None]:
# For each platform: read the stored detection table, drop exact duplicate
# rows, keep the stored rows dated before `from_date_det` (with event_date
# turned back into a date string) and append the freshly downloaded window.
# `assign` builds new frames, so nothing is written into a filtered slice.
sql_block_andr = """SELECT * FROM `eaptekareklama-bq.Appsflyer.appsflyer_detection_android`"""
data_block_android = (
    pd.read_gbq(sql_block_andr, credentials=credentials, dialect='standard')
    .drop_duplicates()
    .assign(event_date=lambda d: pd.to_datetime(d.event_date))
)
data_det_android = (
    data_block_android
    .query('event_date < @from_date_det')
    .assign(event_date=lambda d: d.event_date.dt.date.astype('str'))
)
data_det_and = pd.concat([data_det_android, df_detection_android])

sql_block_ios = """SELECT * FROM `eaptekareklama-bq.Appsflyer.appsflyer_detection_ios`"""
data_block_ios = (
    pd.read_gbq(sql_block_ios, credentials=credentials, dialect='standard')
    .drop_duplicates()
    .assign(event_date=lambda d: pd.to_datetime(d.event_date))
)
data_det_ios = (
    data_block_ios
    .query('event_date < @from_date_det')
    .assign(event_date=lambda d: d.event_date.dt.date.astype('str'))
)
data_det_ios_f = pd.concat([data_det_ios, df_detection_ios])

In [None]:
# Overwrite each BigQuery detection table with its combined frame
# (Android first, then iOS).
for _detection_frame, _detection_table in (
    (data_det_and, 'eaptekareklama-bq.Appsflyer.appsflyer_detection_android'),
    (data_det_ios_f, 'eaptekareklama-bq.Appsflyer.appsflyer_detection_ios'),
):
    _detection_frame.to_gbq(
        _detection_table,
        project_id='eaptekareklama-bq',
        if_exists='replace',
        credentials=credentials,
    )

In [None]:
def _campaign_code(campaign, position, prefix):
    """Return the '_'-separated part of `campaign` at `position` when it starts with `prefix`, else ''.

    Names with too few parts yield '' instead of raising IndexError.
    """
    parts = campaign.split('_')
    if len(parts) > position and parts[position].startswith(prefix):
        return parts[position]
    return ''


def _daily_fraud_installs(blocked, min_date):
    """Sum `count_fraudinst` per (event_date, campaign) from `min_date` on and add the code columns."""
    daily = (
        blocked.query('event_date >= @min_date')[['event_date', 'campaign', 'count_fraudinst']]
        .groupby(['event_date', 'campaign'], as_index=False)
        .agg({'count_fraudinst': 'sum'})
    )
    return daily.assign(
        code_agency=daily.campaign.apply(_campaign_code, args=(2, 'a')),
        code_source=daily.campaign.apply(_campaign_code, args=(3, 'c')),
    )


def _named_fraud_report(daily):
    """Attach agency/source names, drop the code columns and order the report columns."""
    named = (
        daily.merge(dict_ag, how='left', on='code_agency')
        .merge(dict_s, how='left', on='code_source')
        .drop(columns=['code_agency', 'code_source'])
    )
    # assign returns a new frame, so the column subset is never written in place.
    return named[['Source name', 'Agency name', 'campaign', 'event_date', 'count_fraudinst']] \
        .assign(event_date=lambda d: d.event_date.dt.date)


# Keep only the "realweb" partner and make sure event_date is a datetime.
data_block_android = data_block_android.query('partner == "realweb"') \
    .assign(event_date=lambda d: pd.to_datetime(d.event_date))
data_block_ios = data_block_ios.query('partner == "realweb"') \
    .assign(event_date=lambda d: pd.to_datetime(d.event_date))
min_date_android_block = from_date
min_date_ios_block = from_date
df_block_andr = _daily_fraud_installs(data_block_android, min_date_android_block)
df_block_ios = _daily_fraud_installs(data_block_ios, min_date_ios_block)
andr_final_block = _named_fraud_report(df_block_andr)
ios_final_block = _named_fraud_report(df_block_ios)

In [None]:
# Publish the fraud-install summaries: fetch each tab, wipe it, then write the
# frame with headers and without the index.
_fraud_worksheets = []
for _tab_name, _fraud_frame in (
    ('Protect_and_install', andr_final_block),
    ('Protect_iOS_install', ios_final_block),
):
    _tab = workbook.worksheet(_tab_name)
    _tab.clear()
    set_with_dataframe(
        worksheet=_tab,
        dataframe=_fraud_frame,
        include_index=False,
        include_column_header=True,
        resize=True,
    )
    _fraud_worksheets.append(_tab)
# Keep the per-sheet handles available under their original names.
sheet7, sheet8 = _fraud_worksheets