### OD-1. Исследование каналов привлечения 

ЗАДАЧИ
* Выгрузить данные.
* Рассчитать коэффициент конверсии с помощью Pandas.

КОНКРЕТНЫЕ ШАГИ (ФОРМАЛИЗОВАННЫЕ ЗАДАЧИ)
* Определить параметры запроса к Яндекс.Метрике.
* Выгрузить данные по API с помощью Jupyter Notebook.
* Обработать данные в Pandas.
* Посчитать коэффициент конверсии по каналам.

Для выгрузки отчёта нам необходимы следующие данные:
* источники трафика по модели атрибуции «Последний значимый источник»;
* суммарное количество визитов;
* количество выполнений целевого условия «Найм клинеров» во всех визитах.

In [1]:
import json
import requests
import pandas as pd
import numpy as np
from pprint import pprint
# Query the Yandex.Metrica stat API for visits and goal reaches grouped by
# the ym:s:lastsignTrafficSource dimension, then preview the first rows.
# NOTE(review): the OAuth token is stored in plain text in this notebook;
# consider revoking it and loading the value from the environment instead.
token = 'AQAAAAADybOOAAedRJ17-iGHOUPFs-lrxYXlefQ'
headers = {'Authorization': 'OAuth ' + token}
params = {'metrics': 'ym:s:visits,ym:s:goal137595631reaches',
          'dimensions': 'ym:s:lastsignTrafficSource',
          'date1': '2020-10-07',
          'date2': '2020-10-09',
          'ids': 30177909,
          'accuracy': 'full',
          'limit': 100000}
response = requests.get('https://api-metrika.yandex.net/stat/v1/data', params=params, headers=headers)
# Fail fast on an HTTP error status (e.g. rejected token) instead of hitting
# a confusing KeyError on 'data' further down.
response.raise_for_status()
metrika_data = response.json()
pprint(metrika_data['data'][0:5])

[{'dimensions': [{'icon_id': '2',
                  'icon_type': 'traffic-source',
                  'id': 'organic',
                  'name': 'Search engine traffic'}],
  'metrics': [345.0, 10.0]},
 {'dimensions': [{'icon_id': '0',
                  'icon_type': 'traffic-source',
                  'id': 'direct',
                  'name': 'Direct traffic'}],
  'metrics': [113.0, 5.0]},
 {'dimensions': [{'icon_id': '1',
                  'icon_type': 'traffic-source',
                  'id': 'referral',
                  'name': 'Link traffic'}],
  'metrics': [16.0, 1.0]},
 {'dimensions': [{'icon_id': '8',
                  'icon_type': 'traffic-source',
                  'id': 'social',
                  'name': 'Social network traffic'}],
  'metrics': [12.0, 1.0]},
 {'dimensions': [{'icon_id': '-1',
                  'icon_type': 'traffic-source',
                  'id': 'internal',
                  'name': 'Internal traffic'}],
  'metrics': [2.0, 0.0]}]


In [2]:
import pandas as pd  # pandas is imported under its usual alias pd

# Load the raw API rows as-is: one column per top-level key of each row.
metrika_df = pd.DataFrame(data=metrika_data['data'])
# head(n=10) keeps at most the first 10 rows of the DataFrame for display.
display(metrika_df.head(n=10))

Unnamed: 0,dimensions,metrics
0,"[{'icon_id': '2', 'icon_type': 'traffic-source...","[345.0, 10.0]"
1,"[{'icon_id': '0', 'icon_type': 'traffic-source...","[113.0, 5.0]"
2,"[{'icon_id': '1', 'icon_type': 'traffic-source...","[16.0, 1.0]"
3,"[{'icon_id': '8', 'icon_type': 'traffic-source...","[12.0, 1.0]"
4,"[{'icon_id': '-1', 'icon_type': 'traffic-sourc...","[2.0, 0.0]"


In [3]:
def getMetrikaDataInListOfDicts(metrika_data):
    """Flatten an API response into a list with one flat dict per data row.

    Keys come from ``metrika_data['query']['dimensions']`` and
    ``metrika_data['query']['metrics']`` and are matched to the row's values
    by position. A dimension contributes its ``'name'`` field, a metric
    contributes its value unchanged.
    """
    dimension_keys = metrika_data['query']['dimensions']
    metric_keys = metrika_data['query']['metrics']

    rows = []
    for item in metrika_data['data']:
        # Dimensions go in first, so metric keys are inserted after them.
        row = {dimension_keys[pos]: dim['name']
               for pos, dim in enumerate(item['dimensions'])}
        row.update((metric_keys[pos], value)
                   for pos, value in enumerate(item['metrics']))
        rows.append(row)
    return rows

# Flatten the API response and preview the first five flattened rows.
metrika_list_of_dicts = getMetrikaDataInListOfDicts(metrika_data)
pprint(metrika_list_of_dicts[:5])

[{'ym:s:goal137595631reaches': 10.0,
  'ym:s:lastsignTrafficSource': 'Search engine traffic',
  'ym:s:visits': 345.0},
 {'ym:s:goal137595631reaches': 5.0,
  'ym:s:lastsignTrafficSource': 'Direct traffic',
  'ym:s:visits': 113.0},
 {'ym:s:goal137595631reaches': 1.0,
  'ym:s:lastsignTrafficSource': 'Link traffic',
  'ym:s:visits': 16.0},
 {'ym:s:goal137595631reaches': 1.0,
  'ym:s:lastsignTrafficSource': 'Social network traffic',
  'ym:s:visits': 12.0},
 {'ym:s:goal137595631reaches': 0.0,
  'ym:s:lastsignTrafficSource': 'Internal traffic',
  'ym:s:visits': 2.0}]


In [4]:
# Build the DataFrame from the flattened rows: one column per dict key.
metrika_df = pd.DataFrame(data=metrika_list_of_dicts)
display(metrika_df.head(n=10))

Unnamed: 0,ym:s:lastsignTrafficSource,ym:s:visits,ym:s:goal137595631reaches
0,Search engine traffic,345.0,10.0
1,Direct traffic,113.0,5.0
2,Link traffic,16.0,1.0
3,Social network traffic,12.0,1.0
4,Internal traffic,2.0,0.0


In [5]:
# Positional rename of the three columns to short labels.
# NOTE(review): assumes the column order is source, visits, goal reaches.
metrika_df.columns = ['Source', 'Visits', 'Conversions']
# Conversion rate in percent: conversions * 100 / visits, computed per row.
metrika_df['Last non-direct CR%'] = metrika_df['Conversions'].mul(100).div(metrika_df['Visits'])
display(metrika_df.head(n=10))

Unnamed: 0,Source,Visits,Conversions,Last non-direct CR%
0,Search engine traffic,345.0,10.0,2.898551
1,Direct traffic,113.0,5.0,4.424779
2,Link traffic,16.0,1.0,6.25
3,Social network traffic,12.0,1.0,8.333333
4,Internal traffic,2.0,0.0,0.0


In [6]:
# Repeat the report with the ym:s:lastTrafficSource dimension; metrics, date
# range and counter id are the same literals as in the first request.
params = {'metrics': 'ym:s:visits,ym:s:goal137595631reaches',
          'dimensions': 'ym:s:lastTrafficSource',
          'date1': '2020-10-07',
          'date2': '2020-10-09',
          'ids': 30177909,
          'accuracy': 'full',
          'limit': 100000}
response = requests.get('https://api-metrika.yandex.net/stat/v1/data', params=params, headers=headers)
# Fail fast on an HTTP error status instead of hitting a confusing KeyError
# when the parsed body is processed later.
response.raise_for_status()
metrika_data = response.json()

def getMetrikaDataInListOfDicts(metrika_data):
    """Flatten an API response into a list with one flat dict per data row.

    Keys come from ``metrika_data['query']['dimensions']`` and
    ``metrika_data['query']['metrics']`` and are matched to the row's values
    by position. A dimension contributes its ``'name'`` field, a metric
    contributes its value unchanged.
    """
    # NOTE(review): an identical definition appears earlier in this notebook;
    # a single copy would be enough if the cells are run in order.
    query = metrika_data['query']
    dimension_keys = query['dimensions']
    metric_keys = query['metrics']

    flattened = []
    for entry in metrika_data['data']:
        # Dimensions go in first, so metric keys are inserted after them.
        record = {dimension_keys[idx]: dim['name']
                  for idx, dim in enumerate(entry['dimensions'])}
        record.update((metric_keys[idx], value)
                      for idx, value in enumerate(entry['metrics']))
        flattened.append(record)
    return flattened

# Flatten the current response and load it into a fresh DataFrame.
metrika_list_of_dicts = getMetrikaDataInListOfDicts(metrika_data)
metrika_df = pd.DataFrame(data=metrika_list_of_dicts)

# Positional rename of the three columns to short labels.
metrika_df.columns = ['Source', 'Visits', 'Conversions']
# Conversion rate in percent: conversions * 100 / visits, computed per row.
# NOTE(review): the request in this cell uses ym:s:lastTrafficSource, yet the
# column keeps the 'Last non-direct CR%' label — confirm the label is intended.
metrika_df['Last non-direct CR%'] = metrika_df['Conversions'].mul(100).div(metrika_df['Visits'])
display(metrika_df.head(n=10))

Unnamed: 0,Source,Visits,Conversions,Last non-direct CR%
0,Search engine traffic,317.0,10.0,3.154574
1,Direct traffic,144.0,6.0,4.166667
2,Link traffic,15.0,1.0,6.666667
3,Social network traffic,10.0,0.0,0.0
4,Internal traffic,2.0,0.0,0.0
