In [None]:
from os.path import expanduser
import json

def get_config():
    home = expanduser("~")
    
    with open(home + '/config.json') as f:
        config_data = f.read()
    config = json.loads(config_data)
    return config

YT_TOKEN = get_config()['miptgirl_yt_token']

In [17]:
# -*- coding: utf-8 -*-
from yql.api.v1.client import YqlClient

from nile.api.v1 import (
    clusters,
    aggregators as na,
    extractors as ne,
    filters as nf,
    Record
)

import pandas as pd
import datetime
import requests

yql_query_tmpl = '''
use hahn;
pragma yt.Pool = 'search-research';

$is_page_view = Re2::Match('(?:^|.*:)pv:1(?:$|:.*)');

$script = @@
def get_date(datetime):
    return datetime.split()[0]
@@;

$date = Python::get_date( 
    "(String?)->String?",  
    $script                             
);

insert into [home/metrica-analytics/miptgirl/itp_wo_yuids_versions]
with truncate
    select
        date,
        browser,
        os_family, 
        os_version,
        count(*) as total_hits,
        count_if(uniqid = '0') as hits_wo_yuid
    from
        (select
        	'<date>' as date,
            uniqid,
            UserAgent::Parse(useragent).BrowserName as browser,
            UserAgent::Parse(useragent).OSFamily as os_family,
            UserAgent::Parse(useragent).OSVersion as os_version
        from range('statbox/bs-watch-log', '<date>', '<date>')
        where $is_page_view(browserinfo)
            and UserAgent::Parse(useragent).OSFamily in ('iOS', 'MacOS')
            and UserAgent::Parse(useragent).BrowserName in ('Safari', 'MobileSafari')
        )
    group by 
        browser, 
        os_family, 
        date, 
        os_version;
	'''

import time
def post_to_stat(values, report):

    """POST new data for statisticts"""
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'StatRobotUser': 'robot_miptgirl',
        'StatRobotPassword': 'Poe1aich2D'
    }

    data = {
        'name': 'Metrika/%s' % report,
        'scale': 'd',
        'tsv_data': values
    }
    NUMBER_OF_TRIES = 10
    DELAY = 10
    for i in range(NUMBER_OF_TRIES):
        time.sleep(DELAY*i)
        r = requests.post('https://upload.stat.yandex-team.ru/_api/report/data', data=data, headers=headers)
        if r.status_code == 200:
            return r.text
        else:
            print 'request failed, attemp %d' % (i+1)
            
            if i == NUMBER_OF_TRIES - 1:
                print '#### operation failed'
                raise ValueError, r.text 

In [2]:
date = datetime.datetime.today() - datetime.timedelta(1)
date_str = date.strftime('%Y-%m-%d')

In [4]:
yql_query = yql_query_tmpl.replace('<date>', date_str)

In [6]:
%%time
with YqlClient(db="hahn", token='AQAD-qJSJkXyAAADvoy7R0K3hEGjl1zFYdu5_m8') as client:
    yql_insert = client.query(yql_query)
    yql_insert.run()
    print yql_insert

[92mQuery completed successfully.[0m
CPU times: user 1.42 s, sys: 80 ms, total: 1.5 s
Wall time: 45min 19s


In [7]:
cluster = clusters.Hahn(token = YT_TOKEN).env()
df = cluster.read('home/metrica-analytics/miptgirl/itp_wo_yuids_versions').as_dataframe()

In [9]:
ios_df = df[df.os_family == 'iOS'].copy()

ios_df['os_version_group'] = map(
    lambda x: '.'.join(x.split('.')[:2]) if str(x).startswith('11.') else 'older versions',
    ios_df.os_version
)

In [10]:
ios_versions_df = ios_df.pivot_table(index = 'date', columns = 'os_version_group', values = 'total_hits', aggfunc = sum)

In [11]:
ios_versions_df_wo_yuid = ios_df.pivot_table(index = 'date', columns = 'os_version_group', values = 'hits_wo_yuid', aggfunc = sum)

In [12]:
ios_df_aggr = ios_df.groupby(['date', 'os_version_group', 'browser', 'os_family'], as_index=False).sum()
ios_df_aggr['hits_wo_yuid_share'] = ios_df_aggr.hits_wo_yuid/ios_df_aggr.total_hits
ios_df_aggr_total = ios_df.groupby(['date', 'browser', 'os_family'], as_index = False)[['total_hits']].sum()
ios_df_aggr_total.columns = [u'date', u'browser', u'os_family', u'slice_total_hits']
ios_df_aggr_mrgd = ios_df_aggr.merge(ios_df_aggr_total, 
                 on = ['date', 'browser', 'os_family'])
ios_df_aggr_mrgd['os_version_share'] = ios_df_aggr_mrgd.total_hits/ios_df_aggr_mrgd.slice_total_hits
ios_df_aggr_mrgd.drop('slice_total_hits', axis = 1, inplace = True)

In [13]:
ios_df_aggr_mrgd.columns = map(lambda x: x if x != 'date' else 'fielddate', ios_df_aggr_mrgd.columns)
ios_df_aggr_mrgd.columns = map(lambda x: x if x != 'os_version_group' else 'os_version', ios_df_aggr_mrgd.columns)

In [18]:
print post_to_stat(ios_df_aggr_mrgd.to_csv(index = False, sep = '\t'), 'ITP/minor_version_stats')

{"message":"Данные успешно загружены. Загружено строк: 6"}
