In [43]:
import pandas as pd
import numpy as np
import requests

In [94]:
# Feed (layout) versions under comparison; each one is sent as the `layout`
# request parameter when querying phoenix.
feed_version = ['106.0', '106.1']
# User ids paired position-by-position with `feed_version` (the comparison
# loop zips the two lists, so users[i] is queried with feed_version[i]).
users = ['16b378a27891j67x4f57', 'ij988a2716b3767x4f57']


In [95]:
# Helper to fetch the ads retrieved by the spells through the phoenix search API.
def query_phoenix(query=None, user='16b37891d1ax4f578a27', category_filter=None, location_filter=None,
                  user_lat=None, user_lon=None, site='olx.co.za', n_results=100,
                  layout=None, sorting=None, bucket_size=None, timeout=30):
    """Query phoenix through the relevance search API and return the decoded JSON.

    Params
    ======
    - query: the search string, None for category browsing (default: None).
    - user: id sent as the `user` request parameter (default: '16b37891d1ax4f578a27').
    - category_filter: id of the category to use as a filter (default: None).
    - location_filter: id of the location to use as a filter (default: None).
    - user_lat: latitude of the user location, useful for searching by 'current location' (default: None).
    - user_lon: longitude of the user location, useful for searching by 'current location' (default: None).
    - site: name of the site (e.g. 'olx.in', 'olx.com.pk', etc.) (default: 'olx.co.za').
    - n_results: maximum number of ads to retrieve (default: 100).
    - layout: id of the layout to use (e.g. '100.5', see http://destiny.internal.apps.prd.ap-southeast-1.horizontals.olx.org/olxin/100.5).
      When None the parameter is left out of the request (default: None).
    - sorting: sorting strategy, valid options are 'desc-relevance', 'desc-price', 'asc-price', 'desc-creation', and 'asc-distance' (default: None).
    - bucket_size: size of the proximity buckets (default: None, it doesn't seem to work!).
    - timeout: seconds to wait for the server before giving up (default: 30).

    Returns
    =======
    The JSON body of the response, decoded with `Response.json()`.

    Raises
    ======
    - requests.HTTPError: the server answered with a 4xx/5xx status.
    - requests.Timeout: no answer arrived within `timeout` seconds.
    """
    params = {
        'user': user,
        'size': str(n_results),
        'facet_limit': 100,
        'location_facet_limit': 20,
    }

    # Only send `layout` when one was requested; str(None) would otherwise
    # put the literal text 'None' in the query string.
    if layout is not None:
        params['layout'] = str(layout)

    if query:
        params['query'] = query

    if category_filter:
        params['category'] = str(category_filter)

    if location_filter:
        params['location'] = str(location_filter)

    # Compare against None explicitly: 0.0 is a valid latitude/longitude and
    # must not be dropped by a truthiness check.
    if user_lat is not None and user_lon is not None:
        params['latitude'] = str(user_lat)
        params['longitude'] = str(user_lon)

    if sorting:
        params['sorting'] = sorting

    if bucket_size:
        params['bucket_size'] = bucket_size

    # requests appends the encoded params itself, so the URL needs no trailing '?'.
    r = requests.get("https://www.{}/api/relevance/search".format(site),
                     params=params, timeout=timeout)
    # Surface HTTP errors here instead of failing later on an unexpected body.
    r.raise_for_status()
    return r.json()

In [96]:
def parse(response, location_jitter=False, jitter_stdev=1e-3):
    """Parses the phoenix response and creates a pandas dataframe containing the result of the search.

    The `response` argument is never modified: jitter is applied to local
    copies of the coordinates, so parsing the same response repeatedly does
    not accumulate noise.

    Params:
    =======
    - response: json containing the phoenix response. Reads `response['data']`
      (the list of results) and `response['metadata']['sections']` (each entry
      providing an 'offset' and an 'id').
    - location_jitter: whether to add random noise to ads latitude and longitude or not (default: False).
    - jitter_stdev: if location_jitter is True, add random noise taken from gaussian(mean=0, stdev=jitter_stdev), (default: 1e-3).

    Returns:
    ========
    A DataFrame with one row per result and the columns 'item_id', 'rank'
    (1-based position in the response), 'latitude', 'longitude', 'section'
    and 'spell_name' (formatted as '<key> v<version>').
    """
    data = response['data']

    # Expand the section boundaries into one section id per result. Results
    # before the first offset get section 0; results from a section's offset
    # up to the next offset carry that section's id.
    sections = response['metadata']['sections']
    section_ids = []
    last_offset = 0
    section_name = 0
    for section in sections:
        offset = section['offset']
        # Offsets past the end of the returned data cannot be applied.
        if offset > len(data):
            break
        section_ids.append(np.repeat(section_name, offset - last_offset))
        last_offset = offset
        section_name = section['id']
    # Whatever remains after the last usable offset belongs to the last section seen.
    section_ids.append(np.repeat(section_name, len(data) - last_offset))
    section_ids = np.concatenate(section_ids)

    # Build the rows that will be used to create the pandas dataframe.
    output = []
    for rank, (result, section) in enumerate(zip(data, section_ids), start=1):
        spell = result['spell']
        spell_name = "{} v{}".format(spell['key'], spell['version'])
        # Only the first location of each result is used.
        location = result['locations'][0]
        lat, lon = location['lat'], location['lon']
        if location_jitter:
            # Rebind local values instead of writing back into `response`.
            lat = lat + np.random.normal(0, jitter_stdev)
            lon = lon + np.random.normal(0, jitter_stdev)
        output.append((result['id'], rank, lat, lon, section, spell_name))
    return pd.DataFrame(output, columns=['item_id', 'rank', 'latitude', 'longitude', 'section', 'spell_name'])

In [142]:
# Run the same 'iphone xs' search once per (user, feed version) pair and stack
# the parsed results, tagging every row with the feed version that produced it.
result_columns = ['item_id', 'rank', 'latitude', 'longitude', 'section', 'spell_name', 'feed']
feed_frames = []
for user, version in zip(users, feed_version):
    response = query_phoenix(query='iphone xs', user=user, category_filter=84, location_filter=5048, layout=version)
    dfs = parse(response)
    dfs['feed'] = version
    feed_frames.append(dfs)

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration; collect the pieces and concatenate once instead. Each
# piece keeps its own 0-based index, exactly as the appended frame did.
df = pd.concat(feed_frames) if feed_frames else pd.DataFrame(columns=result_columns)
    

In [143]:
# Keep only the ads retrieved by the 'SILENCE v2' spell and split them by
# feed version, retaining just the columns needed to compare rankings.
silence_rows = df['spell_name'] == 'SILENCE v2'
rank_columns = ['item_id', 'rank', 'feed']
d1060 = df.loc[(df['feed'] == '106.0') & silence_rows, rank_columns]
d1061 = df.loc[(df['feed'] == '106.1') & silence_rows, rank_columns]

In [144]:
# Line up every 106.0 ad with its 106.1 counterpart (left join on item_id, so
# ads missing from 106.1 stay in the table) and measure how far each one moved.
diffe = pd.merge(d1060, d1061, how='left', on='item_id')
rank_shift = diffe['rank_y'] - diffe['rank_x']
diffe['difference_position'] = rank_shift.abs()
# Preview the first 20 rows, labelling missing values before display.
af = diffe.iloc[:20].replace({None: 'No aparecio'})
af.head(7)



Unnamed: 0,item_id,rank_x,feed_x,rank_y,feed_y,difference_position
0,1060718958,3,106.0,3,106.1,0
1,1060698944,4,106.0,7,106.1,3
2,1060648663,5,106.0,10,106.1,5
3,1060602792,6,106.0,8,106.1,2
4,1060590082,7,106.0,5,106.1,2
5,1060585499,8,106.0,9,106.1,1
6,1060467433,9,106.0,4,106.1,5


In [145]:
# Show the top five positions of every feed version.
df.loc[df['rank'] < 6]

Unnamed: 0,item_id,rank,latitude,longitude,section,spell_name,feed
0,1060822807,1,-33.967047,18.478682,0,CONFUSION v1,106.0
1,1060809105,2,-34.050313,18.589125,0,CONFUSION v1,106.0
2,1060718958,3,-33.9249,18.4241,0,SILENCE v2,106.0
3,1060698944,4,-33.879277,18.628731,0,SILENCE v2,106.0
4,1060648663,5,-33.9249,18.4241,0,SILENCE v2,106.0
0,1060822807,1,-33.967047,18.478682,0,CONFUSION v1,106.1
1,1060809105,2,-34.050313,18.589125,0,CONFUSION v1,106.1
2,1060718958,3,-33.9249,18.4241,0,SILENCE v2,106.1
3,1060467433,4,-33.813856,18.499775,0,SILENCE v2,106.1
4,1060590082,5,-33.813856,18.499775,0,SILENCE v2,106.1


In [71]:
# Query the 'cars' search once per feed version and parse what came back.
for version in feed_version:
    # Keep the response: the original cell only printed the query result, so
    # parse() silently reused the stale `response` left over from an earlier cell.
    response = query_phoenix(query='cars' , category_filter=84 , location_filter=5048 , layout=version)
    print(response)
    # NOTE(review): `df` is overwritten on every iteration, so only the last
    # feed version survives the loop — confirm this is intended.
    df = parse(response)
    df['feed'] = version



In [62]:
# Display `dfs`, the per-version frame assigned inside the feed comparison loop.
# NOTE(review): this cell's execution count (62) is lower than the loop cell's
# (142), so the saved output may come from an earlier query — re-run to refresh.
dfs

Unnamed: 0,item_id,rank,latitude,longitude,section,spell_name
0,1060750788,1,-33.810661,18.501593,0,DETECT_SECRET_DOORS v1
1,1060194230,2,-33.870815,18.700868,0,DETECT_SECRET_DOORS v1
2,1060823452,3,-33.882031,18.637104,0,SILENCE v2
3,1060823352,4,-33.949805,18.680038,0,SILENCE v2
4,1060165541,5,-33.953000,18.701000,0,SILENCE v2
5,1060823283,6,-34.033066,18.595433,0,SILENCE v2
6,1060823256,7,-34.033066,18.595433,0,SILENCE v2
7,1060823241,8,-33.907378,18.582732,0,SILENCE v2
8,1060823207,9,-33.990936,18.503905,0,SILENCE v2
9,1060823195,10,-33.935463,18.598265,0,SILENCE v2


In [38]:
# Count the distinct ads present in the current results frame.
df['item_id'].nunique()

100