# API Quickstart for Luminoso Daylight (v4)

# Examples on BoA app-Reviews dataset

### Author: Boaz Odier - bodier@luminoso.com

## General helper functions

In [1]:
import csv

def save_to_CSV(filename, data):
    if len(filename) < 4 or filename[-4:] != '.csv':
        filename += '.csv'
    with open(filename, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)
    print('Data saved to file:  ', filename)

def load_from_CSV(filename):
    with open(filename, 'r') as file:
        data = [row for row in csv.DictReader(file)]
    print('Data loaded from file:  ', filename)
    return data

In [2]:
import datetime

def get_iso_date(d):
    return datetime.datetime.fromtimestamp(d).strftime('%Y-%m-%d')

In [3]:
def substitute_keys_in_dict(data_dict, new_keys_dict):
    '''
    Substitute the keys used in the data_dict, to the values returned by the new_keys_dict
    '''
    result = {}
    for k, v in data_dict.items():
        result[new_keys_dict[k]] = v
    return result

In [4]:
import dateutil.parser 
import datetime
import time

#Transform a date in string format into Unix Epoch date

def extract_date(string, dayfirst = False):
    #tries to find a date in a given string, and returns it as a float (number of seconds)
    #if no date found, returns None
    #default setting is to expect US format (month/day/year)
    try:
        d = dateutil.parser.parse(string, dayfirst = dayfirst) 
    except:
        return None
    else:
        return time.mktime(d.timetuple())

    
DATE_FORMAT_EU= "%d/%m/%Y"  # %Y = Year as 4 digits, %y is 2 digits
DATE_FORMAT_dby = "%d-%b-%y"  # %b = Month as locale’s abbreviated name, eg: Sep
DATE_FORMAT_EU_time = '%d/%m/%Y %H:%M'
DATE_FORMAT_ISO = '%Y-%m-%d'
    
def extract_formatted_date(date, date_format):
    #To be used when a specific date format is expected
    #use standard strptime function to get datetime object from a string
    try:
        d = datetime.datetime.strptime(date, date_format) 
    except:
        return None
    else:
        return time.mktime(d.timetuple())

## Helper functions specific to Luminoso usage

In [5]:
#may want to refactor this so output of 2nd value is a dict with key = subsettype, value = list of possible values
#then the get_subset_elements function can be subsumed.  
def get_subsets_info(connection):
    '''
    Given a Luminoso project connection, this will simply give the 'subsets/stats' API endpoint info,
    but also as a second value a list of the Subset Types. 
    '''
    substats = connection.get('subsets/stats')
    type_list = []
    for s in substats:
        if s['subset'] == '__all__':
            t = '__all__'
            s['subset_type'] = t
            s['subset_element'] = ''
        else:
            (t, e) = s['subset'].split(': ')
            s['subset_type'] = t
            s['subset_element'] = e
        
        if t not in type_list: 
            type_list.append(t)
    
    return substats, type_list

def get_subset_elements(subsets_info, subset_type):
    results = []
    for s in subsets_info:
        if s['subset_type'] == subset_type:
            results.append(s['subset'])
    return results

In [6]:
def get_all_docs(client, doc_fields=None):
    '''
    Get all docs from a Luminoso connection to a project (client)
    '''
    docs = []
    while True:
        if doc_fields:
            newdocs = client.get('docs', limit=25000, offset=len(docs), doc_fields=doc_fields)
        else:
            newdocs = client.get('docs', limit=25000, offset=len(docs))
        if newdocs:
            docs.extend(newdocs)
        else:
            return docs

In [7]:
def subset_array_to_dict(subsets_array):
    '''
    Given an array of subset values (as per Luminoso output from docs download),
    transforms it into a dictionary format
    '''
    obj={}
    for s in subsets_array:
        if  s != '__all__':
            (sub, val) = s.split(': ')
            obj[sub] = val
    return obj

In [8]:
def build_ID_TOPICNAME_dict(topicsdata):
    '''
    Given a Luminoso list of topics, as per API call to get('topics'), 
    build a topic ID to NAME mapping table (as a dict)
    '''
    result = {}
    for t in topicsdata:
        result[t['_id']] = t['name']
    return result

In [9]:
def create_or_update_topics(connection, new_topics):
    '''
    This function will create the given new topics into the project
    If a topic already exists (by name), then it will be overwritten with the new terms given
    Returns a dict mapping table of topicIDs to TopicNames
    '''
    project_info = connection.get()
    existing_topics = connection.get('topics')
    
    for n in new_topics:
        newName = n['Topic Name']
        newTerms = n['Topic Terms']
        isnew = True
        for x in existing_topics:
            if newName == x['name']:
                connection.put('topics/id/' + x['_id'], text = newTerms, name = newName) #we have to give the name as parameter as well, if not the name defaults to list of terms
                isnew = False
                print('The topic', newName, 'already exists - Overwritten with: ', newTerms )
        if isnew:
            connection.post('topics', text = newTerms, name = newName)
            print('The topic', newName, 'is new - Created with', newTerms)
    
    print('\nThe topics have been updated for project:',  project_info['name'], ', on workspace:', project_info['account_name'] )
    
    return connection.get('topics')

# -----------------------------CONNECTION & INFO----------------------------------

## Connecting to an Account (workspace) & list its projects

In [10]:
#some parameters:
user_name = 'bodier@luminoso.com'  #change to your own email that you use to login to Luminoso

#use the correct one from below:
api_url = 'https://analytics.luminoso.com/api/v4' 
#api_url = 'https://eu-analytics.luminoso.com/api/v4' 

account_id = 'n22d432u' #this is the accountID for your workspace

In [11]:
from luminoso_api import LuminosoClient

In [12]:
account_url = api_url + '/projects/' + account_id + '/'
account_client = LuminosoClient.connect(account_url, username = user_name)
all_projects = account_client.get()

Password for bodier@luminoso.com: ········


In [31]:
print('Total number of projects:', len(all_projects))
filterstr = 'BoA'.lower()
for p in all_projects:
    pn = p['name']
    if filterstr in pn.lower():
        print(p['account_name'], '  ',  p['project_id'], '  ',  pn)

Total number of projects: 21
Employee - Boaz Demo    prvps2m4    BoA Mobile App Reviews Demo
Employee - Boaz Demo    prjrm7d5    BoA reviews demo set
Employee - Boaz Demo    prpr8x3b    Unilever - BoA - Training


## Connect to a specific Luminoso Project

In [50]:
project_id = 'prjrm7d5'  
#change the end-point to it:
client = LuminosoClient.connect(account_url + project_id, username=user_name)
print('Connected to project: ' +  client.get()['name'])
direct_url = (account_url + project_id).replace('api/v4','app/#')
print(direct_url)

Password for bodier@luminoso.com: ········
Connected to project: BoA reviews demo set
https://analytics.luminoso.com/app/#/projects/n22d432u/prjrm7d5


In [51]:
#Get all the top level info about the project
project_info = client.get()
project_info

{'account_name': 'Employee - Boaz Demo',
 'axes': 150,
 'counts': {"Month: April '14": 682,
  "Month: August '14": 1172,
  "Month: December '14": 773,
  "Month: January '15": 425,
  "Month: July '14": 1479,
  "Month: June '14": 2613,
  "Month: March '14": 282,
  "Month: May '14": 1602,
  "Month: November '14": 696,
  "Month: October '14": 839,
  "Month: September '14": 1451,
  'Star Rating: 1': 5660,
  'Star Rating: 2': 1532,
  'Star Rating: 3': 1212,
  'Star Rating: 4': 1088,
  'Star Rating: 5': 2522,
  'Store + Rating: apple 1': 731,
  'Store + Rating: apple 2': 313,
  'Store + Rating: apple 3': 263,
  'Store + Rating: apple 4': 198,
  'Store + Rating: apple 5': 282,
  'Store + Rating: google 1': 4929,
  'Store + Rating: google 2': 1219,
  'Store + Rating: google 3': 949,
  'Store + Rating: google 4': 890,
  'Store + Rating: google 5': 2240,
  'Store: apple': 1787,
  'Store: google': 10227,
  '__all__': 12014},
 'creation_date': 1536053106.9767463,
 'creator': 'bodier@luminoso.com',


## Look at the available Subsets (ie: metadata / filters)

In [52]:
project_subsets, project_subsetTypes = get_subsets_info(client)
project_subsetTypes

['__all__', 'Month', 'Star Rating', 'Store + Rating', 'Store']

In [53]:
project_subsets[0:2]

[{'count': 12014,
  'mean': 'V7wyCAj30hBhjB2hD34EbZ-zaCjj9W-u9_1xJHFw8-QAWcGYGAH_-I58EF9Yy-4cFO59YCDvZ93zGmY_4U7SW70hF9B-xjA2h56f_ZXAA1-xj7hlDa-Dsk8eL7SRDFC8Oo-x58BM9Ly75uBO7-jpCex_0mA04-zDB_wEAI_cjFyi57oD7B9kCCXFFYh8Nj8XR-llAVt93AFG6BsRB0i_MC9aKEUNATP_9Q_58CMt_p2AZi80JBZr_mkAkrAAfA0JAHSEpLAl56Dm9wYCwr_vWCj1AVvCdV_xg_vlCeuBmT-709VKCqt9N6BsZ_hd9QqCLnCsn-Sk8ZT_MO_7r-wSB7w-KO_8fCieAE_BmyBVvAM_91V9bg7l7EaJAiaDB8-OX_U6_A4_gHDCU_7FBVU_zo_eoBdF-SV_TVDIJBMnB23_nE_OE_n3-1T-me-55BufBKQ',
  'subset': '__all__',
  'subset_element': '',
  'subset_type': '__all__'},
 {'count': 2613,
  'mean': 'V8beBZU5g3BxkEEZAdpG2m9ZlFMH8ndsE0x7-Nga5rs9SKLy4Cpi9Wu8Gh9z1Be6Fg19ZDBbL-9dGnh_Dj7PY6k9H7R-VUBvG5alBx6Ajs8Xl6G2DzeFbY74r658Cz-9vI_z-5jI8oV66G_pB_2FCQ_BG_BP5-cJAfZEuH_LlGBp6GFEpF83KDn_Ejj8YS77x-37_Qo9mlFqQAqkC3j_sd9iEFeu_zO_ZP_ArBbgAWd_-S8JpB6N_dHAMYBttA5Z_1QEQjAf56P29WRC52_VBClmAf_CTSAaf_j_Cg8Bq6_Nu9MND0D9SOBGy_0o-SwCOSCtb-0I7mG_iAACW_tLCMh-sM_sHCmD_91BN-BEv_tw9fY8ws8R4D5EBKQC419rb-lU-Ra_27DUY_uHCOl_sk_U0BKl-

In [54]:
example_subset = get_subset_elements(project_subsets, 'Store')
example_subset

['Store: google', 'Store: apple']

# ---------------------------------------TOPICS----------------------------------------

##  Add a new Topic

In [40]:
test_topic = {'Topic Name': 'SOME FANTASTOPIC', 'Topic Terms': 'Fantastic, topic, for testing !'}
test_topic

{'Topic Name': 'SOME FANTASTOPIC',
 'Topic Terms': 'Fantastic, topic, for testing !'}

In [41]:
#insert a new topic
new_topic = client.post('topics', text = test_topic['Topic Terms'], name = test_topic['Topic Name'])
new_topic
#you can now check that this topic exists in the UI of the project !

{'_id': '71494e05-bfd3-4d49-931b-0c1575ff4b4e',
 'color': '#808080',
 'name': 'SOME FANTASTOPIC',
 'text': 'Fantastic, topic, for testing !',
 'vector': 'W3-oAiQ1OgElm8NVH19_BhAAJ8ZXDNkxL_5W4GE5ARuAMl5Pf-hg96N8rM8zb6WK9MA9wwCCM9aoBfp5LO5NH_KtGr3_9D90lFvAFTbBLk30z7ctBU-Cat6X-9fxAGR9RkBw7HyuG8f5-YAn__kgBDi4yB_kKEjp_YkCLSC4k7S5Gee-K7-rCDim3RhG6z-hu8yuAzLCfTCfa9OuG4cCZd8_N8aDBUp9Rk8hb5QK_L9G4W-q99XHDvYAEeEg8BWj8iZDvcAliGBcEz-87-CY-9N1IhJ9H7-EJ-_R8mR7oDAsCC7HCkvFKQLoLFzzAez5sU9ACG1_COm_dX9-j-W0CHHC4Q77V3UL0UUCtg4Oh3STLluwIpBk_H_W-vBy5R6wqvdb7SrBzgAT0_0aJ1BCCjAph98nERw6Y_6goInEDZt2qYCgOFVR93zDWcCTQ9V_-EF'}

In [57]:
#  Now you can see it in the project, just refresh it in browser or click below
print(direct_url)

https://analytics.luminoso.com/app/#/projects/n22d432u/prjrm7d5


##  Delete a Topic

In [59]:
#delete this topic:
outcome_deletion = client.delete('topics/id/' + new_topic['_id'])
outcome_deletion
#you can now check that this topic is no longer in the UI of the project ! 

{'deleted': '71494e05-bfd3-4d49-931b-0c1575ff4b4e'}

In [58]:
#  Again you can check on your project, link below:
print(direct_url)

https://analytics.luminoso.com/app/#/projects/n22d432u/prjrm7d5


## Get all Topics currently in the Project

In [60]:
topics = client.get('topics')

In [61]:
topics[0:2]

[{'_id': '380a58b9-f1c9-433b-ba76-0847a3b1df65',
  'color': '#ff379c',
  'name': 'suggestions',
  'text': 'suggestions',
  'vector': 'W6SK_9e2XFGKB5eQBbT-RxAhuDdm6GA9wg8ypA6o7gNyGl9hk9Yl9Rc5kM-rRggQmOFRSuBr7D91CK90OU7ME_UB9qdAnu7VeFez_SRGJq-V5Dod_8eA1X6Z9DECCZB6A-FMcBZkGvW_kUAROE3wA7v9PpBEHA449dz8I4Arc6PyFpI-D6_3qBF37CyCkK-Rv--g-jTF7kAi58oqCeHBGMALuA0g9Zz6qa9jt7fS8X2_mBGAUBmiCL5_s5CEwBV1A8s-8o-DZJdc_0q8aTBvxDgHCgeBS0A9P8-49t7Fjb7os-rx_sUCIwCjwBQxDQT-wT8d--Kk7A6GHK_aDDB7BnoDvB9FgBQu76_Aoi_W3DAE_BF_G09RtFlWLnFDYGGzg-1y_qi_rV75REY3-c9-gHDQ99azABZCNuBweAxi85fD6F6rU9reGD1ClcC7R8UGBi3'},
 {'_id': '9220fe20-b2da-43b4-a45f-4ccbff6def24',
  'color': '#15cf20',
  'name': 'NEGATIVE_SENTIMENT',
  'text': 'bad,terrible,useless,frustrating',
  'vector': 'W07m50K8JM85GBh140xNIG_PUBdZ_HPus265jAnX_GqDu0GFhAv6EmL64oBv2Ab2BJ09XJJJL_S78vk0MMCMJ_dRRVaAUF-iw8_m_EMCoC7Bq7STBOBEqu70a-VL-g2AGv9XTDe24aU6fMBbc3D6NhrAw--gzB9_EwDEJUDjXHcd9fSBXOBbmFvO9od-MhATOEgCCaPCxVBvnCr8F5n8NZ_EbBjbFNCDc6ANB1j7CwmAib6sh-yd8VU_t

In [62]:
for x in topics:
    print( x['_id'], x['name'] , '  ',  x['text'])

380a58b9-f1c9-433b-ba76-0847a3b1df65 suggestions    suggestions
9220fe20-b2da-43b4-a45f-4ccbff6def24 NEGATIVE_SENTIMENT    bad,terrible,useless,frustrating
ae7f5a5f-aea5-4a30-9f6a-35ced14c1cbd frustrating    frustrating
60f31b6b-b514-45a9-aed3-3b7a4b8a3378 NOTIFICATIONS    notifications,alerts
a62ca8f0-ec2b-4e9b-9071-0d95076a811a bank    bank
36128d82-5480-4ef5-9e53-120196b1b6d0 Great app    Great app
9405bb67-3f32-412f-bdc3-2965c3184662 convenient    convenient
506d9652-7f74-42b8-a210-ec17384672d8 feature    feature
19368c12-b2b0-4b8d-9d6f-06c5d4bed8f5 alerts    alerts
4cf9e3a1-129e-4a47-a2dc-a27d9eae5673 transfer    transfer
24d7cb5c-f931-4cc9-a95b-55856d7f1b75 password    password
2890401b-15a9-418d-9c76-344741bb9429 can't log    can't log
bc87d37b-8ffd-42c1-9abc-b396e9d80710 camera    camera
cc16fab6-63d8-4226-8242-fb9d3236cbbc tried    tried
f366cc0d-7242-4e23-8b9f-6b244924f323 try to deposit a check    try to deposit a check
296b7f17-e235-471d-bad3-bcfba23d4955 photo    photo
8b5

In [63]:
#If needed, you can use the helper function from above to create a mapping table from topic IDs to their Names
id_topicnames_table = build_ID_TOPICNAME_dict(topics)

## Import topics data from a CSV

In [64]:
#API_TRAINING_BoA_topics_list.csv
topic_file = 'API_TRAINING_BoA_topics_list.csv'  #file from wich the topics will be loaded

input_topics = load_from_CSV(topic_file)

print('')
for x in input_topics:
    print(x['Topic Name'], ':' ,  x['Topic Terms'])

Data loaded from file:   BoA_topics_list.csv

BANK OF AMERICA : Bank of America, BoA, BofA
UPDATES : upgrade, new update, version
WIFI : wifi
ALERTS : alerts, notifications
PROBLEM DEPOSIT CHECKS : Can't deposit checks
SENTIMENT_NEGATIVE : bad, terrible,awful,sadly,annoying, frustrating,upset
SENTIMENT_POSITIVE : great, convenient, love this app, fantastic, awesome, good


## Upload all new Topics onto Project, overwriting the definition for each if it already exists

In [65]:
refreshed_topics = create_or_update_topics(client, input_topics)

The topic BANK OF AMERICA is new - Created with Bank of America, BoA, BofA
The topic UPDATES is new - Created with upgrade, new update, version
The topic WIFI is new - Created with wifi
The topic ALERTS is new - Created with alerts, notifications
The topic PROBLEM DEPOSIT CHECKS is new - Created with Can't deposit checks
The topic SENTIMENT_NEGATIVE is new - Created with bad, terrible,awful,sadly,annoying, frustrating,upset
The topic SENTIMENT_POSITIVE is new - Created with great, convenient, love this app, fantastic, awesome, good

The topics have been updated for project: BoA reviews demo set , on workspace: Employee - Boaz Demo


In [66]:
#If needed, use the helper function to get a mapping table from topic IDs to their Names
id_topicnames_table = build_ID_TOPICNAME_dict(refreshed_topics)
id_topicnames_table

{'04985643-a81e-4e95-b025-99fe70401190': 'BANK OF AMERICA',
 '181c60f2-8060-4ad8-b039-431f6890feec': 'WIFI',
 '19368c12-b2b0-4b8d-9d6f-06c5d4bed8f5': 'alerts',
 '20827ff6-1c6f-43fc-af55-214b28ee32bc': "Can't deposit checks",
 '24d7cb5c-f931-4cc9-a95b-55856d7f1b75': 'password',
 '2890401b-15a9-418d-9c76-344741bb9429': "can't log",
 '296b7f17-e235-471d-bad3-bcfba23d4955': 'photo',
 '2f78a9b7-aa89-43ce-a08e-eb962ace550e': 'SENTIMENT_NEGATIVE',
 '36128d82-5480-4ef5-9e53-120196b1b6d0': 'Great app',
 '380a58b9-f1c9-433b-ba76-0847a3b1df65': 'suggestions',
 '40aa94ec-13de-4527-973f-15b7af1a76c7': 'useless',
 '4b4b3d75-ffa5-4cd4-938d-b0d862c7365d': 'ALERTS',
 '4cf9e3a1-129e-4a47-a2dc-a27d9eae5673': 'transfer',
 '4d96a9af-52e3-4011-b45b-bb46b39e242e': 'screen',
 '506d9652-7f74-42b8-a210-ec17384672d8': 'feature',
 '5b032e3c-ab0e-48aa-b264-0dead14d82e8': 'transactions',
 '5b307e37-d8fa-43a8-b30f-bf0c6ed3583b': 'error',
 '60f31b6b-b514-45a9-aed3-3b7a4b8a3378': 'NOTIFICATIONS',
 '6d24aa97-c98d-47d6-

# --------------------GETTING DOCUMENTS FROM A PROJECT------------------

## Download all the documents

In [67]:
DOCS = get_all_docs(client)
len(DOCS)

12014

In [68]:
#Structure of a document in a project:
DOCS[105]

{'_id': 'uuid-a9dd3817fd464e31af1ecdadffb355c9',
 'fragments': [['credit|en', 'NOUN/T', [61, 67]],
  ['card|en', 'NOUN/T', [68, 72]]],
 'language': 'en',
 'predict': {},
 'source': {'type': 'Unknown'},
 'subsets': ['__all__',
  'Star Rating: 4',
  'Store + Rating: google 4',
  'Store: google',
  "Month: January '15"],
 'terms': [['point|en', 'NOUN/T', [7, 12]],
  ['good|en', 'ADJ/T', [16, 20]],
  ['allow|en', 'VERB/T', [21, 27]],
  ['u|en', 'NOUN-PROP/T', [28, 29]],
  ['check|en', 'VERB/T', [33, 38]],
  ['ur|en', 'ADJ/T', [39, 41]],
  ['balance|en', 'NOUN/T', [42, 49]],
  ['pay|en', 'VERB/T', [54, 57]],
  ['ur|en', 'ADJ/T', [58, 60]],
  ['credit|en card|en', 'NOUN/T NOUN/T', [61, 72]],
  ['obviously|en', 'ADV/T', [90, 99]],
  ['u|en', 'NOUN-PROP/T', [100, 101]],
  ['go|en', 'VERB/T', [107, 109]],
  ['branch|en', 'NOUN/T', [118, 124]]],
 'text': 'To the point Is good allows u to check ur balance and pay ur credit card other than that..obviously u must go in to a branch',
 'title': "4 st

## Saving documents into CSV

In [69]:
# choose which info to keep for export:
docs_output = []
for d in DOCS:
    obj = {}
    obj['Luminoso_docID'] = d['_id']
    obj['text'] = d['text']
    obj['title'] = d['title']
    metadata = subset_array_to_dict(d['subsets'])
    for subset, value in metadata.items():
        obj[subset] = value
    docs_output.append(obj)

In [70]:
len(docs_output)

12014

In [71]:
docs_output[0]

{'Luminoso_docID': 'uuid-1c8b63fe109b475886977a8ea0143be8',
 'Month': "January '15",
 'Star Rating': '4',
 'Store': 'google',
 'Store + Rating': 'google 4',
 'text': "BofA Review. Now that I have installed the BofA app how do Iopen it?  I don't see any icon like I had before,  when I had the app that I can push to bring me to the bank site.",
 'title': "4 stars, google, January '15"}

In [73]:
# Write to Output File
#docs_output_file = 'API_TRAINING_OUPUT_documents.csv'
docs_output_file = 'DOCS-DOWNLOAD_' + project_info['name'] #using name of project
save_to_CSV(docs_output_file, docs_output)

Data saved to file:   DOCS-DOWNLOAD_BoA reviews demo set.csv


# ----------------------------------------------------------------------------------------

## The 'terms/search' Endpoint

In [74]:
examples_terms = 'cheque'

In [75]:
# We can feed the API 'terms/search' endpoint with a list of topics, 
# and it will give us a list of search results with terms most closely associated with this list
exple_search_results = client.get('terms/search', text=examples_terms,limit=100)
exple_search_results.keys()

dict_keys(['search_results', 'search_vector'])

In [76]:
len(exple_search_results['search_results'])

100

In [82]:
# The results are in the 'search_results' value, and we also get the associated search vector, which we discard.
# Lets look at, for example, the 17th result:
exple_search_results['search_results'][2]

[{'all_texts': {'4 CORNERS': 1,
   '4 Corners': 1,
   '4 corner': 1,
   '4 corners': 85},
  'bg_freq': 1.191590909090909e-05,
  'distinct_doc_count': 84,
  'score': 84.0,
  'term': '4|en corner|en',
  'text': '4 corners',
  'total_doc_count': 98,
  'vector': 'X_5__37CI-_irDPwzxc9zB__B_3y7Yn9spGiUu1EEbf-296TZ8b_AW4-6GAZV9gi-fx-xZAsC-nI-6j_GLALXAYS_RzBSk-ZiBJ197vBYO-jaAFE-ls_Ir_lsCdb-La-Zc9RTE_7B_JAYeBl49Mc9xj_R0__TBr1AOB91_BjtAJdAAgAb5_ivAEx7HQ-KOBUfA3gBI1-4bBs9A91AQF_vdCMw64V9-RCCYApKD2qAFmCEJ-2yBpLBfz-ez_zhBbZ-fB--P-PgEca_6AAqDCWfAYpCSBAKy_dT-rr-0m-dsADnBpGBEh63XAft_r3-l1A6x-k6_XQBSU-5a-PPBy0-aAATLAGl-ae-t4CpJ8jKBB8AkQB9qB-G9luA5C-k4C66AYgBRb_au-Nk99T7tyBrW_Qs_bXA9h_9aBeL_mJD4QACKBlZ8o8AwPAq6A_dBAABfM'},
 0.83599454164505]

In [83]:
#the first element is the actual search results
exple_search_results['search_results'][2][0]['term']

'4|en corner|en'

In [84]:
#second element is the score
exple_search_results['search_results'][2][1]

0.83599454164505

### Another example of a search result

In [86]:
# Another example, look at the th result:
exple_search_results['search_results'][1][0]['term']

'perfectly|en clear|en'

### Using the 'terms/doc_counts' endpoint

In [91]:
exple_terms_array = []
exple_terms_array.append(exple_search_results['search_results'][1][0]['term'])
exple_terms_array.append(exple_search_results['search_results'][2][0]['term'])
exple_terms_array.append(exple_search_results['search_results'][9][0]['term'])
exple_terms_array

['perfectly|en clear|en', '4|en corner|en', 'retake|en']

In [92]:
exple_stats = client.get('terms/doc_counts', terms=exple_terms_array, format='json')

In [93]:
#the stats gives us the number of Exact & Related matches
for x in exple_stats:
    print(x)

{'num_related_matches': 3279, 'num_exact_matches': 24, 'text': 'perfectly clear'}
{'num_related_matches': 3156, 'num_exact_matches': 98, 'text': '4 corners'}
{'num_related_matches': 3206, 'num_exact_matches': 81, 'text': 'retake'}


### Counting on a Subset only

In [94]:
example_subset

['Store: google', 'Store: apple']

In [96]:
#get counts on only a subset:
chosen_subset = example_subset[0]
print('terms/doc_counts Results on a few terms, with subset =', chosen_subset)
exple_stats_SUBSET = client.get('terms/doc_counts', terms=exple_terms_array, subset=chosen_subset, format='json')
exple_stats_SUBSET                                                                            

terms/doc_counts Results on a few terms, with subset = Store: google


[{'num_exact_matches': 21,
  'num_related_matches': 2759,
  'text': 'perfectly clear'},
 {'num_exact_matches': 97, 'num_related_matches': 2639, 'text': '4 corners'},
 {'num_exact_matches': 76, 'num_related_matches': 2688, 'text': 'retake'}]

## Using the 'docs/vectors' endpoint to transform a list of terms in another format...

In [97]:
a_list = input_topics[3]['Topic Terms']
a_list

'alerts, notifications'

In [98]:
#putting in JSON format:
[{'text': a_list }]

[{'text': 'alerts, notifications'}]

In [99]:
vector_ping = client.upload('docs/vectors', [{'text': a_list }] )
vector_ping

[{'fragments': [],
  'terms': [['alert|en', 'NOUN/T', [0, 6]],
   ['notification|en', 'NOUN/T', [8, 21]]],
  'text': 'alerts, notifications',
  'vector': 'W_sfAEC8p9-d09MvBRt_IbCLlAGt_63OxYCXDCRA-Sf5T6-mBB7N4ziGBJA1YHH3AaWDVv-FPC3q7ep6ZY-0Z7zdCMs6S80p_DGN3tyFFD9YoFKQFX4-A6BZB8kX_jhErV4lb4LwH8J9ce3sF-3N9R8Bdv_a6Cch8Yr6ar7P-JV--pT-q0A8N1mUA0-ZmnFkwzjxHVC4yaChbENL5yY5cVzzUCwr4j5HxS9qRCBLKxIK0B-1y9WIBkg7XdypH5eD9maA3wE6r6uB8PCCLZAyF-d35k2AKn6h8COE832_P4CC29a-ILH5Kq96y9eZ6kq9pyFDZEeP8jn63I4k08qd32E9UUBG7CGy-DJCG8_979NV_9w8E_-bLAzDBSyD4VGFo8IN8g6B57EPE-oZAmb8oSCUbFll8yqAVh8nrDTH-MnBre8OO-j0AWe_SaDIm_vT_oW'}]

In [100]:
vector_ping[0]['terms']

[['alert|en', 'NOUN/T', [0, 6]], ['notification|en', 'NOUN/T', [8, 21]]]

In [101]:
#We select the data we need from result above, so as to have it in another format that we can re-use later
vector_ping_terms = [t for t,_,_ in vector_ping[0]['terms'] ]
vector_ping_terms

['alert|en', 'notification|en']

## ... to be used directly in the 'docs/search' endpoint, where we can filter by subset and get stats

In [105]:
chosen_subset

'Store: google'

In [106]:
#here we use a subset filter on which to do the search, and a total of 3 (limit) of docs results.
#We get the total stats on the subset, and the full docs search as well. 
ping_count = client.get('docs/search', terms = vector_ping_terms, limit = 3, subset = chosen_subset)
ping_count

{'exact_terms': ['notification|en', 'alert|en'],
 'num_exact_matches': 206,
 'num_related_matches': 137,
 'related_terms': ['not|en disappear|en',
  'not|en dismiss|en',
  'not|en reset|en',
  'badge|en',
  'log|en back|en',
  'log|en in|en twice|en',
  'recheck|en',
  'alert|en notification|en',
  'alert|en badge|en',
  'not|en go|en',
  'indicator|en',
  're-open|en',
  'not|en pop|en',
  'not|en enable|en',
  'back-in|en',
  'not|en refresh|en',
  'not|en turn|en',
  'count|en'],
 'search_results': [[{'conceptual_indices': [[35, 55]],
    'document': {'_id': 'uuid-d4486c0b57b14380bfa87f981bfafa96',
     'fragments': [['alert|en', 'VERB/T', [35, 40]],
      ['notification|en', 'NOUN/T', [42, 55]]],
     'language': 'en',
     'predict': {},
     'source': {'type': 'Unknown'},
     'subsets': ['__all__',
      'Store + Rating: google 2',
      'Store: google',
      'Star Rating: 2',
      "Month: January '15"],
     'terms': [['notification|en', 'NOUN/T', [0, 12]],
      ['alert|en',

# -----------------------------Topics-Docs Correlations------------------------------

## Using the topic document-counts endpoint (with or without a subset)

In [107]:
#Get topic document-counts
topics_docs_counts = client.get('topics/doc_counts', format='json')
topics_docs_counts2 = substitute_keys_in_dict(topics_docs_counts, id_topicnames_table)
topics_docs_counts2

{'ALERTS': {'num_exact_matches': 502, 'num_related_matches': 159},
 'BANK OF AMERICA': {'num_exact_matches': 1716, 'num_related_matches': 86},
 'BOA': {'num_exact_matches': 756, 'num_related_matches': 609},
 'BofA': {'num_exact_matches': 455, 'num_related_matches': 0},
 "Can't deposit checks": {'num_exact_matches': 560,
  'num_related_matches': 1650},
 'Check deposit': {'num_exact_matches': 876, 'num_related_matches': 1315},
 'Great app': {'num_exact_matches': 460, 'num_related_matches': 3038},
 'NEGATIVE_SENTIMENT': {'num_exact_matches': 1080, 'num_related_matches': 163},
 'NOTIFICATIONS': {'num_exact_matches': 502, 'num_related_matches': 159},
 'PROBLEM DEPOSIT CHECKS': {'num_exact_matches': 560,
  'num_related_matches': 1650},
 'Please fix': {'num_exact_matches': 787, 'num_related_matches': 502},
 'SENTIMENT_NEGATIVE': {'num_exact_matches': 1092, 'num_related_matches': 200},
 'SENTIMENT_POSITIVE': {'num_exact_matches': 2975,
  'num_related_matches': 1155},
 'UPDATES': {'num_exact_ma

In [109]:
#Get topic document-counts WITH a Subset
print('Subset =', chosen_subset)
topics_docs_counts_Subset = client.get('topics/doc_counts', format='json', subset=chosen_subset)
topics_docs_counts_Subset2 = substitute_keys_in_dict(topics_docs_counts_Subset, id_topicnames_table)
topics_docs_counts_Subset2

Subset = Store: google


{'ALERTS': {'num_exact_matches': 206, 'num_related_matches': 137},
 'BANK OF AMERICA': {'num_exact_matches': 1453, 'num_related_matches': 78},
 'BOA': {'num_exact_matches': 633, 'num_related_matches': 535},
 'BofA': {'num_exact_matches': 375, 'num_related_matches': 0},
 "Can't deposit checks": {'num_exact_matches': 523,
  'num_related_matches': 1459},
 'Check deposit': {'num_exact_matches': 738, 'num_related_matches': 1227},
 'Great app': {'num_exact_matches': 386, 'num_related_matches': 2512},
 'NEGATIVE_SENTIMENT': {'num_exact_matches': 864, 'num_related_matches': 120},
 'NOTIFICATIONS': {'num_exact_matches': 206, 'num_related_matches': 137},
 'PROBLEM DEPOSIT CHECKS': {'num_exact_matches': 523,
  'num_related_matches': 1459},
 'Please fix': {'num_exact_matches': 632, 'num_related_matches': 449},
 'SENTIMENT_NEGATIVE': {'num_exact_matches': 822, 'num_related_matches': 151},
 'SENTIMENT_POSITIVE': {'num_exact_matches': 2468, 'num_related_matches': 954},
 'UPDATES': {'num_exact_matches

## Finding the topic correlations on a new text

In [111]:
#Get topic correlation to text
newtext = 'This phone app does some weird connection on my internet'
newdoc_topics_correl = client.put('topics/text_correlation/', text = newtext )
newdoc_topics_correl2 = substitute_keys_in_dict(newdoc_topics_correl, id_topicnames_table)
print(newtext)
newdoc_topics_correl2

This phone app does some weird connection on my internet


{'ALERTS': -0.06053338944911957,
 'BANK OF AMERICA': -0.0362393818795681,
 'BOA': -0.06915919482707977,
 'BofA': 0.014693738892674446,
 "Can't deposit checks": -0.07225238531827927,
 'Check deposit': -0.10613860934972763,
 'Great app': 0.06916370242834091,
 'NEGATIVE_SENTIMENT': 0.10409155488014221,
 'NOTIFICATIONS': -0.06053338944911957,
 'PROBLEM DEPOSIT CHECKS': -0.07225238531827927,
 'Please fix': 0.2924363613128662,
 'SENTIMENT_NEGATIVE': 0.08079162240028381,
 'SENTIMENT_POSITIVE': -0.017181849107146263,
 'UPDATES': 0.17939557135105133,
 'WIFI': 0.7801828384399414,
 'alerts': -0.07981892675161362,
 'annoying': 0.015189670026302338,
 'bank': -0.11824480444192886,
 'camera': -0.12318222969770432,
 "can't log": 0.3530781865119934,
 'convenient': -0.03040151484310627,
 'crashes': 0.28705111145973206,
 'error': 0.31660082936286926,
 'feature': -0.18103653192520142,
 'fix': 0.050755374133586884,
 'frustrating': -0.0057780323550105095,
 'login': 0.04990636184811592,
 'new update': 0.1671

In [114]:
newtext = "the app is bloody good, can't wait to see the new features"
newdoc_topics_correl = client.put('topics/text_correlation/', text = newtext )
newdoc_topics_correl2 = substitute_keys_in_dict(newdoc_topics_correl, id_topicnames_table)
print(newtext)
newdoc_topics_correl2

the app is bloody good, can't wait to see the new features


{'ALERTS': -0.06875639408826828,
 'BANK OF AMERICA': 0.11251402646303177,
 'BOA': 0.019798260182142258,
 'BofA': 0.147403746843338,
 "Can't deposit checks": 0.019274752587080002,
 'Check deposit': 0.12782686948776245,
 'Great app': 0.49705883860588074,
 'NEGATIVE_SENTIMENT': 0.15367378294467926,
 'NOTIFICATIONS': -0.06875639408826828,
 'PROBLEM DEPOSIT CHECKS': 0.019274752587080002,
 'Please fix': -0.3536641001701355,
 'SENTIMENT_NEGATIVE': 0.12595346570014954,
 'SENTIMENT_POSITIVE': 0.6096550822257996,
 'UPDATES': 0.28686970472335815,
 'WIFI': -0.12376441061496735,
 'alerts': -0.08077340573072433,
 'annoying': -0.05109289288520813,
 'bank': 0.139847993850708,
 'camera': 0.0010686502791941166,
 "can't log": -0.2687162756919861,
 'convenient': 0.3232533931732178,
 'crashes': -0.0937182754278183,
 'error': -0.304455041885376,
 'feature': 0.7196014523506165,
 'fix': -0.07081860303878784,
 'frustrating': -0.14352935552597046,
 'login': 0.03525347635149956,
 'new update': 0.2803808748722076

## Getting all the docs-topics correlations

In [115]:
#for each document, this gives the topics-document correlations
docs_topics_correl = client.get('docs/correlations')
len(docs_topics_correl)

12014

In [116]:
nbr = 45
print(DOCS[nbr]['_id'])
DOCS[nbr]['text']

uuid-3395ea1fbdd44d8f82e164f7790fd95d


" After awhile it doesn't load just says not able to connect to network and I have plenty of internet"

In [118]:
one_doc_tc = docs_topics_correl['uuid-3395ea1fbdd44d8f82e164f7790fd95d']
one_doc_tc2 = substitute_keys_in_dict(one_doc_tc, id_topicnames_table)
one_doc_tc2

{'ALERTS': -0.0772978812456131,
 'BANK OF AMERICA': -0.15700112283229828,
 'BOA': -0.1342533677816391,
 'BofA': -0.09616976231336594,
 "Can't deposit checks": -0.09216569364070892,
 'Check deposit': -0.14812394976615906,
 'Great app': -0.07508641481399536,
 'NOTIFICATIONS': -0.0772978812456131,
 'PROBLEM DEPOSIT CHECKS': -0.09216569364070892,
 'Please fix': 0.3117063343524933,
 'SENTIMENT_NEGATIVE': 0.05104958266019821,
 'SENTIMENT_POSITIVE': -0.1344742625951767,
 'UPDATES': -0.13034775853157043,
 'WIFI': 0.903376579284668,
 'annoying': 0.03412872925400734,
 'bank': -0.1839921921491623,
 'camera': -0.21105052530765533,
 "can't log": 0.46726369857788086,
 'convenient': -0.11498216539621353,
 'crashes': 0.052563201636075974,
 'error': 0.5919801592826843,
 'feature': -0.2276753932237625,
 'fix': 0.003863133257254958,
 'frustrating': 0.12944167852401733,
 'login': 0.06471552699804306,
 'new update': -0.054524946957826614,
 'password': 0.015200857073068619,
 'photo': -0.09640734642744064,
 

# ------------------------ NEW PROJECTS & DOCS UPLOAD ----------------------

## Loading data from a CSV

In [119]:
data_file_for_upload = 'API-TRAINING_BoA_docs_for_upload_(sample).csv'
data_input = load_from_CSV(data_file_for_upload)

Data loaded from file:   API-TRAINING_BoA_docs_for_upload_(sample).csv


In [120]:
len(data_input)

100

In [121]:
data_input[0]

OrderedDict([('Month', "January '15"),
             ('Store', 'google'),
             ('Star Rating', '4'),
             ('Store + Rating', 'google 4'),
             ('title', "4 stars, google, January '15"),
             ('text',
              "BofA Review. Now that I have installed the BofA app how do Iopen it?  I don't see any icon like I had before,  when I had the app that I can push to bring me to the bank site."),
             ('date', '21/01/2015')])

In [122]:
#get the data into the correct JSON format for upload, including subsets syntax:
data_for_upload = []
for d in data_input:
    obj = {}
    obj['text'] = d['text']
    obj['title'] = d['title']
    obj['date'] = extract_formatted_date(d['date'], DATE_FORMAT_ISO)
    obj['subsets'] = ['Month: ' + d['Month'], 
                      'Store: ' + d['Store'],
                      'Star Rating: ' + d['Star Rating'],
                      'Store + Rating: ' + d['Store + Rating'] ]
    obj['language'] = 'en'
    data_for_upload.append(obj)

In [123]:
data_for_upload[0]

{'date': None,
 'language': 'en',
 'subsets': ["Month: January '15",
  'Store: google',
  'Star Rating: 4',
  'Store + Rating: google 4'],
 'text': "BofA Review. Now that I have installed the BofA app how do Iopen it?  I don't see any icon like I had before,  when I had the app that I can push to bring me to the bank site.",
 'title': "4 stars, google, January '15"}

## Creating a New Project

In [124]:
new_project_name = 'Example New Project (API training)'

# Create a new project
new_project = account_client.post(name = new_project_name)
new_project_client = account_client.change_path(new_project['project_id'])

new_project_client.upload('docs', data_for_upload)
print('Project created: ' +  new_project['name'])

Project created: Example New Project (API training)


In [125]:
jobID = new_project_client.post('docs/recalculate', language='en')
jobID

1