# Adding Suggestions
This notebook will walk you through adding suggestions to LightTag. 

First, we do some basic imports and set up the configuration.

In [4]:
import json
import os

import pandas as pd
import requests
from nltk.corpus import stopwords
from requests.auth import HTTPBasicAuth

# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be saved in the notebook; the
# defaults are the demo values.
# The server is https://api-{your_subdomain_name}.lighttag.io
SERVER = os.environ.get("LT_SERVER", "https://api-demo.lighttag.io")
LT_USERNAME = os.environ.get("LT_USERNAME", "test")  # Username of manager user
LT_PASSWORD = os.environ.get("LT_PASSWORD", "test")  # Password of manager user

# Log in with the manager credentials; the response is displayed as the
# cell's output so the status code is visible.
response = requests.post(
    SERVER + "/rest-auth/login/",
    json={"username": LT_USERNAME, "password": LT_PASSWORD},
)
response

<Response [200]>

Once we log in, LightTag will return a token we can use for further calls. We'll put it in a dict that we pass as the request headers.

In [202]:
# The login response body holds the API token under 'key'.
token = response.json()['key']
# Header dict sent along with the authenticated requests below.
headers = {'Authorization': 'Token {}'.format(token)}


### Getting the data to annotate
To generate suggestions we will need data. In particular, we'll need both an ref:`example` and a ref:`tag`.

In [203]:
# Fetch the task definition; the query string selects it by id.
task_definition_url = (
    SERVER
    + "/manager/annotations/taskdefinition/?taskDefinitionId=cb658443-f617-43ef-ba28-d72d7453e60f"
)
resp = requests.get(task_definition_url, headers=headers)
resp.status_code

200

In [204]:
# Decode the JSON body of the task-definition response.
z = resp.json()

In [205]:
# List the top-level keys of the decoded response.
z.keys()


dict_keys(['dataset', 'id', 'schema'])

In [206]:
# Map each schema tag's name to its id so tags can be looked up by name.
schema = z['schema']
tags = dict((tag['name'], tag['id']) for tag in schema['tags'])

In [207]:
# The examples are nested under the response's 'dataset' entry.
examples= z['dataset']['examples']

In [208]:
# Peek at the first example to see its structure.
examples[0]

{'annotations': [],
 'content': 'Thanks for your support! https://t.co/iqUM1RfQso',
 'id': '0a33bc580fcb4ccaad0c4b146a0c24de',
 'metadata': {'created_at': 'Sat Oct 07 20:33:31 +0000 2017',
  'date': '2017-10-07',
  'favorite_count': 60168,
  'id_str': '916763438805848066',
  'in_reply_to_user_id_str': None,
  'is_retweet': False,
  'retweet_count': 12503,
  'source': 'Twitter for iPhone',
  'time': 736609}}

## Load Trump suggestions

In [209]:
# Load the phrase/tag data. The file is opened in a `with` block so the
# handle is closed as soon as pandas has finished reading it.
with open('../trump/ny_times_data.csv') as csv_file:
    d = pd.read_csv(csv_file)
# Show which distinct tags occur in the data.
d.tag.unique()


array(['Location', 'insult', 'Person', 'accusation', 'Group', 'Issue',
       'compliment'], dtype=object)

In [187]:
# Read the country list, lower-cased and split on newlines. The `with` block
# closes the file handle once its contents have been read.
with open('../trump/list_of_countries.txt') as countries_file:
    countries = countries_file.read().lower().split('\n')
# Extra names appended on top of the file's contents.
countries += ['russia', 'north korea']

In [210]:
def fix_entity(row):
    """Pick a tag name for a row based on its phrase, tag and slug.

    Exact-phrase overrides are checked first, then membership in the
    module-level ``countries`` list, then the row's own tag. Rows still
    tagged "entity" are split by slug: a slug with exactly two
    hyphen-separated parts maps to "Person", anything else to "Group".
    """
    phrase = row.phrase
    if phrase == "Crooked":
        return "Insult"
    if phrase == "ObamaCare":
        return "Issue"

    lowered = phrase.lower()
    if lowered == "media":
        return "Group"
    if lowered in countries:
        return "Location"

    if phrase == "Crooked H":
        return "Person"
    if row.tag != "entity":
        return row.tag.capitalize()

    slug_parts = row.slug.split('-')
    return "Person" if len(slug_parts) == 2 else "Group"
#d['tag'] = d.apply(fix_entity,1)

In [211]:
# Display the tag-name -> tag-id mapping.
tags

{'Achievment': '076469a3-cf74-4569-bcf1-4f86cfabc520',
 'Event': '73a04bb7-0299-45f4-a3ad-5ec0ff3c67b0',
 'Group': '391a1945-2a66-4282-a80b-0aff2cab2965',
 'Insult': 'f35d78d3-2d92-4131-a5b5-f9680c1af2a5',
 'Issue': '5a13c8ee-fbcc-493c-9f18-f063fefdebb6',
 'Legislation': '899dd3fa-6b4c-43a8-84fb-0578682bb4df',
 'Location': 'f7909857-4bd0-4def-8c1a-51f77d8a7611',
 'Person': 'f6de1c78-45dd-4ae1-869a-b2e85053514a',
 'Position': '9339db8f-8328-468a-8d5a-f4acd0a6537a',
 'Superlative': 'ef44f1c3-cd91-4ec3-a7df-0431810caa54',
 'Transgression': 'b38c23a5-d02f-41e2-85f4-1ff0d29b7f3b'}

In [213]:
# Alias the extra tag names found in the CSV to existing schema tags.
tags.update(Compliment=tags['Superlative'])
tags.update(Accusation=tags['Transgression'])
# Capitalize the CSV tag names so they match the keys of `tags`, then look
# up the id for every row.
d['tag'] = d['tag'].apply(lambda name: name.capitalize())
d['tag_id'] = d['tag'].apply(lambda name: tags[name])


In [214]:
# Stop words used to filter the phrases below. Built straight from NLTK's
# English corpus so this cell works on a fresh kernel and gives the same
# result every time it is re-run.
# 'new' is an additional stop word; phrases are lower-cased before the lookup,
# so the 'New' entry can never match and is kept only for parity.
stopWords = list(stopwords.words('english')) + ['New', 'new']

In [215]:
# Keep rows whose phrase is not a stop word and is longer than one character.
not_stop_word = ~d.phrase.str.lower().isin(stopWords)
longer_than_one = d.phrase.apply(len) > 1
dd = d[not_stop_word & longer_than_one]
# One record per distinct phrase, carrying just the phrase and its tag id.
unique_phrases = dd.drop_duplicates(subset=['phrase'])
phrases = unique_phrases[['phrase', 'tag_id']].to_dict(orient='records')

In [216]:
from flashtext import KeywordProcessor

# Register every phrase with the processor, passing its tag id as the value
# associated with that keyword.
keyword_processor = KeywordProcessor()
for entry in phrases:
    keyword_processor.add_keyword(entry['phrase'], entry['tag_id'])

In [217]:
# Sanity check: show the first match found in a short sentence, including its span.
keyword_processor.extract_keywords("Tal in Washington",span_info=True)[0]


('391a1945-2a66-4282-a80b-0aff2cab2965', 7, 17)

## Start making the spans

In [218]:
from nltk.corpus import stopwords
# Set of NLTK's English stop words.
stopWords = set(stopwords.words('english'))


In [219]:
# Try the processor on a short phrase and list every match with its span.
keyword_processor.extract_keywords(" health care program",
                                   span_info=True)

[('391a1945-2a66-4282-a80b-0aff2cab2965', 1, 7),
 ('391a1945-2a66-4282-a80b-0aff2cab2965', 13, 20)]

In [220]:
# One suggestion per keyword match: which example it belongs to, which tag
# it proposes, and the span reported by the keyword processor.
suggestions = [
    {
        "example_id": example['id'],
        "tag_id": tag_id,
        "start": start,
        "end": end,
    }
    for example in examples
    for tag_id, start, end in keyword_processor.extract_keywords(
        example['content'], span_info=True
    )
]

In [221]:
# Request payload: the suggestion model's metadata plus all of its suggestions.
model_info = {"name": "trump_suggestions", "schema_id": schema["id"]}
data = {"model": model_info, "suggestions": suggestions}

In [222]:
# Upload the model and its suggestions in a single authenticated request.
resp = requests.post(
    SERVER + "/manager/suggestions_model",
    json=data,
    headers=headers,
)


In [200]:
# Show the response of the upload request.
resp

<Response [201]>