In [1]:
from __future__ import print_function
import csv, json
import os
import time
from pprint import pprint

import nucleus_client
from nucleus_client.rest import ApiException

# Configure API host and key

In [2]:
# Build the client configuration shared by every API instance in this notebook.
# The host and key are read from the NUCLEUS_API_HOST / NUCLEUS_API_KEY
# environment variables so real credentials never have to be saved in the
# notebook. When a variable is unset the original placeholder is used, and it
# must then be replaced by hand before running the remaining cells.
configuration = nucleus_client.Configuration()
configuration.host = os.environ.get('NUCLEUS_API_HOST', 'UPDATE-WITH-API-HOST')
configuration.api_key['x-api-key'] = os.environ.get('NUCLEUS_API_KEY', 'UPDATE-WITH-API-KEY')

# Dataset APIs

## Create API instance

In [3]:
# Client object for the Dataset APIs, built from the `configuration` defined above.
# The dataset cells below call its methods through the name `api_instance`.
api_instance = nucleus_client.DatasetsApi(nucleus_client.ApiClient(configuration))

## Append JSON documents from a CSV file to a dataset

In [4]:
# Add the first 10 rows of the CSV file to the dataset, one JSON document per row.
csv_file = 'trump_tweets.csv'
dataset = 'dataset_from_json'

api_instance = nucleus_client.DatasetsApi(nucleus_client.ApiClient(configuration))
doc_cnt = 0  # number of rows sent so far (successful or not)

# 'utf-8-sig' strips a leading byte-order mark, so a BOM (if present) does not
# end up glued to the first column name read by DictReader.
with open(csv_file, encoding='utf-8-sig') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Stop reading as soon as the 10-document sample is complete instead of
        # scanning the rest of the file without uploading anything.
        if doc_cnt >= 10:
            break

        # Each row must provide the 'time', 'title' and 'content' columns.
        payload = nucleus_client.Appendjsonparams(dataset=dataset,
                                                  language='english',
                                                  document={'time': row['time'],
                                                            'title': row['title'],
                                                            'content': row['content']}
                                                 )

        # A failed upload is reported and the loop moves on to the next row.
        try:
            response = api_instance.post_append_json_to_dataset(payload)
        except ApiException as e:
            print("Exception when calling DatasetsApi->post_append_json_to_dataset: %s\n" % e)

        doc_cnt = doc_cnt + 1

## Append file from local drive to dataset

In [5]:
# Destination dataset and the file handed to post_upload_file.
dataset = 'dataset_from_file'  # str | Destination dataset where the file will be inserted.
file = 'quarles20181109a.pdf'  # file | name of the file to upload

# Report an API failure instead of letting it abort the notebook.
try:
    api_instance.post_upload_file(file, dataset)
except ApiException as exc:
    print("Exception when calling DatasetsApi->post_upload_file: %s\n" % exc)

## Append file from URL to dataset

In [6]:
# Destination dataset and the URL of the file to add to it.
dataset = 'dataset_from_url'
file_url = 'https://www.federalreserve.gov/newsevents/speech/files/quarles20181109a.pdf'

# Request body for the upload-from-URL call.
payload = nucleus_client.UploadURLModel(dataset=dataset, file_url=file_url)  # UploadURLModel |

try:
    api_response = api_instance.post_upload_url(payload)
except ApiException as exc:
    print("Exception when calling DatasetsApi->post_upload_url: %s\n" % exc)
else:
    # Only reached when the call succeeded.
    pprint(api_response)

{'success': 'https://www.federalreserve.gov/newsevents/speech/files/quarles20181109a.pdf'}


## List available datasets

In [7]:
# Fetch the list of datasets and pretty-print the response.
try:
    api_response = api_instance.get_list_datasets()
except ApiException as exc:
    print("Exception when calling DatasetsApi->get_list_datasets: %s\n" % exc)
else:
    pprint(api_response)

{'list_datasets': "['dataset_from_file', 'dataset_from_json', "
                  "'dataset_from_url', 'dataset_json', 'dataset_test_delete', "
                  "'trump_tweets_test', 'trump_tweets_test_1108', "
                  "'trump_tweets_test1', 'trump_tweets_test111', "
                  "'trump_tweets_test2']"}


## Get dataset information

In [8]:
# Inputs for the dataset-information request. The three optional filters are
# left empty here.
dataset = 'dataset_from_file'  # str | Dataset name.
query = ''                     # str | Fulltext query, using mysql MATCH boolean query format. (optional)
metadata_selection = ''        # str | json object of {"metadata_field":["selected_values"]} (optional)
time_period = ''               # str | Time period selection (optional)

try:
    api_response = api_instance.get_dataset_info(
        dataset,
        query=query,
        metadata_selection=metadata_selection,
        time_period=time_period,
    )
except ApiException as exc:
    print("Exception when calling DatasetsApi->get_dataset_info: %s\n" % exc)
else:
    pprint(api_response)

{'dataset': 'dataset_from_file',
 'detected_language': 'en',
 'metadata': '{}',
 'num_documents': '1',
 'time_range': '[1541823874.0, 1541823874.0]'}


## Delete a document

In [9]:
# Request body naming the document to delete. `dataset` is not set in this
# cell; it keeps whatever value an earlier cell assigned.
# NOTE(review): the saved output of this cell is a 500 response with
# "IndexError : list index out of range" -- presumably docid '2' does not exist
# in that dataset; confirm the id before running.
payload = nucleus_client.Deletedocumentmodel(
    dataset=dataset,
    docid='2',
)  # Deletedocumentmodel |

try:
    api_response = api_instance.post_delete_document(payload)
except ApiException as exc:
    print("Exception when calling DatasetsApi->post_delete_document: %s\n" % exc)
else:
    pprint(api_response)

Exception when calling DatasetsApi->post_delete_document: (500)
Reason: Internal Server Error
HTTP response headers: HTTPHeaderDict({'Date': 'Fri, 09 Nov 2018 20:24:38 GMT', 'Content-Type': 'application/json', 'Content-Length': '58', 'Connection': 'keep-alive', 'x-amzn-RequestId': '7ad65c00-e45d-11e8-ac7d-c3ace5710c06', 'x-amzn-Remapped-Content-Length': '58', 'x-amz-apigw-id': 'QHH46HP_vHcFXcg=', 'x-amzn-Remapped-Server': 'Werkzeug/0.14.1 Python/3.5.2', 'x-amzn-Remapped-Date': 'Fri, 09 Nov 2018 20:24:38 GMT'})
HTTP response body: {
    "message": "IndexError : list index out of range"
}





## Delete the dataset

In [10]:
# Request body naming the dataset to delete. `dataset` keeps the value assigned
# by an earlier cell.
payload = nucleus_client.Deletedatasetmodel(dataset=dataset)  # Deletedatasetmodel |

try:
    api_response = api_instance.post_delete_dataset(payload)
except ApiException as exc:
    print("Exception when calling DatasetsApi->post_delete_dataset: %s\n" % exc)
else:
    pprint(api_response)

# List the datasets again to check whether the specified dataset is gone.
try:
    api_response = api_instance.get_list_datasets()
except ApiException as exc:
    print("Exception when calling DatasetsApi->get_list_datasets: %s\n" % exc)
else:
    pprint(api_response)

{'success': 'Dataset deleted'}
{'list_datasets': "['dataset_from_json', 'dataset_from_url', 'dataset_json', "
                  "'dataset_test_delete', 'trump_tweets_test', "
                  "'trump_tweets_test_1108', 'trump_tweets_test1', "
                  "'trump_tweets_test111', 'trump_tweets_test2']"}


## Create a full dataset for testing other APIs

In [11]:
# Load every row of the CSV file into the dataset used by the Topic and
# Document API cells further down.
csv_file = 'trump_tweets.csv'
dataset = 'trump_tweets_test'

api_instance = nucleus_client.DatasetsApi(nucleus_client.ApiClient(configuration))

with open(csv_file, encoding='utf-8-sig') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # One JSON document per row, built from the three CSV columns.
        payload = nucleus_client.Appendjsonparams(
            dataset=dataset,
            language='english',
            document={field: row[field] for field in ('time', 'title', 'content')},
        )

        # A failed upload is reported and the loop continues with the next row.
        try:
            response = api_instance.post_append_json_to_dataset(payload)
        except ApiException as exc:
            print("Exception when calling DatasetsApi->post_append_json_to_dataset: %s\n" % exc)

# Topic APIs

## Create API instance

In [12]:
# Rebind `api_instance` to a client for the Topic APIs, built from the same `configuration`.
# From here on `api_instance` no longer refers to the Datasets client.
api_instance = nucleus_client.TopicsApi(nucleus_client.ApiClient(configuration))

## Get list of topics from dataset

In [13]:
# Inputs for the topic-extraction request.
dataset = dataset  # reuse the dataset name assigned in an earlier cell
# Example of a non-empty fulltext query (mysql MATCH boolean query format):
#   '("Trump" OR "president")'   or   ("word1" OR "word2") AND ("word3" OR "word4")
query = ''                              # str | Fulltext query (optional); empty here
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
num_topics = 8                          # int | Number of topics to be extracted from the dataset. (optional) (default to 8)
metadata_selection = ""                 # str | json object of {"metadata_field":["selected_values"]} (optional)
time_period = ""                        # str | Time period selection (optional)

try:
    api_response = api_instance.get_topic_api(
        dataset,
        query=query,
        custom_stop_words=custom_stop_words,
        num_topics=num_topics,
        metadata_selection=metadata_selection,
        time_period=time_period,
    )
except ApiException as exc:
    print("Exception when calling TopicsApi->get_topic_api: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'doc_topic_exposure': '[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '
                                    '0.0, 0.0, 0.0,

## Get topic summary

In [14]:
# Inputs for the topic-summary request.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
# Example of a non-empty fulltext query (mysql MATCH boolean query format):
#   '("Trump" OR "president")'   or   ("word1" OR "word2") AND ("word3" OR "word4")
query = ''                              # str | Fulltext query (optional); empty here
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
num_topics = 8      # int | Number of topics to be extracted from the dataset. (optional) (default to 8)
num_keywords = 8    # int | Number of keywords per topic that is extracted from the dataset. (optional) (default to 8)
summary_length = 6  # int | The maximum number of bullet points a user wants to see in each topic summary. (optional) (default to 6)
context_amount = 0  # int | The number of sentences surrounding key summary sentences in the documents that they come from. (optional) (default to 0)
num_docs = 20       # int | The maximum number of key documents to use for summarization. (optional) (default to 20)
# Defined for reference only: this value is not passed to the call below.
excluded_docs = ''  # str | List of document IDs that should be excluded from the analysis. Example, "docid1, docid2, ..., docidN"  (optional)

try:
    api_response = api_instance.get_topic_summary_api(
        dataset,
        query=query,
        custom_stop_words=custom_stop_words,
        num_topics=num_topics,
        num_keywords=num_keywords,
        summary_length=summary_length,
        context_amount=context_amount,
        num_docs=num_docs,
    )
except ApiException as exc:
    print("Exception when calling TopicsApi->get_topic_summary_api: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'summary': "[{'title': 'D_Trump2018_8_17_19_25', 'sourceid': 16, "
                         "'attribute': {'time': 1534559100.0, 'counts': None}, "
                         '\'sentences\': ["Which is worse Hightax Andrew '
                         "Cuomo's statement “WE’RE NOT GOING TO MAKE AMERICA "
                         'GREAT AGAIN IT WAS NEVER THAT GREAT” or Hillary '
                         'Clinton’s “DEPLORABLES” statement..."]}, {\'title\': '
                         "'D_Trump2018_8_17_11_44', 'sourceid': 24, "
                         "'attribute': {'time': 1534531440.0, 'counts': None}, "
                         "'sentences': ['How does a politician Cuomo known for "
                         'pushing people and businesses out of his state not '
                         'to mention having the highest taxes in the U.S. '
                         'survive making the statement WE’RE NOT GOING TO MAKE '
                         "AMERICA GREAT AGAIN IT WAS NEVER 

## Get topic sentiment

In [15]:
# Inputs for the topic-sentiment request.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
# Example of a non-empty fulltext query (mysql MATCH boolean query format):
#   '("Trump" OR "president")'   or   ("word1" OR "word2") AND ("word3" OR "word4")
query = ''                              # str | Fulltext query (optional); empty here
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
num_topics = 8      # int | Number of topics to be extracted from the dataset. (optional) (default to 8)
num_keywords = 8    # int | Number of keywords per topic that is extracted from the dataset. (optional) (default to 8)
# Defined for reference only: this value is not passed to the call below.
excluded_docs = ''  # str | List of document IDs that should be excluded from the analysis. Example, "docid1, docid2, ..., docidN"  (optional)

try:
    api_response = api_instance.get_topic_sentiment_api(
        dataset,
        query=query,
        custom_stop_words=custom_stop_words,
        num_topics=num_topics,
        num_keywords=num_keywords,
    )
except ApiException as exc:
    print("Exception when calling TopicsApi->get_topic_sentiment_api: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'document_scores': '[[0.13842981263820506, 0.080463311097383, '
                                 '0.07558211992548376, 0.09428682676944669, '
                                 '0.31289973656753545, 0.2983381930019461]]',
              'document_sentiments': '[0.2727272727272727, '
                                     '-0.2666666666666667, '
                                     '0.11764705882352941, '
                                     '0.45454545454545453, 0.6, '
                                     '0.5454545454545454]',
              'sentiment': '0.41851613034223817',
              'strength': '0.17198933454298942',
              'topic': 'america great;great great;andrew cuomo;taxed '
                       'andrew;highest taxed;great believe;governor '
                       'highest;believe governor'},
             {'document_scores': '[[0.11735514798169955, 0.11735514798169955, '
                                 '0.0929418313169472, 0.1355100525560452, '
         

## Get topic consensus

In [16]:
# Inputs for the topic-consensus request.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
# Fulltext query, using mysql MATCH boolean query format.
# Example, ("word1" OR "word2") AND ("word3" OR "word4") (optional)
query = '("Trump" OR "president")'
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
num_topics = 8      # int | Number of topics to be extracted from the dataset. (optional) (default to 8)
num_keywords = 8    # int | Number of keywords per topic that is extracted from the dataset. (optional) (default to 8)
# Defined for reference only: this value is not passed to the call below.
excluded_docs = ''  # str | List of document IDs that should be excluded from the analysis. Example, "docid1, docid2, ..., docidN"  (optional)

try:
    api_response = api_instance.get_topic_consensus_api(
        dataset,
        query=query,
        custom_stop_words=custom_stop_words,
        num_topics=num_topics,
        num_keywords=num_keywords,
    )
except ApiException as exc:
    print("Exception when calling TopicsApi->get_topic_consensus_api: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'consensus': '1.0',
              'strength': '0.18791715552843313',
              'topic': 'trump campaign;lou dobbs;democrats evidence;conflicts '
                       'angry;collusion trump;campaign russia;angry democrats'},
             {'consensus': '1.0',
              'strength': '0.16169879505464493',
              'topic': 'donald trump;witch hunt;frame donald;unfortunate '
                       'situation;situation decided;rigged witch;decided '
                       'frame'},
             {'consensus': '1.0',
              'strength': '0.12934265999548608',
              'topic': 'forward special;evidence collusion;dobbs '
                       'forward;special counsel;counsel conflicts;special '
                       'councel;councel conflicts'},
             {'consensus': '1.0',
              'strength': '0.13715894403433687',
              'topic': 'bruce ohr;christopher steele;fake dossier;time '
                       'fusion;helping disgraced;gps fa

# Document APIs

## Create API instance

In [17]:
# Rebind `api_instance` to a client for the Document APIs, built from the same `configuration`.
# From here on `api_instance` no longer refers to the Topics client.
api_instance = nucleus_client.DocumentsApi(nucleus_client.ApiClient(configuration))

## Get document information without content

In [18]:
# Select documents both by title and by id.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
doc_titles = ['D_Trump2018_8_18_1_47']  # list of str | Titles of the documents to retrieve. (optional)
doc_ids = ['11', '12', '20']            # list of str | Docids of the documents to retrieve. (optional)

try:
    api_response = api_instance.get_doc_info(
        dataset,
        doc_titles=doc_titles,
        doc_ids=doc_ids,
    )
except ApiException as exc:
    print("Exception when calling DocumentsApi->get_doc_info: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'attribute': {'author': None,
                            'source': None,
                            'time': '1534582020.0'},
              'sourceid': '10',
              'title': 'D_Trump2018_8_18_1_47'},
             {'attribute': {'author': None,
                            'source': None,
                            'time': '1534581960.0'},
              'sourceid': '11',
              'title': 'D_Trump2018_8_18_1_46'},
             {'attribute': {'author': None,
                            'source': None,
                            'time': '1534581420.0'},
              'sourceid': '12',
              'title': 'D_Trump2018_8_18_1_37'},
             {'attribute': {'author': None,
                            'source': None,
                            'time': '1534534680.0'},
              'sourceid': '20',
              'title': 'D_Trump2018_8_17_12_38'}]}


## Display document details

In [19]:
# Fetch the selected documents via get_doc_display and pretty-print the response.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
doc_titles = ['D_Trump2018_8_18_1_47']  # list of str | Titles of the documents to retrieve. (optional)
doc_ids = ['11']                        # list of str | Docids of the documents to retrieve. (optional)

try:
    api_response = api_instance.get_doc_display(dataset, doc_titles=doc_titles, doc_ids=doc_ids)
    pprint(api_response)
except ApiException as e:
    # The message names the method that is actually called above
    # (get_doc_display), so a failure can be traced to the right call.
    print("Exception when calling DocumentsApi->get_doc_display: %s\n" % e)

{'results': [{'attribute': {'author': None,
                            'source': None,
                            'time': '1534582020.0'},
              'content': ' financial gain is a Federal Gratuity Statute '
                         'Violation Bribery Statute Violation Honest Services '
                         'Violation all Major Crimes because the DOJ is run by '
                         'BLANK Jeff Sessions ”  Gregg Jarrett. So when does '
                         'Mueller do what must be done? Probably never! '
                         '@FoxNews',
              'sourceid': '10',
              'title': 'D_Trump2018_8_18_1_47'},
             {'attribute': {'author': None,
                            'source': None,
                            'time': '1534581960.0'},
              'content': '“Bruce Ohr of DOJ is in legal jeopardy it’s '
                         'astonishing that he’s still employed. Bruce  Nelly '
                         'Ohr’s bank account is getting fatte

## Get document recommendation

In [20]:
# Inputs for the document-recommendation request.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
# Fulltext query, using mysql MATCH boolean query format.
# Example, ("word1" OR "word2") AND ("word3" OR "word4") (optional)
query = '("Trump" OR "president")'
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
num_topics = 8      # int | Number of topics to be extracted from the dataset. (optional) (default to 8)
num_keywords = 8    # int | Number of keywords per topic that is extracted from the dataset. (optional) (default to 8)
# Defined for reference only: this value is not passed to the call below.
excluded_docs = ''  # str | List of document IDs that should be excluded from the analysis. Example, "docid1, docid2, ..., docidN"  (optional)

try:
    api_response = api_instance.get_doc_recommend_api(
        dataset,
        query=query,
        custom_stop_words=custom_stop_words,
        num_topics=num_topics,
        num_keywords=num_keywords,
    )
except ApiException as exc:
    print("Exception when calling DocumentsApi->get_doc_recommend_api: %s\n" % exc)
else:
    pprint(api_response)

{'results': [{'recommendations': [{'attribute': {'author': None,
                                                 'source': None,
                                                 'time': '1534270860.0'},
                                   'sourceid': '73',
                                   'title': 'D_Trump2018_8_14_11_21'},
                                  {'attribute': {'author': None,
                                                 'source': None,
                                                 'time': '1534277700.0'},
                                   'sourceid': '66',
                                   'title': 'D_Trump2018_8_14_13_15'}],
              'topic': 'trump campaign;lou dobbs;democrats evidence;conflicts '
                       'angry;collusion trump;campaign russia;angry democrats'},
             {'recommendations': [{'attribute': {'author': None,
                                                 'source': None,
                                                 'ti

## Get document summary

In [21]:
# Inputs for the single-document summary request.
dataset = dataset  # str | Dataset name (reuses the value assigned in an earlier cell).
doc_title = 'D_Trump2018_8_15_15_4'      # str | The title of the document to be summarized.
custom_stop_words = ["real", "hillary"]  # list of str | List of stop words. (optional)
summary_length = 6  # int | The maximum number of bullet points a user wants to see in the document summary. (optional) (default to 6)
context_amount = 0  # int | The number of sentences surrounding key summary sentences in the documents that they come from. (optional) (default to 0)

try:
    api_response = api_instance.get_doc_summary_api(
        dataset,
        doc_title,
        custom_stop_words=custom_stop_words,
        summary_length=summary_length,
        context_amount=context_amount,
    )
except ApiException as exc:
    print("Exception when calling DocumentsApi->get_doc_summary_api: %s\n" % exc)
else:
    pprint(api_response)

{'doc_title': 'D_Trump2018_8_15_15_4',
 'summary': {'sentences': "['Our Country was built on Tariffs and Tariffs are "
                          'now leading us to great new Trade Deals - as '
                          'opposed to the horrible and unfair Trade Deals that '
                          "I inherited as your President.', 'Other Countries "
                          'should not be allowed to come in and steal the '
                          "wealth of our great U.S.A. No longer!']",
             'sourceid': '50'}}
