In [None]:
import copy
import requests
import json
import datetime

from news_signals import signals

In [None]:
# Wikidata endpoints and default query parameters used by the helper that
# resolves an entity name to a wikidata id.
WIKIDATA_SEARCH_URL = "https://www.wikidata.org/w/api.php"
WD_ENTITY_BASE_URL = 'https://www.wikidata.org/wiki/Special:EntityData'

# Base parameters for a `wbsearchentities` request; the text to search for is
# added per call under the "search" key.
DEFAULT_SEARCH_PARAMS = dict(
    action="wbsearchentities",
    format="json",
    errorformat="plaintext",
    language="en",
    uselang="en",
    type="item",
    limit=1,
)
    

def search_wikidata(surface_form, min_length=3, timeout=10):
    """Search Wikidata for entities matching ``surface_form``.

    Sends a GET request to ``WIKIDATA_SEARCH_URL`` using
    ``DEFAULT_SEARCH_PARAMS`` plus the search text. The lookup is best-effort:
    request, HTTP-status and JSON-decoding failures are printed and an empty
    list is returned instead of raising.

    Args:
        surface_form: text to look up (sent as the ``search`` parameter).
        min_length: currently unused; kept so existing callers keep working.
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        list: the entries found under the response's ``search`` key (at most
        100 of them), or ``[]`` if that key is missing or the request failed.
    """
    # A shallow copy is enough here: the defaults are a flat dict of scalars.
    params = {**DEFAULT_SEARCH_PARAMS, "search": surface_form}
    result = []
    try:
        print(f'querying wikidata with params: {params}')
        # Without a timeout a stalled connection would block the notebook forever.
        r = requests.get(url=WIKIDATA_SEARCH_URL, params=params, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than parsing an error body.
        r.raise_for_status()
        data = r.json()
        if isinstance(data, dict) and 'search' in data:
            result = data['search'][:100]
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f'Error searching wikidata for surface form: {surface_form}')
        print(e)
    return result

In [None]:
# Resolve the entity name to a wikidata record and keep the first match.
entity_name = 'Twitter'

entity_id_candidates = search_wikidata(entity_name)
# search_wikidata returns an empty list when the lookup fails or finds nothing;
# fail here with a clear message instead of an opaque IndexError on [0].
if not entity_id_candidates:
    raise ValueError(f'No wikidata entity found for {entity_name!r}')
test_entity = entity_id_candidates[0]

In [None]:
# display the first wikidata search result selected in the previous cell
test_entity

In [None]:
# Signal for the selected entity: named after the entity's label, with its
# wikidata id passed as the `entity_ids` param.
signal = signals.AylienSignal(
    name=test_entity['label'],
    params=dict(entity_ids=[test_entity['id']]),
)

In [None]:
# start and end dates passed to the signal call below
start = '2023-01-01'
end = '2023-02-21'

# NOTE(review): the following cells keep using `signal` rather than
# `timeseries_signal`, so they presumably rely on this call also populating
# `signal` itself — confirm against the news_signals API.
timeseries_signal = signal(start, end)

In [None]:
# plot the signal (presumably the timeseries retrieved for start..end above)
signal.plot()

In [None]:
# index label at which the signal's anomaly series is largest; presumably a
# timestamp, since a timedelta is added to it further down
highest_anomaly_day = signal.anomaly_signal().anomalies.idxmax()

In [None]:
# display the day selected above
highest_anomaly_day

In [None]:
# sample stories from the window that starts at highest_anomaly_day and ends
# one day later
stories_df = signal.sample_stories_in_window(
    start=highest_anomaly_day,
    end=highest_anomaly_day + datetime.timedelta(days=1)
)

In [None]:
# print the title of every story in the first `stories` entry
# NOTE(review): `.stories[0]` relies on 0 being a valid key/position; if
# `stories` is a pandas Series with a non-integer index, `.iloc[0]` would be
# the explicit positional form — confirm the type returned above.
for s in stories_df.stories[0]:
    print(s['title'])