### Goals:
- Find a Wikidata entity by name
- Look at the volume time series for the entity
- Understand the stories that caused any anomalies
- Save the signal for future investigation

In [None]:
# not needed if news_signals is already installed
# !pip install -q news_signals

In [None]:
import datetime

from news_signals import signals, newsapi, wikidata_utils

In [None]:
# create a trial account here: https://aylien.com/news-api-signup
# then go to https://app.aylien.com/dashboard to get your credentials
import os

# Prefer credentials exported as environment variables (same names as the
# Python variables below) so API keys never have to be written into this
# notebook and accidentally passed around.
# The '<set-this>' placeholders are only used when a variable is not set;
# replace them locally if you cannot use environment variables.
NEWSAPI_APP_ID = os.environ.get('NEWSAPI_APP_ID', '<set-this>')
NEWSAPI_APP_KEY = os.environ.get('NEWSAPI_APP_KEY', '<set-this>')

# hand the credentials to the news_signals `newsapi` module
newsapi.set_headers(NEWSAPI_APP_ID, NEWSAPI_APP_KEY)

In [None]:
# let's setup the entity we want to work with
entity_name = 'OpenAI'

# search Wikidata by name; the result is indexed positionally below
entity_id_candidates = wikidata_utils.search_wikidata(entity_name)

# Guard the empty case explicitly: indexing an empty result would otherwise
# fail with a bare "index out of range" that does not say what went wrong.
# `len(...)` is used rather than truthiness so the check works for any sized container.
if len(entity_id_candidates) == 0:
    raise IndexError(
        f"no Wikidata candidates found for entity name {entity_name!r}"
    )

# take the first candidate; inspect `entity_id_candidates` if this is not
# the entity you meant
test_entity = entity_id_candidates[0]
test_entity

In [None]:
# build a signal for the entity picked above: its label becomes the signal
# name and its id is passed under "entity_ids" in the params
signal_params = {"entity_ids": [test_entity['id']]}
signal = signals.AylienSignal(name=test_entity['label'], params=signal_params)

In [None]:
# pick the time period we care about and evaluate the signal over it
start, end = '2023-01-01', '2023-02-01'

timeseries_signal = signal(start, end)

# plot the result for that period
timeseries_signal.plot()

In [None]:
# did the signal have any unexpected spikes?
# compute the anomaly view of the signal, then plot its anomaly values in red
anomaly_signal = signal.anomaly_signal()
anomaly_scores = anomaly_signal.anomalies
anomaly_scores.plot(color='red')

In [None]:
# locate the biggest anomaly: idxmax() gives the index label at which the
# anomaly values peak (presumably a day-level timestamp — confirm)
anomaly_series = signal.anomalies
highest_anomaly_day = anomaly_series.idxmax()
highest_anomaly_day

In [None]:
# what was going on that day?
# sample stories from the one-day window that starts at the anomaly peak
signal = signal.sample_stories_in_window(
    start=highest_anomaly_day,
    end=highest_anomaly_day + datetime.timedelta(days=1)
)

# Select the first row by position explicitly with `.iloc[0]`: a bare `[0]`
# on a pandas Series whose index is not integer-based relies on a deprecated
# positional fallback.
# NOTE(review): assumes `feeds_df` is a pandas DataFrame — confirm
for s in signal.feeds_df.stories.iloc[0]:
    print(s['title'])

In [None]:
from pathlib import Path


# directory (relative to the current working directory) that receives the signal
signal_storage_dir = Path('tmp/signal_store')

# create it along with any missing parents; does nothing if it already exists
signal_storage_dir.mkdir(parents=True, exist_ok=True)

# save the signal there for future investigation
signal.save(signal_storage_dir)

In [None]:
# load from the storage directory and keep the first item of the result
loaded_signals = signals.Signal.load(signal_storage_dir)
reloaded_signal = loaded_signals[0]

In [None]:
# plot the signal that was loaded back from disk
# (presumably it should match the earlier plot — verify visually)
reloaded_signal.plot()