In [158]:
import pandas
import requests #use requests over 'urllib.request'. Work-around for encoding problem. See: https://stackoverflow.com/questions/47419104/unicodedecodeerror-utf-8-codec-cant-decode-python3
import json
import geojson
import folium
from folium import IFrame #folium does not have a .element object/attribute see https://github.com/python-visualization/folium/issues/774
from bs4 import BeautifulSoup

In [171]:
#GDELT GeoJson Endpoint
url = "https://api.gdeltproject.org/api/v1/gkg_geojson?QUERY=Lang:Eng&GCAMVAR=c8.3&MAXROWS=100"

#Issue request to URL
# A timeout keeps the cell from hanging forever if the API stops responding.
request = requests.get(url, timeout=30)
print(request.status_code)

# Stop here on anything other than 200. Previously `data` was only assigned on
# success, so a failed request either raised a NameError on the next line or
# silently reused a stale `data` left in the kernel by an earlier run.
if request.status_code != 200:
    raise RuntimeError(
        "GDELT request failed with HTTP status {}".format(request.status_code)
    )

data = request.json()
print(type(data))
print(len(data['features']))

200
<class 'dict'>
100


In [172]:
# Flatten every GeoJSON feature into one row of a DataFrame.
# Each feature is a dict; the values that are themselves dicts (as seen in the
# output below: the geometry and the properties) are merged into one flat record.
# Building one record per feature, instead of one growing list per key, keeps
# rows aligned when a feature lacks a key: the gap becomes NaN rather than
# producing lists of unequal length that pandas refuses to load.
records = []
for feature in data['features']:
    record = {}
    for value in feature.values():
        if isinstance(value, dict):
            record.update(value)
    records.append(record)

df_features = pandas.DataFrame(records)

# Column-oriented view of the same data, kept under its original name in case
# another cell still reads it.
flat_dictionary = df_features.to_dict(orient='list')

# Features without coordinates cannot be placed on the map, so drop them
# before splitting the coordinate pairs.
df_features = df_features.dropna(subset=['coordinates'])

#from: https://stackoverflow.com/questions/35491274/pandas-split-column-of-lists-into-multiple-columns
# Only the first two entries are used (assumed to be lon, lat as in GeoJSON),
# so any extra entry such as an altitude no longer breaks the assignment.
coordinates = pandas.DataFrame(df_features['coordinates'].tolist(), index=df_features.index)
df_features = df_features.assign(lon=coordinates[0], lat=coordinates[1])
print(list(df_features))
print(df_features.shape)

['coordinates', 'mentionedthemes', 'name', 'type', 'url', 'urlpubtimedate', 'urltone', 'lon', 'lat']
(100, 9)


In [173]:
#get color based on "tone" score
def getColor(tone, min_tone=None, max_tone=None):
    """Map a tone score to a hex colour string of the form '#rr00bb'.

    The tone is rescaled against the [min_tone, max_tone] range into an
    offset between 0 and 125, which is applied around a mid value of 125:
    for tone >= 0 the blue channel goes up and the red channel goes down;
    for tone < 0 the red channel goes up and the blue channel goes down.
    The green channel is always 0.

    Parameters
    ----------
    tone : number
        Tone score to colour.
    min_tone, max_tone : number, optional
        Bounds used for rescaling. When omitted they are read from the
        module-level ``df_features.urltone`` column, as before. Passing them
        explicitly avoids rescanning the column on every call.

    Returns
    -------
    str
        Colour as '#rrggbb' in lowercase hex.
    """
    if min_tone is None:
        min_tone = df_features.urltone.min()
    if max_tone is None:
        max_tone = df_features.urltone.max()
    tone_range = max_tone - min_tone

    if tone_range == 0:
        # All tones identical: no spread to rescale against, and dividing
        # would raise ZeroDivisionError (or yield NaN with numpy floats).
        tone_mod = 0
    else:
        tone_mod = int((tone - min_tone) / tone_range * 125)
    # Keep the offset inside 0..125 so both channels stay valid two-digit hex
    # values even when `tone` lies outside the supplied bounds.
    tone_mod = max(0, min(125, tone_mod))

    if tone >= 0:
        pos_tone_rgb = 125 + tone_mod
        neg_tone_rgb = 125 - tone_mod
    else:
        neg_tone_rgb = 125 + tone_mod
        # This line used to assign to a misspelled name, which left the blue
        # channel stuck at 0 for every negative tone.
        pos_tone_rgb = 125 - tone_mod

    rgb = (neg_tone_rgb, 0, pos_tone_rgb)
    hex_color = '#%02x%02x%02x' % (rgb) #from: https://stackoverflow.com/questions/3380726/converting-a-rgb-color-tuple-to-a-six-digit-code-in-python
    return hex_color

def getIframe(tone,url): #see: https://gis.stackexchange.com/questions/185897/how-can-i-include-html-in-a-folium-marker-popup
    """Build the folium IFrame shown in a marker popup.

    The popup lists the tone score, a title fetched from the page via
    getHtmlText (first of h1, h2, title), and the URL as a link that opens
    in a new tab.

    Parameters
    ----------
    tone : number
        Tone score to display.
    url : str
        Article URL; used both to fetch the title and as the link target.

    Returns
    -------
    folium.IFrame
        A 500x150 iframe containing the popup markup.
    """
    # Imported locally so this function does not depend on the notebook's import cell.
    from html import escape

    # The title is scraped from a third-party page and the URL comes from the
    # API, so both are escaped before being placed into markup. Otherwise a
    # stray '<', '&' or quote could break the popup or inject markup.
    title = escape(str(getHtmlText(url,'h1','h2','title')))
    safe_url = escape(str(url), quote=True)

    # The href value is quoted: an unquoted attribute ends at the first space
    # or '>' inside the URL.
    popup_html = """
    <ul>
        <li>Tone: {}</li>
        <li>Title: {}</li>
        <li>URL: <a href="{}" target="_blank">{}</a></li>
    </ul>
    """.format(tone, title, safe_url, safe_url)
    iframe = folium.IFrame(html=popup_html, width=500, height=150)
    return iframe

def getHtmlText(url,*tags):
    """Return the text of the first non-empty tag found on a web page.

    Parameters
    ----------
    url : str
        Page to download.
    *tags : str
        Tag names to look for, in order of preference (e.g. 'h1', 'h2').

    Returns
    -------
    str
        Stripped text of the first listed tag that exists and has text, or
        the string "None" when the page cannot be fetched, no tag matches,
        or no tags were given.
    """
    try:
        # The timeout stops a single unresponsive site from stalling the
        # caller indefinitely.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Dead link, DNS failure, timeout, etc.: report "no title" instead of
        # letting one bad URL abort the caller.
        return "None"

    soup = BeautifulSoup(response.content,'html.parser')
    for tag in tags:
        element = soup.find(tag)
        if element is None:
            # Tag not present on this page; try the next preference.
            continue
        text = element.get_text().strip()
        if text:
            return text
    return "None"

In [174]:
#map with folium
# Average only the two coordinate columns. Calling mean() on the whole frame
# also tries to average the list and string columns, which raises a TypeError
# on recent pandas versions, and it recomputed every column twice.
mean_lat = df_features.lat.mean()
mean_lon = df_features.lon.mean()

# Centre the map on the mean position of all events.
m = folium.Map(location=[mean_lat,mean_lon],zoom_start=2.5)
feature_group = folium.FeatureGroup(name='events')

# One circle per event, coloured by tone. Building each popup downloads the
# linked page to read its title, so this loop is slow.
for lat,lon,tone,url in zip(df_features.lat,df_features.lon,df_features.urltone,df_features.url):
    folium.CircleMarker(
        radius=8,
        location = [lat,lon],
        popup=folium.Popup(getIframe(tone,url), max_width=2650),
        color=getColor(tone),
        fill=True,
        ).add_to(feature_group)

feature_group.add_to(m)
m

In [163]:
#another thing to try
#via https://www.haykranen.nl/2016/02/13/handling-complex-nested-dicts-in-python/

# class DictQuery(dict):
#     def get(self, path, default = None):
#         keys = path.split("/")
#         val = None

#         for key in keys:
#             if val:
#                 if isinstance(val, list):
#                     val = [ v.get(key, default) if v else None for v in val]
#                 else:
#                     val = val.get(key, default)
#             else:
#                 val = dict.get(self, key, default)

#             if not val:
#                 break;

#         return val

In [164]:
# from: https://stackoverflow.com/questions/12507206/python-recommended-way-to-walk-complex-dictionary-structures-imported-from-json    
#a function that flattens dictionaries into lists
#I'm not sure how to implement this for my data...
# def dict_generator(indict, pre=None):
#     pre = pre[:] if pre else []
#     if isinstance(indict, dict):
#         for key, value in indict.items():
#             if isinstance(value, dict):
#                 for d in dict_generator(value, [key] + pre):
#                     yield d
#             elif isinstance(value, list) or isinstance(value, tuple):
#                 for v in value:
#                     for d in dict_generator(v, [key] + pre):
#                         yield d
#             else:
#                 yield pre + [key, value]
#     else:
#         yield indict
        
# for i in dict_generator(data):
#     print(i)