### Animated map which displays the protests locations in sequence
With:
- ##### <u> SQLDATE </u>
- ##### <u> ActionGeo_Lat </u>
- ##### <u> ActionGeo_Long </u>

In [1]:
from google.cloud import bigquery
import pandas as pd
import numpy as np
import os
from IPython.core.display import display, HTML

DATA_PATH = "data/"
LEAFLET_PATH = 'Leaflet.MovingMarker-master\\animated_map\\'

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'C:\\Users\\antho\\Downloads\\ADAproj-02b470a29ee4.json'
bigquery_client = bigquery.Client()

In [2]:
#Fetching data from GDELT
## BIG DATA, 3 000 000 rows

if os.path.isfile(DATA_PATH + 'protests_location.csv') :
    #If we already saved the data, don't run the query again, just get the data from the file saved previously
    protests_df_location = pd.read_csv(DATA_PATH + 'protests_location.csv')
else:
    #query to get the date and the location of the protest events
    #Remark: '14%' filters the protest events because they all start by '14'
    query_protests_location = bigquery_client.query(
        """SELECT SQLDATE, ActionGeo_Lat, ActionGeo_Long, EventCode FROM `gdelt-bq.gdeltv2.events` 
        WHERE EventCode LIKE '14%' """)
    protests_df_location = query_protests_location.result().to_dataframe()
    # Write down a csv file
    protests_df_location.to_csv(DATA_PATH + 'protests_location.csv', index=False)
protests_df_location = protests_df_location.dropna()
protests_df_location.count()

SQLDATE           3440495
ActionGeo_Lat     3440495
ActionGeo_Long    3440495
EventCode         3440495
dtype: int64

In [3]:
#Extracting ActionGeo_Lat and ActionGeo_Long
protests_without_duplicated_values = protests_df_location
#protests_without_duplicated_values['ActionGeo_Lat'] = protests_df_location['ActionGeo_Lat']
#protests_without_duplicated_values['ActionGeo_Long'] = protests_df_location['ActionGeo_Long']

#Removing duplicated values 
#The values are not really duplicated, they took place on different day
#Plus, the localization of national protest is in the center of the country 
#We thus have a duplicated localizations when we have several national protests in the same country
protests_without_duplicated_values=protests_without_duplicated_values.drop_duplicates(subset=['ActionGeo_Long', 'ActionGeo_Lat'], keep=False)
protests_without_duplicated_values=protests_without_duplicated_values.sort_values('SQLDATE')
protests_without_duplicated_values=protests_without_duplicated_values[protests_without_duplicated_values['SQLDATE']>=20140000]

In [4]:
protests_without_duplicated_values['EventCode']= protests_without_duplicated_values['EventCode'].str[0:3]
protests_without_duplicated_values['EventCode'].unique()

AttributeError: Can only use .str accessor with string values, which use np.object_ dtype in pandas

In [None]:
protests_df_for_js = pd.DataFrame()
protests_df_for_js["coord_for_js"] = '[' + protests_without_duplicated_values['ActionGeo_Lat'].astype(str) + ',' + protests_without_duplicated_values['ActionGeo_Long'].astype(str) + '],'
protests_df_for_js["dates"] = protests_without_duplicated_values['SQLDATE'].astype(str)
protests_df_for_js["dates"] = '[' + protests_df_for_js['dates'].str[0:4] + protests_df_for_js['dates'].str[4:6] + protests_df_for_js['dates'].str[6:8] + '],'
protests_df_for_js["event_code"] = protests_without_duplicated_values['EventCode'].astype(str)
protests_df_for_js["event_code"] = '[' + protests_df_for_js['event_code'] + '],'

In [None]:
markers_number = 4
markers_speed = 4000000 / markers_number

protests_coordinates=""
protests_types=""
protests_dates=""
marker_declaration=""
string_addstation=""

for marker_idx in range(1,markers_number+1):
    protests_coordinates = protests_coordinates + "\n var protests_coordinates" + str(marker_idx) + " = ["
    protests_dates = protests_dates + "\n var protests_dates" + str(marker_idx) + " = ["
    protests_types = protests_types + "\n var protests_types" + str(marker_idx) + " = ["
    addstation_idx = 0
    i = 0
    for idx in range(marker_idx,len(protests_df_for_js),markers_number):
        i=i+1
        protests_coordinates = protests_coordinates + protests_df_for_js["coord_for_js"].iloc[idx]
        protests_dates = protests_dates + protests_df_for_js["dates"].iloc[idx]
        protests_types = protests_types + protests_df_for_js["event_code"].iloc[idx]
        if addstation_idx == 0 :
                addstation_idx = 1
                string_addstation = string_addstation + "\n"
        string_addstation = string_addstation + "marker" + str(marker_idx) + ".addStation(" + str(i) + ", 500);"
    
    #protests_df_for_js=protests_df_for_js.iloc[number_of_protests_per_loop:]
    protests_coordinates = protests_coordinates[:-1]
    protests_coordinates = protests_coordinates + "]"
    protests_dates = protests_dates[:-1]
    protests_dates = protests_dates + "]"  
    protests_types = protests_types[:-1]
    protests_types = protests_types + "]"        

    marker_declaration = marker_declaration + "\n var marker" + str(marker_idx) + " = L.Marker.movingMarker(protests_coordinates" + str(marker_idx) + ",protests_dates1,protests_types" + str(marker_idx) +"," + str(markers_speed) + ", {autostart: true}).addTo(map);"


In [None]:
readFile = open(LEAFLET_PATH + "script_origin.js")

lines = readFile.readlines()
lines = lines[:-1]
readFile.close()

lines.append(protests_coordinates+'\n')
lines.append(protests_dates+'\n')
lines.append(protests_types+'\n')
lines.append(marker_declaration+'\n')
lines.append(string_addstation+'\n')

In [None]:
w = open(LEAFLET_PATH + "script.js",'w')

w.writelines([item for item in lines])

w.close()

In [None]:
display(HTML("<h1><a href='"+LEAFLET_PATH+"index.html' target='_blank'>Animated map that displays protests location in sequence (Ctrl+Click)</a></h1>"))