### Animated map that displays protest locations in sequence
With:
- ##### <u> SQLDATE </u>
- ##### <u> ActionGeo_Lat </u>
- ##### <u> ActionGeo_Long </u>

In [1]:
from google.cloud import bigquery
import pandas as pd
import numpy as np
import os
from IPython.core.display import display, HTML

# Folder holding the cached query result and the lookup CSV files
DATA_PATH = "data/"
# Root folder of the Leaflet map assets; each map lives in its own sub-folder
LEAFLET_PATH = 'animated_maps/'
WORLD_MAP_PATH = LEAFLET_PATH + 'world_map/'
COUNTRY_MAP_PATH = LEAFLET_PATH + 'individual_country_map/'

# Use the machine-specific key file only as a fallback: when the environment
# already defines GOOGLE_APPLICATION_CREDENTIALS, that value is kept, so the
# notebook is not tied to one workstation.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", 'C:\\Users\\antho\\Downloads\\adafinalproject-b2214ea058a4.json')
bigquery_client = bigquery.Client()

In [2]:
#Fetching data from GDELT
## BIG DATA, 3 000 000 rows

protests_csv_path = DATA_PATH + 'protests_location.csv'
if os.path.isfile(protests_csv_path):
    # A cached copy exists: do not run the query again, load the saved file.
    # EventCode is read as text because the query filters it with LIKE (i.e. as
    # a string); without this the cached branch would yield integers instead.
    protests_df_location = pd.read_csv(protests_csv_path, dtype={'EventCode': str})
else:
    # Query the date and the location of the protest events.
    # Remark: '14%' filters the protest events because they all start with '14'.
    query_protests_location = bigquery_client.query(
        """SELECT SQLDATE, ActionGeo_Lat, ActionGeo_Long, EventCode, ActionGeo_FullName  FROM `gdelt-bq.gdeltv2.events` 
        WHERE EventCode LIKE '14%' """)
    protests_df_location = query_protests_location.result().to_dataframe()
    # Make sure the cache folder exists so the result of the query can be saved
    os.makedirs(DATA_PATH, exist_ok=True)
    protests_df_location.to_csv(protests_csv_path, index=False)
# Rows with any missing field are discarded
protests_df_location = protests_df_location.dropna()
protests_df_location.count()

SQLDATE               1769903
ActionGeo_Lat         1769903
ActionGeo_Long        1769903
EventCode             1769903
ActionGeo_FullName    1769903
dtype: int64

## Animated world map

In [3]:
# Start from the cleaned protest table and keep only locations that occur once.
# Repeated locations are not true duplicates (the events took place on
# different days), but nation-wide protests are located at the centre of the
# country, so several national protests in one country share the same point.
# keep=False discards every row of such a repeated location.
protests_without_duplicated_values = (
    protests_df_location
    .drop_duplicates(subset=['ActionGeo_Long', 'ActionGeo_Lat'], keep=False)
    .sort_values('SQLDATE')
    # Only events whose SQLDATE is at least 20140000 are kept
    .loc[lambda events: events['SQLDATE'] >= 20140000]
)

In [4]:
# Event codes as text, truncated to their first three characters
event_code_text = protests_without_duplicated_values['EventCode'].astype(str)
protests_without_duplicated_values['EventCode'] = event_code_text.str[0:3]
protests_without_duplicated_values['EventCode'].unique()

array(['141', '140', '143'], dtype=object)

In [5]:
# Every row is turned into JavaScript array fragments ("[...]," with a trailing
# comma) that the script generator concatenates later on.
world_events = protests_without_duplicated_values
protests_df_for_js = pd.DataFrame({
    "coord_for_js": '[' + world_events['ActionGeo_Lat'].astype(str) + ',' + world_events['ActionGeo_Long'].astype(str) + '],',
    # First eight characters of the date text (positions 0-3, 4-5 and 6-7 together)
    "dates": '[' + world_events['SQLDATE'].astype(str).str[0:8] + '],',
    "event_code": '[' + world_events['EventCode'].astype(str) + '],',
})



In [22]:
def updating_js_script(df, SCRIPT_PATH, markers_number=4, markers_speed=1, LatLong="[35.435804, 6.634183]", zoom="2"):
    """Generate ``script.js`` for an animated map from ``script_origin.js``.

    The rows of ``df`` are dealt out round-robin to ``markers_number`` moving
    markers (marker 1 gets rows 0, n, 2n, ...; marker 2 gets rows 1, n+1, ...).
    For each marker the function emits JavaScript array declarations for the
    coordinates, dates, event types and (only when ``df`` has a ``count``
    column) counts, one ``L.Marker.movingMarker`` declaration, and one
    ``addStation`` call per row.

    The generated code is appended to every line but the last of
    ``SCRIPT_PATH + "script_origin.js"`` and the whole is written to
    ``SCRIPT_PATH + "script.js"`` (overwriting it).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the string columns ``coord_for_js``, ``dates`` and
        ``event_code``; ``count`` is optional. Each value is a JavaScript
        fragment ending with a comma, e.g. ``"[1.0,2.0],"``.
    SCRIPT_PATH : str
        Folder prefix, concatenated as-is with the file names, so it must end
        with a path separator.
    markers_number : int
        Number of moving markers the rows are spread over.
    markers_speed : number
        Speed factor; the duration passed to each marker is
        ``len(df) / markers_number / (markers_speed / 100)``.
    LatLong : str
        JavaScript literal used as the map centre.
    zoom : str
        JavaScript literal used as the initial zoom level.

    Returns
    -------
    None
    """
    simulation_duration = len(df.index)/markers_number/(markers_speed/100)
    has_counts = 'count' in df

    def js_array(name, label, fragments):
        # Concatenate the per-row fragments and drop the comma that trails the
        # last one; an empty marker yields a valid empty array "[]".
        body = "".join(fragments)
        if body.endswith(","):
            body = body[:-1]
        return "\n var " + name + label + " = [" + body + "]"

    coordinates_parts = []
    counts_parts = []
    types_parts = []
    dates_parts = []
    declaration_parts = []
    addstation_parts = []

    for marker_idx in range(1, markers_number + 1):
        label = str(marker_idx)
        # Markers are numbered from 1 but rows from 0: start at marker_idx - 1
        # so that the very first row of df is not skipped.
        marker_rows = df.iloc[marker_idx - 1::markers_number]

        coordinates_parts.append(js_array("protests_coordinates", label, marker_rows["coord_for_js"]))
        dates_parts.append(js_array("protests_dates", label, marker_rows["dates"]))
        types_parts.append(js_array("protests_types", label, marker_rows["event_code"]))
        if has_counts:
            counts_parts.append(js_array("protests_counts", label, marker_rows["count"]))

        # One station per row of this marker, numbered from 1, on a single line
        if len(marker_rows) > 0:
            stations = "".join(
                "marker" + label + ".addStation(" + str(station) + ", 500);"
                for station in range(1, len(marker_rows) + 1)
            )
            addstation_parts.append("\n" + stations)

        # NOTE(review): every marker is given protests_dates1 rather than its own
        # protests_dates<N> array. Kept as-is because the JavaScript side is not
        # visible here; confirm whether this is intended.
        marker_args = "protests_coordinates" + label + ",protests_dates1,protests_types" + label
        if has_counts:
            marker_args = marker_args + ",protests_counts" + label
        declaration_parts.append(
            "\n var marker" + label + " = L.Marker.movingMarker(" + marker_args
            + "," + str(simulation_duration) + ", {autostart: true}).addTo(map);"
        )

    mapfit = 'var map = new L.Map(\'map\', {center:'+LatLong+',zoom:'+zoom+'});map.addLayer(layer);'

    # The last line of the template is dropped before the generated code is appended
    with open(SCRIPT_PATH + "script_origin.js") as template_file:
        lines = template_file.readlines()[:-1]

    lines.append(mapfit+'\n')
    lines.append("".join(coordinates_parts)+'\n')
    if has_counts:
        lines.append("".join(counts_parts)+'\n')
    lines.append("".join(dates_parts)+'\n')
    lines.append("".join(types_parts)+'\n')
    lines.append("".join(declaration_parts)+'\n')
    lines.append("".join(addstation_parts)+'\n')

    with open(SCRIPT_PATH + "script.js", 'w') as script_file:
        script_file.writelines(lines)

In [23]:
# Regenerate the world map script, leaving marker count, speed, centre and zoom at their defaults
updating_js_script(df=protests_df_for_js, SCRIPT_PATH=WORLD_MAP_PATH)

In [24]:
# Clickable heading that opens the generated world map page in a new tab
world_map_link = "<h1><a href='{}index.html' target='_blank'>Animated map that displays protests day by day (Ctrl+Click)</a></h1>".format(WORLD_MAP_PATH)
display(HTML(world_map_link))

## Animated map per country

In [9]:
#protests_df_location.ActionGeo_CountryCode.unique()

In [10]:
# Country lookup table, with every column converted to text
country_location = pd.read_csv(DATA_PATH + 'country_lat_long.csv').astype(str)
# "[latitude,longitude]" literal, later passed to the script generator as the map centre
country_location['LatLong'] = '[' + country_location['Latitude'] + ',' + country_location['Longitude'] + ']'

In [11]:
country_wanted = "United States"
# Date window for the per-country map; compared against SQLDATE further down
date_start = 20160101
date_end = 20160303

# Look the country up by exact name. A substring search would also match any
# country whose name merely contains the requested one and could therefore
# return the coordinates of the wrong country.
matching_countries = country_location[country_location['CountryName'] == country_wanted]
if matching_countries.empty:
    # Stop here instead of printing and carrying on with an undefined (or
    # stale, from a previous run) `latlong`.
    raise ValueError("Country wanted doesn't exist: " + country_wanted)
latlong = matching_countries['LatLong'].iloc[0]

In [12]:
# Events whose location name contains the country name. regex=False makes this
# a literal substring test, so characters such as "(" or "." in a country name
# are not interpreted as a regular expression.
in_country = protests_df_location['ActionGeo_FullName'].str.contains(country_wanted, regex=False)
# Both date bounds are exclusive
in_date_window = (protests_df_location.SQLDATE > date_start) & (protests_df_location.SQLDATE < date_end)
protests_wanted = protests_df_location[in_country & in_date_window].sort_values('SQLDATE')

In [13]:
# Number of non-missing values per column for the selected country and date window
protests_wanted.count()

SQLDATE               19560
ActionGeo_Lat         19560
ActionGeo_Long        19560
EventCode             19560
ActionGeo_FullName    19560
dtype: int64

In [14]:
# Convert every column to text so the values can be concatenated into JavaScript fragments
protests_wanted = protests_wanted.astype(str)

In [15]:
# Same JavaScript fragments as for the world map ("[...]," with a trailing
# comma); the columns of protests_wanted are already text at this point.
protests_wanted_for_js = pd.DataFrame({
    "coord_for_js": '[' + protests_wanted['ActionGeo_Lat'] + ',' + protests_wanted['ActionGeo_Long'] + '],',
    # First eight characters of the date text (positions 0-3, 4-5 and 6-7 together)
    "dates": '[' + protests_wanted['SQLDATE'].str[0:8] + '],',
    "event_code": '[' + protests_wanted['EventCode'] + '],',
})

In [16]:
# Create the 'count' column with a placeholder value of 0 for every row
protests_wanted_for_js['count'] = 0

In [17]:
# Running occurrence number of each coordinate, in row order: 1 the first time
# a location appears, 2 the second time, and so on.
# cumcount() numbers the rows of each group from 0, hence the + 1. This replaces
# a loop that searched the list of all previously seen locations for every row.
protests_wanted_for_js['count'] = protests_wanted_for_js.groupby('coord_for_js').cumcount() + 1

In [18]:
# Wrap each count as a JavaScript fragment "[n]," like the other columns
count_text = protests_wanted_for_js['count'].astype(str)
protests_wanted_for_js['count'] = '[' + count_text + '],'

In [25]:
# Regenerate the per-country map script, centred on the selected country
updating_js_script(
    df=protests_wanted_for_js,
    SCRIPT_PATH=COUNTRY_MAP_PATH,
    markers_number=7,
    markers_speed=10,
    LatLong=latlong,
    zoom="5",
)

In [26]:
# Clickable heading that opens the generated per-country map page in a new tab
country_map_link = "<h1><a href='{}index.html' target='_blank'>Animated map which displays protests day by day in {} (Ctrl+Click)</a></h1>".format(COUNTRY_MAP_PATH, country_wanted)
display(HTML(country_map_link))