### Animated map which displays the protests locations in sequence
With:
- ##### <u> SQLDATE </u>
- ##### <u> ActionGeo_Lat </u>
- ##### <u> ActionGeo_Long </u>

In [2]:
from google.cloud import bigquery
import pandas as pd
import numpy as np
import os
from IPython.core.display import display, HTML
from functions.animated_maps_functions import updating_js_script
from functions.animated_maps_functions import converting_count_to_color
from functions.animated_maps_functions import adding_count_columns
from functions.animated_maps_functions import getting_appropriate_format_df_for_js
from functions.dataframe_functions import highlight_text

import matplotlib.pyplot as plt


DATA_PATH = "data/"
ANIMATED_MAP_PATH = 'maps/animated_maps/'
WORLD_MAP_TYPE_PATH = ANIMATED_MAP_PATH + 'world_map_type/'
WORLD_MAP_FREQ_PATH = ANIMATED_MAP_PATH + 'world_map_freq/'
COUNTRY_MAP_PATH = ANIMATED_MAP_PATH + 'individual_country_map/'

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'C:\\Users\\antho\\Downloads\\adafinalproject-b2214ea058a4.json'
bigquery_client = bigquery.Client()

In [3]:
#Fetching data from GDELT
## BIG DATA, 3 000 000 rows

if os.path.isfile(DATA_PATH + 'protests_location.csv') :
    #If we already saved the data, don't run the query again, just get the data from the file saved previously
    protests_df_location = pd.read_csv(DATA_PATH + 'protests_location.csv')
else:
    #query to get the date and the location of the protest events
    #Remark: '14%' filters the protest events because they all start by '14'
    query_protests_location = bigquery_client.query(
        """SELECT SQLDATE, ActionGeo_Lat, ActionGeo_Long, EventCode, ActionGeo_FullName  FROM `gdelt-bq.gdeltv2.events` 
        WHERE EventCode LIKE '14%' """)
    protests_df_location = query_protests_location.result().to_dataframe()
    # Write down a csv file
    protests_df_location.to_csv(DATA_PATH + 'protests_location.csv', index=False)
protests_df_location = protests_df_location.dropna()
print("The number of protests we got is: " + str(protests_df_location['EventCode'].count()))

The number of protests we got is: 1769903


## Animated world map: visualyzing the kind of protest
Displaying a dynamic world map which shows evolution day by day
I also allows to see which kind of protest it is thanks to color indicators

In [4]:
#Extracting ActionGeo_Lat and ActionGeo_Long
protests_without_duplicated_values = protests_df_location
#protests_without_duplicated_values['ActionGeo_Lat'] = protests_df_location['ActionGeo_Lat']
#protests_without_duplicated_values['ActionGeo_Long'] = protests_df_location['ActionGeo_Long']

#Removing duplicated values 
#The values are not really duplicated, they took place on different day
#however we don't want to display a circle within another, we only want to visualyze the kind of protest
#we will visualize the frequency on the next two maps
protests_without_duplicated_values=protests_without_duplicated_values.drop_duplicates(subset=['ActionGeo_Long', 'ActionGeo_Lat', 'EventCode'], keep=False)
protests_without_duplicated_values=protests_without_duplicated_values.sort_values('SQLDATE')
date_start = 20140325
date_end = 20150800
protests_without_duplicated_values = protests_without_duplicated_values[protests_without_duplicated_values.SQLDATE > date_start]
protests_without_duplicated_values = protests_without_duplicated_values[protests_without_duplicated_values.SQLDATE < date_end]


In [5]:
#getting only the first 3 digits of event code because we don't mind the other, they provide useless information for our analysis
protests_without_duplicated_values['EventCode']= protests_without_duplicated_values['EventCode'].astype(str)
protests_without_duplicated_values['EventCode']= protests_without_duplicated_values['EventCode'].str[0:3]

In [6]:
#getting a df in an appropriate format
protests_df_for_js = pd.DataFrame()
protests_df_for_js["coord_for_js"] = '[' + protests_without_duplicated_values['ActionGeo_Lat'].astype(str) + ',' + protests_without_duplicated_values['ActionGeo_Long'].astype(str) + '],'
protests_df_for_js["dates"] = protests_without_duplicated_values['SQLDATE'].astype(str)
protests_df_for_js["dates"] = '[' + protests_df_for_js['dates'].str[0:4] + protests_df_for_js['dates'].str[4:6] + protests_df_for_js['dates'].str[6:8] + '],'
protests_df_for_js["event_code"] = protests_without_duplicated_values['EventCode'].astype(str)
protests_df_for_js["event_code"] = '[' + protests_df_for_js['event_code'] + '],'

In [7]:
#Updating the js script to display result on a dynamic world map
updating_js_script(protests_df_for_js, WORLD_MAP_TYPE_PATH, markers_speed=0.1)

In [8]:
display(HTML("<h1><a href='"+WORLD_MAP_TYPE_PATH+"index.html' target='_blank'>Animated map that displays protests day by day (Ctrl+Click)</a></h1>"))

## Animated world map: visualyzing the frequency
Displaying a dynamic world map which shows how often we get a protest on an area.
The color goes from white to green then black. Areas in white color means few protests took place here, black means the opposite.

In [9]:
date_start = 20150201
date_end = 20150220
protests_world_wanted = protests_df_location
protests_world_wanted = protests_world_wanted[protests_world_wanted.SQLDATE > date_start]
protests_world_wanted = protests_world_wanted[protests_world_wanted.SQLDATE < date_end]
protests_world_wanted=protests_world_wanted.sort_values('SQLDATE')

In [10]:
#getting a dataframe with appropriate format
protests_world_wanted_for_js = getting_appropriate_format_df_for_js(protests_world_wanted)

In [11]:
#getting the number of time each value was repeted 
protests_world_wanted_for_js = adding_count_columns(protests_world_wanted_for_js)

In [12]:
protests_world_wanted_for_js['colors'] = ""
max_value = protests_world_wanted_for_js['count'].max()
min_value = protests_world_wanted_for_js['count'].min()
index = 0
for value in protests_world_wanted_for_js['coord_for_js']:
    number = protests_world_wanted_for_js['count'].iloc[index]
    protests_world_wanted_for_js.iloc[index, protests_world_wanted_for_js.columns.get_loc('colors')] = converting_count_to_color(min_value, max_value, number)
    index = index + 1

In [13]:
protests_world_wanted_for_js['colors'] = protests_world_wanted_for_js['colors'].astype(str)
protests_world_wanted_for_js["colors"] = '[' + protests_world_wanted_for_js['colors'] + '],'
for col in protests_world_wanted_for_js:
    protests_world_wanted_for_js[col] = protests_world_wanted_for_js[col].astype(str)

In [14]:
updating_js_script(protests_world_wanted_for_js, WORLD_MAP_FREQ_PATH, markers_number = 15, markers_speed=500)

In [15]:
display(HTML("<h1><a href='"+WORLD_MAP_FREQ_PATH+"index.html' target='_blank'>Animated map that displays protests day by day (Ctrl+Click)</a></h1>"))

## Animated map per country
Displaying a map focused on a country.
This country can be set with the variable in the next cell.
The map shows circle with a color code (a different color for each kind of protest with a legend displayed). The circle grows up each time a new protest takes place on the same location.

In [16]:
country_wanted = "United States"

In [17]:
country_location = pd.read_csv(DATA_PATH + 'country_lat_long.csv')
for cln in country_location:
    country_location[cln] = country_location[cln].astype(str)
country_location['LatLong'] = '['+country_location['Latitude']+','+country_location['Longitude']+']'

In [18]:
s = pd.Series(country_location['Country Name'])
if country_wanted in s.unique():
    latlong = country_location[country_location['Country Name'].str.contains(country_wanted)]['LatLong']
    latlong = latlong.iloc[0]
else: 
    print("Country wanted doesn't exist")
date_start = 20161108
date_end = 20161111

In [19]:
protests_wanted = protests_df_location[protests_df_location['ActionGeo_FullName'].str.contains(country_wanted)]
protests_wanted = protests_wanted[protests_wanted.SQLDATE > date_start]
protests_wanted = protests_wanted[protests_wanted.SQLDATE < date_end]
protests_wanted=protests_wanted.sort_values('SQLDATE')
protests_wanted['count'] = 0

In [20]:
#getting a dataframe with appropriate format
protests_wanted_for_js = getting_appropriate_format_df_for_js(protests_wanted)

In [21]:
#getting the number of time each value was repeted 
protests_wanted_for_js = adding_count_columns(protests_wanted_for_js)

In [22]:
protests_wanted_for_js['count'] = protests_wanted_for_js['count'].astype(str)
protests_wanted_for_js["count"] = '[' + protests_wanted_for_js['count'] + '],'

In [23]:
updating_js_script(protests_wanted_for_js, COUNTRY_MAP_PATH, markers_number=7, markers_speed=0.1, zoom="5", LatLong=latlong)

In [24]:
display(HTML("<h1><a href='"+COUNTRY_MAP_PATH+"index.html' target='_blank'>Animated map which displays protests day by day in "+ country_wanted + " (Ctrl+Click)</a></h1>"))

# Where do we have the most protests? Why?

In [25]:
protests_ = protests_df_location
protests_ = protests_.groupby(['SQLDATE','ActionGeo_FullName']).size().reset_index(name='count').sort_values('count', ascending=False).head(22)

In [26]:
protests_df_location.head()

Unnamed: 0,SQLDATE,ActionGeo_Lat,ActionGeo_Long,EventCode,ActionGeo_FullName
0,20160520,42.5,19.3,1411,Montenegro
1,20160520,42.5,19.3,1411,Montenegro
2,20160520,50.5,4.75,1412,"Wallonia, Waals Gewest, Belgium"
3,20160520,50.5,4.75,1412,"Wallonia, Waals Gewest, Belgium"
4,20160520,-1.80409,37.6203,1411,"Makueni, Eastern, Kenya"


In [27]:
protests_.style.apply(highlight_text, subset=['ActionGeo_FullName'], text = 'United States', color = 'gray')

Unnamed: 0,SQLDATE,ActionGeo_FullName,count
300787,20160708,"Dallas, Texas, United States",327
179192,20160104,"Riyadh, Ar Riya?, Saudi Arabia",267
301465,20160709,"Dallas, Texas, United States",264
306192,20160716,"Ankara, Ankara, Turkey",252
492186,20170814,"Charlottesville, Virginia, United States",249
306436,20160716,"Istanbul, Istanbul, Turkey",245
610809,20180618,"Skopje, Macedonia (general), Macedonia",243
178677,20160103,"Riyadh, Ar Riya?, Saudi Arabia",223
492824,20170815,"Charlottesville, Virginia, United States",216
302642,20160711,"Dallas, Texas, United States",204


We can see that, more than the half took place in the US. Plus, it was mostly around 8th July 2016. After some online researches, we found that it was about shooting of Dallas police officers:
https://en.wikipedia.org/wiki/2016_shooting_of_Dallas_police_officers

In [None]:
test = protests_df_location
test['SQLDATE'] = test['SQLDATE'].astype(str).str[0:4]
test.head()
test = test.groupby(['SQLDATE','ActionGeo_FullName']).size().reset_index(name='count').sort_values('count', ascending=False)
test = test[test['ActionGeo_FullName'].str.contains('United States')]
test.count()

In [None]:
# libraries and data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
 
# Make a data frame
df=pd.DataFrame({'x': range(1,11), 'y1': np.random.randn(10), 'y2': np.random.randn(10)+range(1,11), 'y3': np.random.randn(10)+range(11,21), 'y4': np.random.randn(10)+range(6,16), 'y5': np.random.randn(10)+range(4,14)+(0,0,0,0,0,0,0,-3,-8,-6), 'y6': np.random.randn(10)+range(2,12), 'y7': np.random.randn(10)+range(5,15), 'y8': np.random.randn(10)+range(4,14), 'y9': np.random.randn(10)+range(4,14), 'y10': np.random.randn(10)+range(2,12) })
 
# style
plt.style.use('seaborn-darkgrid')
 
# create a color palette
palette = plt.get_cmap('Set1')
 
# multiple line plot
num=0
for column in test.drop('SQLDATE', axis=1):
    num+=1
    plt.plot(test['SQLDATE'], test[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)
 
# Add legend
plt.legend(loc=2, ncol=2)
 
# Add titles
plt.title("A (bad) Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
plt.xlabel("Time")
plt.ylabel("Score")


Text(0,0.5,'Score')

In [11]:
# import the library
import folium
import pandas as pd
 
# Make a data frame with dots to show on the map
data = pd.DataFrame({
   'lat':[-58, 2, 145, 30.32, -4.03, -73.57, 36.82, -38.5],
   'lon':[-34, 49, -38, 59.93, 5.33, 45.52, -1.29, -12.97],
   'name':['Buenos Aires', 'Paris', 'melbourne', 'St Petersbourg', 'Abidjan', 'Montreal', 'Nairobi', 'Salvador'],
   'value':[10,12,40,70,23,43,100,43]
})
data
 
# Make an empty map
m = folium.Map(location=[20,0], tiles="Mapbox Bright", zoom_start=2)
 
# I can add marker one by one on the map
for i in range(0,len(data)):
   folium.Circle(
      location=[data.iloc[i]['lon'], data.iloc[i]['lat']],
      popup=data.iloc[i]['name'],
      radius=data.iloc[i]['value']*10000,
      color='crimson',
      fill=True,
      fill_color='crimson'
   ).add_to(m)
 
# Save it as html
m.save('mymap.html')


TypeError: Object of type 'int64' is not JSON serializable