# Load Temperature CSV

In [4]:
# findspark.init() must run before `import pyspark` so that the Spark
# installation is added to sys.path.
import findspark
findspark.init()
import pyspark
import random

# Reuse the already-running SparkContext when this cell is executed again.
# Constructing a second SparkContext in the same kernel raises
# "Cannot run multiple SparkContexts at once", which made the cell
# impossible to re-run without restarting the kernel.
sc = pyspark.SparkContext.getOrCreate(pyspark.SparkConf().setAppName("Pi"))

from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

# Monthly average land temperature per country. The first CSV row is the
# header (dt, AverageTemperature, AverageTemperatureUncertainty, Country)
# and column types are inferred from the data.
df = sqlContext.read.load('GlobalLandTemperaturesByCountry.csv',
                          format='com.databricks.spark.csv',
                          header='true',
                          inferSchema='true')

## Print temperature preview

In [5]:
# Preview the first 20 temperature rows (long cells truncated), then the
# schema that Spark inferred for the CSV.
df.show(n=20, truncate=True)
df.printSchema()

+----------+-------------------+-----------------------------+-------+
|        dt| AverageTemperature|AverageTemperatureUncertainty|Country|
+----------+-------------------+-----------------------------+-------+
|1743-11-01| 4.3839999999999995|                        2.294|  Åland|
|1743-12-01|               null|                         null|  Åland|
|1744-01-01|               null|                         null|  Åland|
|1744-02-01|               null|                         null|  Åland|
|1744-03-01|               null|                         null|  Åland|
|1744-04-01|               1.53|                         4.68|  Åland|
|1744-05-01|  6.702000000000001|                        1.789|  Åland|
|1744-06-01| 11.609000000000002|                        1.577|  Åland|
|1744-07-01|             15.342|                         1.41|  Åland|
|1744-08-01|               null|                         null|  Åland|
|1744-09-01|             11.702|                        1.517|  Åland|
|1744-

# Load Country Location CSV

In [6]:
# Country coordinates lookup (country code, latitude, longitude, name).
# The first CSV row is the header and column types are inferred.
df_c = (
    sqlContext.read
    .format('com.databricks.spark.csv')
    .options(header='true', inferschema='true')
    .load('countries.csv')
)

## Print location preview

In [7]:
# Preview the first 20 country-location rows (long cells truncated), then
# the schema that Spark inferred for the CSV.
df_c.show(n=20, truncate=True)
df_c.printSchema()

+-------+----------+-----------+--------------------+
|country|  latitude|  longitude|                name|
+-------+----------+-----------+--------------------+
|     AD| 42.546245|   1.601554|             Andorra|
|     AE| 23.424076|  53.847818|United Arab Emirates|
|     AF|  33.93911|  67.709953|         Afghanistan|
|     AG| 17.060816| -61.796428| Antigua and Barbuda|
|     AI| 18.220554| -63.068615|            Anguilla|
|     AL| 41.153332|  20.168331|             Albania|
|     AM| 40.069099|  45.038189|             Armenia|
|     AN| 12.226079| -69.060087|Netherlands Antilles|
|     AO|-11.202692|  17.873887|              Angola|
|     AQ|-75.250973|  -0.071389|          Antarctica|
|     AR|-38.416097| -63.616672|           Argentina|
|     AS|-14.270972|-170.132217|      American Samoa|
|     AT| 47.516231|  14.550072|             Austria|
|     AU|-25.274398| 133.775136|           Australia|
|     AW|  12.52111| -69.968338|               Aruba|
|     AZ| 40.143105|  47.576

# Load Country Population CSV

In [9]:
# Population per country with one column per year (Y1960, Y1961, ...).
# The first CSV row is the header and column types are inferred.
df_p = (
    sqlContext.read
    .format('com.databricks.spark.csv')
    .options(header='true', inferschema='true')
    .load('population.csv')
)

## Print population preview

In [10]:
# Preview the first 20 population rows (long cells truncated), then the
# schema that Spark inferred for the CSV.
# NOTE(review): this frame has one column per year, so the table is very
# wide; consider selecting a few columns before showing it.
df_p.show(n=20, truncate=True)
df_p.printSchema()

+--------------------+------------+-----------------+--------------+--------+--------+--------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+-----+----+
|             Country|Country Code|   Indicator Name|Indicator Code|   Y1960|   Y1961|   Y1962|    Y1963|    Y1964|    Y1965|    Y1966|    Y1967|    Y1968|    Y1969|    Y1970|    Y1971|    Y1972|    Y1973|    Y1974|    Y1975|    Y1976|    Y1977|    Y1978|    Y1979|    Y1980|    Y1981|    Y1982|    Y1983|    Y1984|    Y1985|    Y1986|    Y1987|    Y1988|    Y198

# Import folium

In [11]:
# FOLIUM
import folium
from folium import FeatureGroup, LayerControl, Map, CircleMarker, plugins

## Import widgets

In [12]:

from ipywidgets import widgets, interact, interactive, fixed, interact_manual, Layout
from IPython.display import display
import datetime
import calendar
from dateutil.relativedelta import *
import pandas as pd

## Create date slider

In [13]:
# Date slider: every integer step of the widget is one month after
# slider_min_value.
slider_min_value = datetime.date(1743, 11, 1)
slider_value = slider_min_value


def f(y):
    """Print the date that lies `y` months after `slider_min_value`.

    Used as the `interact` callback; it only prints the date and does not
    modify the module-level `slider_value`.
    """
    selected_date = slider_min_value + relativedelta(months=y)
    print(selected_date)


slider = widgets.IntSlider(
    value=0,
    min=0,
    max=3238,  # 3238 months after 1743-11-01 is 2013-09-01
    step=1,
    description='Date:',
    orientation='horizontal',
    continuous_update=False,  # fire the callback only when the handle is released
    disabled=False,
    readout=False,  # the callback prints the date, so hide the raw integer
    readout_format='d',
    layout=Layout(width='500px'),
)

w = interact(f, y=slider)

interactive(children=(IntSlider(value=0, continuous_update=False, description='Date:', layout=Layout(width='50…

## Create choropleth and markers

In [14]:
# Build the map for the month currently selected on the slider:
#   * a choropleth layer coloured by each country's average temperature;
#   * a marker layer with one clickable circle per country, whose popup
#     shows the temperature and, from 1960 on, the population.
m = folium.Map(location=[51.148077, 71.3389639], zoom_start=3)

plugins.Fullscreen(
    position='topright',
    title='Expand me',
    title_cancel='Exit me',
    force_separate_button=True).add_to(m)

# The slider holds a month offset from slider_min_value; convert it to a date.
slider_value = slider_min_value+relativedelta(months=+slider.value)
print("Date : " + str(slider_value))

df_c.createOrReplaceTempView("countries")
df.createOrReplaceTempView("temp_table")

# Temperatures for the selected month joined with the country coordinates.
# The LEFT JOIN keeps countries without coordinates (latitude/longitude are
# then NULL) so they can still be coloured on the choropleth.
# NOTE(review): the CAST truncates the temperature to an int, so the "%.2f"
# shown in the popups below always ends in ".00" - confirm this is intended.
temp_table = sqlContext.sql("""
    SELECT 
        CAST(temp_table.AverageTemperature AS int) AS AverageTemperature, 
        temp_table.Country as Country, 
        temp_table.dt as dt, 
        countries.latitude as latitude, 
        countries.longitude as longitude
    FROM 
        temp_table 
    LEFT JOIN 
        countries 
            ON 
        temp_table.Country = countries.name
    WHERE 
        AverageTemperature IS NOT NULL AND
        dt='""" + str(slider_value) + "'")

# Run the Spark query once and reuse the rows for both layers; previously it
# was evaluated twice (toPandas() for the choropleth, collect() for markers).
temp_rows = temp_table.collect()
temp_pdf = pd.DataFrame(
    [(row["Country"], row["AverageTemperature"]) for row in temp_rows],
    columns=['Country', 'AverageTemperature'])

# folium.Choropleth replaces the deprecated Map.choropleth method, which
# newer folium releases no longer provide.
folium.Choropleth(
    geo_data='world-110m.geojson',
    name="choropleth",
    line_weight=2,
    fill_opacity=0.8,
    data=temp_pdf,
    columns=['Country', 'AverageTemperature'],
    key_on='properties.name',
    fill_color='YlOrRd',
    legend_name="Temperature",
    highlight=True
).add_to(m)

# CREATE MARKERS
feature_group = FeatureGroup(name='Markers')

df_p.createOrReplaceTempView("population")
year = (str(slider_value).split("-"))[0]

# Look up every country's population for the selected year with a single
# Spark job. The previous per-country SQL string concatenated the country
# name into a quoted literal, which breaks on names containing an apostrophe
# and launched one Spark job per marker.
pop_column = "Y" + year
population_by_country = {}
if int(year) >= 1960 and pop_column in df_p.columns:
    pop_rows = (df_p.select("Country", pop_column)
                    .groupBy("Country")
                    .mean()
                    .collect())
    # A row has a second field only when the year column is numeric.
    population_by_country = {row[0]: row[1] for row in pop_rows if len(row) > 1}

for country in temp_rows:
    # Skip countries that have no temperature or no coordinates to plot.
    if (country["AverageTemperature"] is not None
            and country["latitude"] is not None
            and country["longitude"] is not None):
        # Get Population
        pop_string = ""
        pop_int = population_by_country.get(country["Country"])
        if pop_int is not None:
            pop_string = "<br>pop: " + str(pop_int)

        # Get Temperature
        a = "%.2f" % country["AverageTemperature"]
        popup_s = folium.Popup("<b>" + country["Country"] + "</b><br> temp: " + str(a) + "°" + pop_string)
        folium.CircleMarker([country["latitude"], country["longitude"]],
                        radius=3.5,
                        popup=popup_s,
                        color='#000000',  # was '#00', which is not a valid hex colour
                        fill_color='red',
                        fill_opacity=1).add_to(feature_group)
feature_group.add_to(m)
folium.LayerControl().add_to(m)
m

Date : 1970-01-01
