In [2]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

spark = SparkSession.builder.master("local[*]")\
        .appName("Hello")\
        .config("spark.jars.packages","org.mongodb.spark:mongo-spark-connector_2.12:2.4.1")\
        .getOrCreate()
import warnings
warnings.filterwarnings("ignore")

In [3]:
df= spark.read.csv("database.csv",header=True)

                                                                                

In [4]:
df.take(1)

                                                                                

[Row(Date='01/02/1965', Time='13:44:18', Latitude='19.246', Longitude='145.616', Type='Earthquake', Depth='131.6', Depth Error=None, Depth Seismic Stations=None, Magnitude='6', Magnitude Type='MW', Magnitude Error=None, Magnitude Seismic Stations=None, Azimuthal Gap=None, Horizontal Distance=None, Horizontal Error=None, Root Mean Square=None, ID='ISCGEM860706', Source='ISCGEM', Location Source='ISCGEM', Magnitude Source='ISCGEM', Status='Automatic')]

In [5]:
df.printSchema()

root
 |-- Date: string (nullable = true)
 |-- Time: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)
 |-- Type: string (nullable = true)
 |-- Depth: string (nullable = true)
 |-- Depth Error: string (nullable = true)
 |-- Depth Seismic Stations: string (nullable = true)
 |-- Magnitude: string (nullable = true)
 |-- Magnitude Type: string (nullable = true)
 |-- Magnitude Error: string (nullable = true)
 |-- Magnitude Seismic Stations: string (nullable = true)
 |-- Azimuthal Gap: string (nullable = true)
 |-- Horizontal Distance: string (nullable = true)
 |-- Horizontal Error: string (nullable = true)
 |-- Root Mean Square: string (nullable = true)
 |-- ID: string (nullable = true)
 |-- Source: string (nullable = true)
 |-- Location Source: string (nullable = true)
 |-- Magnitude Source: string (nullable = true)
 |-- Status: string (nullable = true)



In [6]:
list_a_supp=["Depth Error","Depth Seismic Stations","Time","Magnitude Seismic Stations","Magnitude Error","Azimuthal",
            "Root Mean Square","Location Source","Magnitude Source","Source","Status","Azimuthal Gap","Horizontal Distance",
            "Horizontal Error"]  


In [7]:
df= df.drop(*list_a_supp)

In [8]:
df.printSchema()

root
 |-- Date: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)
 |-- Type: string (nullable = true)
 |-- Depth: string (nullable = true)
 |-- Magnitude: string (nullable = true)
 |-- Magnitude Type: string (nullable = true)
 |-- ID: string (nullable = true)



In [9]:
df.show(5)

                                                                                

+----------+--------+---------+----------+-----+---------+--------------+------------+
|      Date|Latitude|Longitude|      Type|Depth|Magnitude|Magnitude Type|          ID|
+----------+--------+---------+----------+-----+---------+--------------+------------+
|01/02/1965|  19.246|  145.616|Earthquake|131.6|        6|            MW|ISCGEM860706|
|01/04/1965|   1.863|  127.352|Earthquake|   80|      5.8|            MW|ISCGEM860737|
|01/05/1965| -20.579| -173.972|Earthquake|   20|      6.2|            MW|ISCGEM860762|
|01/08/1965| -59.076|  -23.557|Earthquake|   15|      5.8|            MW|ISCGEM860856|
|01/09/1965|  11.938|  126.427|Earthquake|   15|      5.8|            MW|ISCGEM860890|
+----------+--------+---------+----------+-----+---------+--------------+------------+
only showing top 5 rows



In [10]:
df.dtypes

[('Date', 'string'),
 ('Latitude', 'string'),
 ('Longitude', 'string'),
 ('Type', 'string'),
 ('Depth', 'string'),
 ('Magnitude', 'string'),
 ('Magnitude Type', 'string'),
 ('ID', 'string')]

In [11]:
df=df.withColumn("Year",year(to_timestamp("Date","dd/mm/yyyy")))

In [12]:
df.show(5)

+----------+--------+---------+----------+-----+---------+--------------+------------+----+
|      Date|Latitude|Longitude|      Type|Depth|Magnitude|Magnitude Type|          ID|Year|
+----------+--------+---------+----------+-----+---------+--------------+------------+----+
|01/02/1965|  19.246|  145.616|Earthquake|131.6|        6|            MW|ISCGEM860706|1965|
|01/04/1965|   1.863|  127.352|Earthquake|   80|      5.8|            MW|ISCGEM860737|1965|
|01/05/1965| -20.579| -173.972|Earthquake|   20|      6.2|            MW|ISCGEM860762|1965|
|01/08/1965| -59.076|  -23.557|Earthquake|   15|      5.8|            MW|ISCGEM860856|1965|
|01/09/1965|  11.938|  126.427|Earthquake|   15|      5.8|            MW|ISCGEM860890|1965|
+----------+--------+---------+----------+-----+---------+--------------+------------+----+
only showing top 5 rows



In [13]:
df.dtypes

[('Date', 'string'),
 ('Latitude', 'string'),
 ('Longitude', 'string'),
 ('Type', 'string'),
 ('Depth', 'string'),
 ('Magnitude', 'string'),
 ('Magnitude Type', 'string'),
 ('ID', 'string'),
 ('Year', 'int')]

In [14]:
df_freq=df.groupby("Year").count().withColumnRenamed("count","Counts")
df_freq.show(5)

[Stage 4:>                                                          (0 + 1) / 1]

+----+------+
|Year|Counts|
+----+------+
|1990|   528|
|1975|   411|
|1977|   425|
|2003|   485|
|2007|   608|
+----+------+
only showing top 5 rows



                                                                                

In [15]:
df = df.withColumn("Latitude",df.Latitude.cast(DoubleType()))\
    .withColumn("Longitude",df.Longitude.cast(DoubleType()))\
    .withColumn("Depth",df.Depth.cast(DoubleType()))\
     .withColumn("Magnitude",df.Magnitude.cast(DoubleType()))

In [16]:
df.dtypes

[('Date', 'string'),
 ('Latitude', 'double'),
 ('Longitude', 'double'),
 ('Type', 'string'),
 ('Depth', 'double'),
 ('Magnitude', 'double'),
 ('Magnitude Type', 'string'),
 ('ID', 'string'),
 ('Year', 'int')]

In [17]:
df

DataFrame[Date: string, Latitude: double, Longitude: double, Type: string, Depth: double, Magnitude: double, Magnitude Type: string, ID: string, Year: int]

In [18]:
df_max=df.groupBy("Year").max("Magnitude").withColumnRenamed("max(Magnitude)","Max_magnitude")

In [19]:
df_avg=df.groupBy("Year").avg("Magnitude").withColumnRenamed("avg(Magnitude)","Avg_magnitude")

In [20]:
df_avg.show(5)

                                                                                

+----+-----------------+
|Year|    Avg_magnitude|
+----+-----------------+
|1990|5.860624999999987|
|1975|  5.8488807785888|
|1977|5.783764705882346|
|2003|5.885731958762881|
|2007|5.886019736842098|
+----+-----------------+
only showing top 5 rows



In [21]:
df_freq=df_freq.join(df_avg,["Year"]).join(df_max,["Year"])

In [22]:
df_freq.show(5)

                                                                                

+----+------+-----------------+-------------+
|Year|Counts|    Avg_magnitude|Max_magnitude|
+----+------+-----------------+-------------+
|1990|   528|5.860624999999987|          7.8|
|1975|   411|  5.8488807785888|          7.9|
|1977|   425|5.783764705882346|          7.9|
|2003|   485|5.885731958762881|          8.3|
|2007|   608|5.886019736842098|          8.4|
+----+------+-----------------+-------------+
only showing top 5 rows



In [23]:
df.dropna()
df_freq.dropna()

DataFrame[Year: int, Counts: bigint, Avg_magnitude: double, Max_magnitude: double]

In [24]:
df.write.format("mongo")\
    .mode("overwrite")\
    .option("spark.mongodb.output.uri","mongodb://127.0.0.1:27017/Quake.quakes").save()

                                                                                

In [25]:
df_freq.write.format("mongo")\
    .mode("overwrite")\
    .option("spark.mongodb.output.uri","mongodb://127.0.0.1:27017/Quake.quakes_frequences").save()

                                                                                

$$"""
Machine \quad learning
"""$$

In [26]:
df_test=spark.read.csv("query.csv",header=True)

In [27]:
df_test.take(1)

[Row(time='2017-01-02T00:13:06.300Z', latitude='-36.0365', longitude='51.9288', depth='10', mag='5.7', magType='mwb', nst=None, gap='26', dmin='14.685', rms='1.37', net='us', id='us10007p5d', updated='2017-03-27T23:53:17.040Z', place='Southwest Indian Ridge', type='earthquake', horizontalError='10.3', depthError='1.7', magError='0.068', magNst='21', status='reviewed', locationSource='us', magSource='us')]

In [28]:
df_train=spark.read.format("mongo")\
    .option("spark.mongodb.input.uri","mongodb://127.0.0.1:27017/Quake.quakes").load()


                                                                                

In [29]:
df_train.show(5)

                                                                                

+----------+-----+------------+--------+---------+---------+--------------+----------+----+--------------------+
|      Date|Depth|          ID|Latitude|Longitude|Magnitude|Magnitude Type|      Type|Year|                 _id|
+----------+-----+------------+--------+---------+---------+--------------+----------+----+--------------------+
|01/02/1965|131.6|ISCGEM860706|  19.246|  145.616|      6.0|            MW|Earthquake|1965|{620cb49167888f77...|
|01/04/1965| 80.0|ISCGEM860737|   1.863|  127.352|      5.8|            MW|Earthquake|1965|{620cb49167888f77...|
|01/05/1965| 20.0|ISCGEM860762| -20.579| -173.972|      6.2|            MW|Earthquake|1965|{620cb49167888f77...|
|01/08/1965| 15.0|ISCGEM860856| -59.076|  -23.557|      5.8|            MW|Earthquake|1965|{620cb49167888f77...|
|01/09/1965| 15.0|ISCGEM860890|  11.938|  126.427|      5.8|            MW|Earthquake|1965|{620cb49167888f77...|
+----------+-----+------------+--------+---------+---------+--------------+----------+----+-----

In [30]:
df_test.printSchema()

root
 |-- time: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- depth: string (nullable = true)
 |-- mag: string (nullable = true)
 |-- magType: string (nullable = true)
 |-- nst: string (nullable = true)
 |-- gap: string (nullable = true)
 |-- dmin: string (nullable = true)
 |-- rms: string (nullable = true)
 |-- net: string (nullable = true)
 |-- id: string (nullable = true)
 |-- updated: string (nullable = true)
 |-- place: string (nullable = true)
 |-- type: string (nullable = true)
 |-- horizontalError: string (nullable = true)
 |-- depthError: string (nullable = true)
 |-- magError: string (nullable = true)
 |-- magNst: string (nullable = true)
 |-- status: string (nullable = true)
 |-- locationSource: string (nullable = true)
 |-- magSource: string (nullable = true)



In [31]:
df_test2=df_test["time","latitude","longitude","mag","depth"]

In [32]:
df_test2=df_test2.withColumnRenamed("time","Date").withColumnRenamed("latitude","Latitude")\
    .withColumnRenamed("longitude","Longitude").withColumnRenamed("mag","Magnitude")\
    .withColumnRenamed("depth","Depth")

In [33]:
df_test2.show(5)

+--------------------+--------+---------+---------+------+
|                Date|Latitude|Longitude|Magnitude| Depth|
+--------------------+--------+---------+---------+------+
|2017-01-02T00:13:...|-36.0365|  51.9288|      5.7|    10|
|2017-01-02T13:13:...|  -4.895| -76.3675|      5.9|   106|
|2017-01-02T13:14:...|-23.2513| 179.2383|      6.3|551.62|
|2017-01-03T09:09:...| 24.0151|  92.0177|      5.7|    32|
|2017-01-03T21:19:...|-43.3527| -74.5017|      5.5| 10.26|
+--------------------+--------+---------+---------+------+
only showing top 5 rows



In [34]:
df_test2.dtypes

[('Date', 'string'),
 ('Latitude', 'string'),
 ('Longitude', 'string'),
 ('Magnitude', 'string'),
 ('Depth', 'string')]

In [35]:
df_test2 = df_test2.withColumn("Latitude",df_test2.Latitude.cast(DoubleType()))\
    .withColumn("Longitude",df_test2.Longitude.cast(DoubleType()))\
    .withColumn("Depth",df_test2.Depth.cast(DoubleType()))\
     .withColumn("Magnitude",df_test2.Magnitude.cast(DoubleType()))

In [36]:
df_train2=df_train["Latitude","Longitude","Magnitude","Depth"]
df_test2=df_test2["Latitude","Longitude","Magnitude","Depth"]

In [37]:
df_train2.show(5)

+--------+---------+---------+-----+
|Latitude|Longitude|Magnitude|Depth|
+--------+---------+---------+-----+
|  19.246|  145.616|      6.0|131.6|
|   1.863|  127.352|      5.8| 80.0|
| -20.579| -173.972|      6.2| 20.0|
| -59.076|  -23.557|      5.8| 15.0|
|  11.938|  126.427|      5.8| 15.0|
+--------+---------+---------+-----+
only showing top 5 rows



In [38]:
df_test2.show(5)

+--------+---------+---------+------+
|Latitude|Longitude|Magnitude| Depth|
+--------+---------+---------+------+
|-36.0365|  51.9288|      5.7|  10.0|
|  -4.895| -76.3675|      5.9| 106.0|
|-23.2513| 179.2383|      6.3|551.62|
| 24.0151|  92.0177|      5.7|  32.0|
|-43.3527| -74.5017|      5.5| 10.26|
+--------+---------+---------+------+
only showing top 5 rows



In [39]:
df_test2=df_test2.dropna()
df_train2=df_train2.dropna()

In [40]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import RandomForestRegressor
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import RegressionEvaluator


In [41]:
assembler = VectorAssembler(inputCols=["Latitude","Longitude","Depth"],outputCol="features")

In [42]:
model_de_reg = RandomForestRegressor(featuresCol="features",labelCol="Magnitude")

$$Pipeline$$

In [43]:
my_pepline = Pipeline(stages=[assembler,model_de_reg])

In [44]:
model=my_pepline.fit(df_train2)

                                                                                

$$Prediction$$

In [45]:
results_predictions=model.transform(df_test2)

In [46]:
results_predictions.show(10)

+--------+---------+---------+------+--------------------+-----------------+
|Latitude|Longitude|Magnitude| Depth|            features|       prediction|
+--------+---------+---------+------+--------------------+-----------------+
|-36.0365|  51.9288|      5.7|  10.0|[-36.0365,51.9288...|5.835490829370853|
|  -4.895| -76.3675|      5.9| 106.0|[-4.895,-76.3675,...|5.874288832185032|
|-23.2513| 179.2383|      6.3|551.62|[-23.2513,179.238...|5.898141918871643|
| 24.0151|  92.0177|      5.7|  32.0|[24.0151,92.0177,...|5.866962236130389|
|-43.3527| -74.5017|      5.5| 10.26|[-43.3527,-74.501...|5.974072076899345|
|-19.3733| 176.0518|      6.9|  12.0|[-19.3733,176.051...|5.975149823318687|
|-19.3977| 175.9532|      5.7|  10.0|[-19.3977,175.953...|5.907471599769417|
|-19.1207| 176.1875|      6.0|  10.0|[-19.1207,176.187...|5.907471599769417|
|-18.9749| 176.2872|      5.5| 19.36|[-18.9749,176.287...| 5.97107393468644|
|-17.8694| 167.1235|      5.6|  22.1|[-17.8694,167.123...|6.006757709413907|

$$Evaluation$$

In [47]:
evaluator = RegressionEvaluator(predictionCol="prediction",labelCol="Magnitude",metricName="rmse")

In [48]:
rmse=evaluator.evaluate(results_predictions)
print("Root Mean Square Error on test dataset={} ".format(rmse))

Root Mean Square Error on test dataset=0.40311309537412676 


[Stage 51:>                                                         (0 + 1) / 1]                                                                                

In [49]:
df_results_predictions=results_predictions["Latitude","Longitude","prediction"]
# df_results_predictions.show(5)
df_results_predictions=df_results_predictions.withColumnRenamed("prediction","Prediction_Magnitude")

In [50]:
df_results_predictions = df_results_predictions.withColumn("Year",lit(2022)).withColumn("RMSE",lit(rmse))
df_results_predictions.show(5)

+--------+---------+--------------------+----+-------------------+
|Latitude|Longitude|Prediction_Magnitude|Year|               RMSE|
+--------+---------+--------------------+----+-------------------+
|-36.0365|  51.9288|   5.835490829370853|2022|0.40311309537412676|
|  -4.895| -76.3675|   5.874288832185032|2022|0.40311309537412676|
|-23.2513| 179.2383|   5.898141918871643|2022|0.40311309537412676|
| 24.0151|  92.0177|   5.866962236130389|2022|0.40311309537412676|
|-43.3527| -74.5017|   5.974072076899345|2022|0.40311309537412676|
+--------+---------+--------------------+----+-------------------+
only showing top 5 rows



In [51]:
df_results_predictions.write.format("mongo")\
    .mode("overwrite")\
    .option("spark.mongodb.output.uri","mongodb://127.0.0.1:27017/Quake.results_predictions").save()

                                                                                

$$Visualisation$$

In [73]:
import pandas as pd
import bokeh
from bokeh.io import output_notebook , output_file
from   bokeh.plotting import figure, show ,ColumnDataSource
from bokeh.models.tools import HoverTool
import math
from math import pi
from bokeh.palettes import Category20c
from bokeh.transform import cumsum
# from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.io import curdoc
from bokeh.themes import built_in_themes
from pymongo import MongoClient

from bokeh.tile_providers import get_provider, Vendors

In [53]:
def mongo_reader(host="127.0.0.1",port="27017", username =None , password=None , db="Quake", collection ="results_predictions"):
    mongo_uri="mongodb://{}:{}/{}.{}".format(host,port,db,collection)
    connexion = MongoClient(mongo_uri)
    db= connexion[db]
    cursor = db[collection].find()
    df=pd.DataFrame(list(cursor))
    del df["_id"]
    return df

In [115]:
df_quakes=mongo_reader(collection="quakes")
df_quakes_freq=mongo_reader(collection="quakes_frequences")
df_quakes_pred=mongo_reader(collection="results_predictions")

In [55]:
df_quakes.head(5)

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude Type,ID,Year
0,01/02/1965,19.246,145.616,Earthquake,131.6,6.0,MW,ISCGEM860706,1965.0
1,01/04/1965,1.863,127.352,Earthquake,80.0,5.8,MW,ISCGEM860737,1965.0
2,01/05/1965,-20.579,-173.972,Earthquake,20.0,6.2,MW,ISCGEM860762,1965.0
3,01/08/1965,-59.076,-23.557,Earthquake,15.0,5.8,MW,ISCGEM860856,1965.0
4,01/09/1965,11.938,126.427,Earthquake,15.0,5.8,MW,ISCGEM860890,1965.0


In [56]:
df_quakes_2016=df_quakes[df_quakes["Year"]==2016]

In [57]:
df_quakes_2016.head(5)

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude Type,ID,Year
22943,01/01/2016,-50.5575,139.4489,Earthquake,10.0,6.3,MWW,US10004ANT,2016.0
22944,01/01/2016,-28.6278,-177.281,Earthquake,34.0,5.8,MWW,US10004AQY,2016.0
22945,01/02/2016,44.8069,129.9406,Earthquake,585.47,5.8,MWW,US10004ATB,2016.0
22946,01/03/2016,24.8036,93.6505,Earthquake,55.0,6.7,MWW,US10004B2N,2016.0
22947,01/05/2016,30.6132,132.7337,Earthquake,4.71,5.8,MWW,US10004BEN,2016.0


In [60]:
df_quakes_pred.head(5)

Unnamed: 0,Latitude,Longitude,Prediction_Magnitude,Year,RMSE
0,-36.0365,51.9288,5.835491,2022,0.403113
1,-4.895,-76.3675,5.874289,2022,0.403113
2,-23.2513,179.2383,5.898142,2022,0.403113
3,24.0151,92.0177,5.866962,2022,0.403113
4,-43.3527,-74.5017,5.974072,2022,0.403113


In [58]:
output_notebook()

In [59]:
def style(plot):
    plot.title.align="center"
    plot.title.text_font_size="20pt"
    plot.title.text_font="serif"
    
    plot.xaxis.axis_label_text_font_size="14pt"
    plot.xaxis.axis_label_text_font_style="bold"
    plot.yaxis.axis_label_text_font_size="14pt"
    plot.yaxis.axis_label_text_font_style="bold"
    
    plot.xaxis.major_label_text_font_size="12pt"
    plot.yaxis.major_label_text_font_size="12pt"
    
    plot.legend.location="top_left"
    
    return plot
    

$$Geo \quad map \quad plot$$

In [203]:
def map_plot():
    lat = df_quakes_2016["Latitude"].values.tolist()
    lon = df_quakes_2016["Longitude"].values.tolist()
    
    lat_pred= df_quakes_pred["Latitude"].values.tolist()
    lon_pred= df_quakes_pred["Longitude"].values.tolist()
    
    list_lat=[]
    list_lon=[]
    list_lat_pred = []
    list_lon_pred = []
    
    i=0
    j=0
    
    #conversion in merc_projection format
    
    for i in range(len(lon)):
        r_major = 6378137.0
        x= r_major * math.radians(lon[i])
        scale = x/lon[i]
        y=180.0/math.pi * math.log(math.tan(math.pi/4.0 + lat[i]* (math.pi/180.0)/2.0))*scale
        
        list_lon.append(x)
        list_lat.append(y)
        i += 1
        
        
    for j in range(len(lon_pred)):
        r_major = 6378137.0
        x= r_major * math.radians(lon_pred[j])
        scale = x/lon_pred[j]
        y=180.0/math.pi * math.log(math.tan(math.pi/4.0 + lat[j]* (math.pi/180.0)/2.0))*scale
        
        list_lon_pred.append(x)
        list_lat_pred.append(y)
        j += 1
        
        
    df_quakes_2016["x_coords"]=list_lat
    df_quakes_2016["y_coords"]=list_lon

    df_quakes_pred["x_coords"]=list_lon_pred
    df_quakes_pred["y_coords"]=list_lat_pred

    #Scale the circles
    df_quakes_2016["Mag_size"]=df_quakes_2016["Magnitude"]*4
    df_quakes_pred["Mag_size"]=df_quakes_pred["Prediction_Magnitude"]*4

    #Create datasources

    lats = df_quakes_2016["x_coords"].tolist()
    longs = df_quakes_2016["y_coords"].tolist()
    mags = df_quakes_2016["Magnitude"].tolist()
    years = df_quakes_2016["Year"].tolist()
    mag_size = df_quakes_2016["Mag_size"].tolist()


    pred_lats = df_quakes_pred["x_coords"].tolist()
    pred_longs = df_quakes_pred["y_coords"].tolist()
    pred_mags = df_quakes_pred["Prediction_Magnitude"].tolist()
    pred_year = df_quakes_pred["Year"].tolist()
    pred_mag_size = df_quakes_pred["Mag_size"].tolist()

    # Create column datasource
    CDS = ColumnDataSource(data=dict(lat = lats,lon=longs ,mag =mags ,year = years ,mag_s=mag_size ))

    pred_CDS = ColumnDataSource(data=dict(lat_pred = pred_lats,lon_pred=pred_longs ,pred_mag =pred_mags ,year = pred_year ,pred_mag_s=pred_mag_size ))

    # Tooltips
    TOOLTIPS= [("Year","@year"),("Magnitude","@mag"),("Predicted Magnitude","@pred_mag")]
    #Create figure

    plot=figure(title="Earthquake Map", plot_width=1300 ,plot_height = 400 , x_range =(-2000000,6000000),
               y_range = (-1000000 , 7000000), tooltips = TOOLTIPS)
    plot.circle(x="lon",y="lat", size ="mag_s", fill_color = "#cc0000", fill_alpha=7.0, source=CDS,
               legend= "Quakes 2016")

    plot.circle(x="lon_pred",y="lat_pred", size ="pred_mag", fill_color = "#ccff33", fill_alpha=7.0, source=pred_CDS,
               legend= " Predicted Quakes 2017")

#     plot.add_tile(CARTODBPOSITRON)

    tile_provider = get_provider(Vendors.CARTODBPOSITRON)
    plot.add_tile(tile_provider)

    # Map style

    plot.title.align = "center"
    plot.title.text_font_size="20pt"
    plot.title.text_font= "serif"

    #he legend
    plot.legend.location = "bottom_right"
    plot.legend.background_fill_color="black"
    plot.legend.background_fill_alpha = 0.8
    plot.legend.click_policy="hide"
    plot.legend.label_text_color="white"
    plot.xaxis.visible = False
    plot.yaxis.visible = False
    plot.axis.axis_label = None
    plot.axis.visible = False
    plot.grid.grid_line_color=None

#     show(plot)
    return plot



In [204]:
map_plot()



$$Bar\quad chart$$

In [205]:
def bar_plot():
    cds= ColumnDataSource(data=dict(years=df_quakes_freq["Year"].values.tolist(),
                                   number_quakes=df_quakes_freq["Counts"].values.tolist()))
    
    #tooltips
    TOOLTIPS = [("Year","@years"),("Number of earthquakes","@number_quakes")]
    #figure
    barchart= figure(title="Frequency of Earthquakes by Year",plot_height=400, plot_width=1300,
                    x_axis_label="Year",y_axis_label="Number of occurences", x_minor_ticks=2
                     ,y_range =(0,df_quakes_freq["Counts"].max()+100), toolbar_location=None,
                    tooltips=TOOLTIPS)
    
    #Vertical Bar
    barchart.vbar(x="years" , bottom=0 , top = "number_quakes", color="#cc0010", width=0.75 , legend= "Years",
                 source=cds)
    #style
    barchart=style(barchart)
#     show(barchart)
    return barchart

In [206]:
bar_plot()



$$Magnitude \quad plot$$

In [207]:
df_quakes_freq.head(5)

Unnamed: 0,Year,Counts,Avg_magnitude,Max_magnitude
0,1990,528,5.860625,7.8
1,1975,411,5.848881,7.9
2,1977,425,5.783765,7.9
3,2003,485,5.885732,8.3
4,2007,608,5.88602,8.4


In [208]:
df_quakes_freq_sorted=df_quakes_freq.sort_values("Year")

In [209]:
df_quakes_freq_sorted.head(5)

Unnamed: 0,Year,Counts,Avg_magnitude,Max_magnitude
27,1965,339,6.014159,8.7
18,1966,234,6.04047,8.1
21,1967,255,6.003922,7.4
13,1968,305,6.078525,8.2
22,1969,323,6.00935,7.8


In [210]:
def magnitude_plot():
    cds=ColumnDataSource(data=dict(years=df_quakes_freq_sorted["Year"].values.tolist(),
                                  avg_mag=df_quakes_freq_sorted["Avg_magnitude"].round(1).values.tolist(),
                                  max_mag= df_quakes_freq_sorted["Max_magnitude"].values.tolist()))
    #tooltips
    TOOLTIPS = [("Year","@years"),("Average magnitude","@avg_mag"),("Maximum magnitude","@max_mag")]
    #create figure
    fig = figure(title="Maximum and Average Magnitude by Year", plot_width=1300, plot_height =400, 
              x_axis_label="Years", y_axis_label="Magnitude", x_minor_ticks=2 ,
               y_range=(5, df_quakes_freq["Max_magnitude"].max()+1), toolbar_location=None,
              tooltips= TOOLTIPS)
    
    # Max magnitude
    fig.line(x="years",y="max_mag", color="#cc0010", line_width=2, legend="Max Magnitude",source=cds)
    fig.circle(x="years",y="max_mag", color="#cc0010",size=8, fill_color="#cc0000",source=cds)
    
    # Average magnitude
    fig.line(x="years",y="avg_mag", color="yellow", line_width=2, legend="Average Magnitude",source=cds)
    fig.circle(x="years",y="avg_mag", color="yellow",size=8, fill_color="yellow",source=cds)
    
    
    fig=style(fig)
    
#     show(fig)
    
    return fig
    

In [211]:
magnitude_plot()



$$Grid \quad plot$$

In [212]:
output_file("dashboard.html")
#theme
curdoc().theme="dark_minimal"

In [213]:
from bokeh.layouts import gridplot

In [214]:
grid = gridplot([[map_plot()],[bar_plot()],[magnitude_plot()]])
show(grid)


