In [1]:
# import necessary libraries
from bokeh.plotting import figure, show
from bokeh.embed import components
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import  factor_cmap
import pandas as pd
from pymongo import MongoClient

### Importing data into MongoDB

* start mongodb server in terminal (file path is absolute file path):

    "C:\Program Files\MongoDB\Server\7.0\bin\mongod.exe" --dbpath="c:\data\db"

* navigate to folder containing 'emdat_cleaned.json' file, open new terminal, and run the following command:

    mongoimport --type json -d project3 -c disasters --drop --jsonArray emdat_cleaned.json

In [2]:
#initiate instance of pymongo
mongo = MongoClient(port=27017)

In [3]:
#assign database to python variable
db = mongo.project3

In [4]:
#Query the 'disasters' to confirm correct data import
db.disasters.find_one()

{'_id': ObjectId('6615cbd2a3643ad13d1db15d'),
 'Disaster #': '1999-9388',
 'Subgroup': 'Climatological',
 'Type': 'Drought',
 'Subtype': 'Drought',
 'Country': 'Djibouti',
 'Region': 'Africa',
 'Geolocation': 'Amudat',
 'Magnitude': None,
 'Lat': 1.86823895,
 'Lng': 34.92124761,
 'Year': 2001,
 'Month': 6.0,
 'Deaths': None,
 'Injuries': None,
 'Total Affected': 100000.0,
 'Insured Damage (Adjusted)': None,
 'Total Damage (Adjusted)': None}

In [5]:
#query the whole collection and convert to a list of dictionaries
from_db = db.disasters.find({})
from_db = list(from_db)

In [6]:
#convert to a pandas dataframe for analysis
data = pd.DataFrame(from_db)
data.drop('_id', axis = 1, inplace=True)
data.head()

Unnamed: 0,Disaster #,Subgroup,Type,Subtype,Country,Region,Geolocation,Magnitude,Lat,Lng,Year,Month,Deaths,Injuries,Total Affected,Insured Damage (Adjusted),Total Damage (Adjusted)
0,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Amudat,,1.868239,34.921248,2001,6.0,,,100000.0,,
1,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Ceel Barde,,4.663144,43.991431,2001,6.0,,,100000.0,,
2,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Bugabira,,-2.430075,30.019781,2001,6.0,,,100000.0,,
3,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Buganda,,-2.99057,29.225784,2001,6.0,,,100000.0,,
4,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Kibungo,,-2.135743,30.568653,2001,6.0,,,100000.0,,


In [7]:
#number of unique disaster id's in the dataset
data['Disaster #'].nunique()

5341

In [8]:
#Create df for disaster count by year
year_df = data[['Year', 'Type', 'Disaster #']].groupby(['Year', 'Type']).count()
year_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Disaster #
Year,Type,Unnamed: 2_level_1
2000,Drought,403
2000,Earthquake,46
2000,Extreme temperature,248
2000,Flood,564
2000,Mass movement (dry),2
...,...,...
2018,Mass movement (wet),21
2018,Storm,632
2018,Volcanic activity,18
2019,Drought,3


In [9]:
# unstack df to get a more usable dataframe for graphing
year_df = year_df.unstack(level=1)
year_df

Unnamed: 0_level_0,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #
Type,Drought,Earthquake,Extreme temperature,Flood,Mass movement (dry),Mass movement (wet),Storm,Volcanic activity
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2000,403.0,46.0,248.0,564.0,2.0,40.0,381.0,8.0
2001,427.0,98.0,128.0,529.0,,34.0,333.0,18.0
2002,222.0,70.0,75.0,584.0,1.0,40.0,483.0,16.0
2003,251.0,81.0,2418.0,929.0,,42.0,593.0,8.0
2004,72.0,1035.0,544.0,875.0,1.0,17.0,1619.0,8.0
2005,104.0,112.0,4644.0,1091.0,,14.0,1509.0,15.0
2006,45.0,61.0,3344.0,1352.0,1.0,58.0,425.0,50.0
2007,48.0,65.0,2089.0,1209.0,,14.0,963.0,10.0
2008,114.0,51.0,110.0,1172.0,4.0,16.0,1133.0,32.0
2009,802.0,87.0,1167.0,991.0,2.0,38.0,1220.0,6.0


In [10]:
#Create line plot of disasters over time. Output will be one graph with 8 lines (1 for each disaster type)

#Create chart format
count_by_year = figure(title="# of Disasters by Year", x_axis_label='Year', y_axis_label='# of Disasters', width = 1800, height = 500)

#generate each line of the chart
count_by_year.line(year_df.index, year_df.iloc[:,0], legend_label="Drought", color = '#ffee65')
count_by_year.line(year_df.index, year_df.iloc[:,1], legend_label="Earthquake", color = '#fdcce5')
count_by_year.line(year_df.index, year_df.iloc[:,2], legend_label="Extreme Temp.", color = '#fd7f6f')
count_by_year.line(year_df.index, year_df.iloc[:,3], legend_label="Flood", color = '#7eb0d5')
count_by_year.line(year_df.index, year_df.iloc[:,4], legend_label="Mass Mov. (Dry)", color = '#bd7ebe')
count_by_year.line(year_df.index, year_df.iloc[:,5], legend_label="Mass Mov. (Wet)", color = '#8bd3c7')
count_by_year.line(year_df.index, year_df.iloc[:,6], legend_label="Storm", color = '#b2e061')
count_by_year.line(year_df.index, year_df.iloc[:,7], legend_label="Volcanic Activity", color = '#ffb55a')

#save Javascript code and html code and show plot in browser
js_by_year, html_by_year = components(count_by_year)
#show(count_by_year)

In [11]:
#Print Javascript code for above plot to be copy and pasted into html or 'app.js'
#Optionally could be exported as a stand alone html file or .png
print(js_by_year)

#had to clear output before pushing to github to avoid errors displaying in github.

    <script type="text/javascript">
        (function() {
  const fn = function() {
    Bokeh.safely(function() {
      (function(root) {
        function embed_document(root) {
        const docs_json = '{"c3864433-1b70-42fe-8208-f8e54915d991":{"version":"3.3.4","title":"Bokeh Application","roots":[{"type":"object","name":"Figure","id":"p1001","attributes":{"width":1800,"height":500,"x_range":{"type":"object","name":"DataRange1d","id":"p1002"},"y_range":{"type":"object","name":"DataRange1d","id":"p1003"},"x_scale":{"type":"object","name":"LinearScale","id":"p1011"},"y_scale":{"type":"object","name":"LinearScale","id":"p1012"},"title":{"type":"object","name":"Title","id":"p1004","attributes":{"text":"# of Disasters by Year"}},"renderers":[{"type":"object","name":"GlyphRenderer","id":"p1040","attributes":{"data_source":{"type":"object","name":"ColumnDataSource","id":"p1034","attributes":{"selected":{"type":"object","name":"Selection","id":"p1035","attributes":{"indices":[],"line_indices

In [12]:
#print html <div> to be placed in html file wherever it needs to show up on the webpage
print(html_by_year)

#had to clear output before pushing to github to avoid errors displaying in github.

<div id="b06772ef-caa6-4cc6-9770-b9f0ae033109" data-root-id="p1001" style="display: contents;"></div>


In [13]:
#get subdataframe of the disaster count for each region
region_df = data[['Region', 'Type', 'Disaster #']].groupby(['Region', 'Type']).count()
region_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Disaster #
Region,Type,Unnamed: 2_level_1
Africa,Drought,1536
Africa,Earthquake,378
Africa,Extreme temperature,66
Africa,Flood,3689
Africa,Mass movement (dry),1
Africa,Mass movement (wet),59
Africa,Storm,765
Africa,Volcanic activity,10
Americas,Drought,1107
Americas,Earthquake,318


In [14]:
# unstack df to get a more usable dataframe for graphing
region_df = region_df.unstack(level=1)
region_df

Unnamed: 0_level_0,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #
Type,Drought,Earthquake,Extreme temperature,Flood,Mass movement (dry),Mass movement (wet),Storm,Volcanic activity
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Africa,1536,378,66,3689,1,59,765,10
Americas,1107,318,1290,4230,4,107,8512,156
Asia,1175,1861,2019,6957,8,468,6951,106
Europe,291,87,23511,2934,2,16,2040,3
Oceania,59,44,10,371,2,7,464,20


In [15]:
#Create grouped barchart for comparing the count of disaster types per region

#Create list of factors for the barchart
x = [(x,y) for x  in region_df.index for y in region_df['Disaster #'].columns]

#define data sources for chart
sources = ColumnDataSource(data=dict(x = x, counts=region_df['Disaster #'].values))

#create chart dimensions and format
type_by_region = figure(x_range=FactorRange(*x), title=f"Disaster Count by Region", x_axis_label='Region (Disaster Type)', y_axis_label='Disaster Count',width=1500, height=500)

colors = ['#ffee65', '#fdcce5', '#fd7f6f', '#7eb0d5', '#bd7ebe', '#8bd3c7', '#b2e061', '#ffb55a']

factor_colors = factor_cmap('x', palette = colors, factors = region_df['Disaster #'].columns, start=1, end=2)

#populate chart with data. Each bar color corresponds to a disaster type
type_by_region.vbar(x = 'x', top = 'counts', width=0.8, source = sources, 
                    line_color = factor_colors,
                    fill_color = factor_colors)

#Adjust x axis lables for readability
type_by_region.xaxis.major_label_orientation = 1.5

#save Javascript code and html code and show plot in browser
js_by_region, html_by_region = components(type_by_region)
show(type_by_region)



In [16]:
# print Javascript to be copy and pasted into 'app.js'
print(js_by_region)

#had to clear output before pushing to github to avoid errors displaying in github.

    <script type="text/javascript">
        (function() {
  const fn = function() {
    Bokeh.safely(function() {
      (function(root) {
        function embed_document(root) {
        const docs_json = '{"7fb4f9dc-7bcb-4a97-aab3-fd9d42c9a23d":{"version":"3.3.4","title":"Bokeh Application","roots":[{"type":"object","name":"Figure","id":"p1135","attributes":{"width":1500,"height":500,"x_range":{"type":"object","name":"FactorRange","id":"p1134","attributes":{"factors":[["Africa","Drought"],["Africa","Earthquake"],["Africa","Extreme temperature"],["Africa","Flood"],["Africa","Mass movement (dry)"],["Africa","Mass movement (wet)"],["Africa","Storm"],["Africa","Volcanic activity"],["Americas","Drought"],["Americas","Earthquake"],["Americas","Extreme temperature"],["Americas","Flood"],["Americas","Mass movement (dry)"],["Americas","Mass movement (wet)"],["Americas","Storm"],["Americas","Volcanic activity"],["Asia","Drought"],["Asia","Earthquake"],["Asia","Extreme temperature"],["Asia","Floo

In [17]:
#print html <div> to be placed in html file wherever it needs to show up on the webpage
print(html_by_region)

#had to clear output before pushing to github to avoid errors displaying in github.

<div id="d2b3b9f0-f0e3-4124-a684-24847efe9679" data-root-id="p1135" style="display: contents;"></div>
