In [1]:
# import necessary libraries
from bokeh.plotting import figure, show
from bokeh.embed import components
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import  factor_cmap
import pandas as pd
from pymongo import MongoClient

### Importing data into MongoDB

* Start the MongoDB server in a terminal (use the absolute paths of your own `mongod.exe` and data directory):

    "C:\Program Files\MongoDB\Server\7.0\bin\mongod.exe" --dbpath="c:\data\db"

* Open a new terminal, navigate to the folder containing the `emdat_cleaned.json` file, and run the following command:

    mongoimport --type json -d project3 -c disasters --drop --jsonArray emdat_cleaned.json

In [2]:
#Create a MongoClient for the MongoDB server listening on port 27017 (no host is passed)
mongo = MongoClient(port=27017)

In [3]:
#Get a handle to the 'project3' database (the database name used in the mongoimport command)
db = mongo.project3

In [4]:
#Fetch a single document from the 'disasters' collection to confirm the data was imported correctly
db.disasters.find_one()

{'_id': ObjectId('6614478beb3befb6796ea102'),
 'Disaster #': '1999-9388',
 'Subgroup': 'Climatological',
 'Type': 'Drought',
 'Subtype': 'Drought',
 'Country': 'Djibouti',
 'Region': 'Africa',
 'Geolocation': 'Bugabira',
 'Magnitude': None,
 'Lat': -2.43007525,
 'Lng': 30.01978072,
 'Year': 2001,
 'Month': 6.0,
 'Deaths': None,
 'Injuries': None,
 'Total Affected': 100000.0,
 'Insured Damage (Adjusted)': None,
 'Total Damage (Adjusted)': None}

In [5]:
#Pull every document in the 'disasters' collection and materialise the
#cursor as a list of dictionaries
from_db = list(db.disasters.find({}))

In [6]:
#Build a DataFrame from the queried documents, leaving out the '_id' column,
#then preview the first rows
data = pd.DataFrame(from_db).drop(columns='_id')
data.head()

Unnamed: 0,Disaster #,Subgroup,Type,Subtype,Country,Region,Geolocation,Magnitude,Lat,Lng,Year,Month,Deaths,Injuries,Total Affected,Insured Damage (Adjusted),Total Damage (Adjusted)
0,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Bugabira,,-2.430075,30.019781,2001,6.0,,,100000.0,,
1,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Buganda,,-2.99057,29.225784,2001,6.0,,,100000.0,,
2,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Rugombo,,-2.821939,29.094707,2001,6.0,,,100000.0,,
3,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Gihanga,,-3.197334,29.294292,2001,6.0,,,100000.0,,
4,1999-9388,Climatological,Drought,Drought,Djibouti,Africa,Busoni,,-2.473186,30.252261,2001,6.0,,,100000.0,,


In [7]:
#Number of unique disaster ids in the dataset (a single id can appear on several rows)
data['Disaster #'].nunique()

5341

In [8]:
#Count the distinct disaster ids for every (Year, Type) pair
year_df = data.groupby(['Year', 'Type'])[['Disaster #']].nunique()
year_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Disaster #
Year,Type,Unnamed: 2_level_1
2000,Drought,22
2000,Earthquake,30
2000,Extreme temperature,30
2000,Flood,146
2000,Mass movement (dry),1
...,...,...
2018,Mass movement (wet),12
2018,Storm,62
2018,Volcanic activity,6
2019,Drought,1


In [9]:
# Pivot the 'Type' index level into columns: one row per year, one column per disaster type.
# Year/type pairs that never occur show up as NaN.
# NOTE: year_df is overwritten here, so re-run the previous cell before re-running this one.
year_df = year_df.unstack(level='Type')
year_df

Unnamed: 0_level_0,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #
Type,Drought,Earthquake,Extreme temperature,Flood,Mass movement (dry),Mass movement (wet),Storm,Volcanic activity
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2000,22.0,30.0,30.0,146.0,1.0,28.0,88.0,5.0
2001,14.0,24.0,23.0,149.0,,24.0,93.0,6.0
2002,25.0,34.0,15.0,163.0,1.0,20.0,91.0,7.0
2003,10.0,39.0,7.0,139.0,,21.0,68.0,2.0
2004,10.0,29.0,8.0,104.0,1.0,15.0,76.0,5.0
2005,18.0,21.0,10.0,159.0,,13.0,63.0,8.0
2006,8.0,23.0,8.0,190.0,1.0,20.0,60.0,12.0
2007,10.0,19.0,9.0,191.0,,10.0,53.0,6.0
2008,15.0,22.0,8.0,140.0,3.0,12.0,57.0,6.0
2009,14.0,19.0,10.0,128.0,1.0,29.0,57.0,2.0


In [10]:
#Create line plot of disasters over time. Output will be one graph with 8 lines (1 for each disaster type)

#Create chart format
count_by_year = figure(title="# of Disasters by Year", x_axis_label='Year', y_axis_label='# of Disasters', width = 1800, height = 500)

#(column name in year_df, legend label, line color) for each disaster type
line_specs = [
    ('Drought', 'Drought', '#ffee65'),
    ('Earthquake', 'Earthquake', '#fdcce5'),
    ('Extreme temperature', 'Extreme Temp.', '#fd7f6f'),
    ('Flood', 'Flood', '#7eb0d5'),
    ('Mass movement (dry)', 'Mass Mov. (Dry)', '#bd7ebe'),
    ('Mass movement (wet)', 'Mass Mov. (Wet)', '#8bd3c7'),
    ('Storm', 'Storm', '#b2e061'),
    ('Volcanic activity', 'Volcanic Activity', '#ffb55a'),
]

#A year/type pair with no recorded disasters is NaN after unstacking. Treat it as a count of 0,
#otherwise the line is broken at every NaN and sparse types (e.g. 'Mass movement (dry)') are
#reduced to isolated points that a line glyph does not draw
yearly_counts = year_df['Disaster #'].fillna(0)

#generate each line of the chart, looking each column up by name rather than by position so a
#change in column order cannot silently attach the wrong legend label
for type_name, label, line_color in line_specs:
    count_by_year.line(yearly_counts.index, yearly_counts[type_name], legend_label=label, color=line_color)

#save Javascript code and html code for embedding the plot in a web page
js_by_year, html_by_year = components(count_by_year)
#uncomment the next line to also open the plot in a browser
#show(count_by_year)

In [None]:
#Print the Javascript code for the plot above so it can be copied and pasted into the html page or 'app.js'
#Optionally the plot could instead be exported as a standalone html file or .png
print(js_by_year)

#NOTE: the output of this cell was cleared before pushing to GitHub.
#The printed output is html, which seems to stop GitHub from displaying the notebook

In [None]:
#Print the html <div> for the plot; place it in the html file wherever the plot should appear on the webpage
print(html_by_year)

#NOTE: the output of this cell was cleared before pushing to GitHub.
#The printed output is html, which seems to stop GitHub from displaying the notebook

In [13]:
#Count the distinct disaster ids for every (Region, Type) pair
region_df = data.groupby(['Region', 'Type'])[['Disaster #']].nunique()
region_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Disaster #
Region,Type,Unnamed: 2_level_1
Africa,Drought,99
Africa,Earthquake,24
Africa,Extreme temperature,15
Africa,Flood,616
Africa,Mass movement (dry),1
Africa,Mass movement (wet),33
Africa,Storm,141
Africa,Volcanic activity,8
Americas,Drought,62
Americas,Earthquake,71


In [14]:
# Pivot the 'Type' index level into columns: one row per region, one column per disaster type.
# NOTE: region_df is overwritten here, so re-run the previous cell before re-running this one.
region_df = region_df.unstack(level='Type')
region_df

Unnamed: 0_level_0,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #,Disaster #
Type,Drought,Earthquake,Extreme temperature,Flood,Mass movement (dry),Mass movement (wet),Storm,Volcanic activity
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Africa,99,24,15,616,1,33,141,8
Americas,62,71,48,549,3,66,387,34
Asia,72,315,94,1123,7,229,584,34
Europe,15,40,64,298,1,11,105,2
Oceania,7,19,3,71,1,7,92,15


In [15]:
#Create grouped barchart for comparing the count of disaster types per region

#Disaster counts with one row per region and one column per disaster type
region_counts = region_df['Disaster #']

#Create list of (region, disaster type) factors for the barchart.
#Regions vary slowest, matching a row-by-row walk through region_counts
x = [(region, disaster_type) for region in region_counts.index for disaster_type in region_counts.columns]

#define data sources for chart. Every column of a ColumnDataSource must be one-dimensional and of
#equal length, so the 2-D table of counts is flattened row by row to line up with the factors in x
sources = ColumnDataSource(data=dict(x = x, counts=region_counts.to_numpy().ravel()))

#create chart dimensions and format
type_by_region = figure(x_range=FactorRange(*x), title="Disaster Count by Region", x_axis_label='Region (Disaster Type)', y_axis_label='Disaster Count',width=1500, height=500)

colors = ['#ffee65', '#fdcce5', '#fd7f6f', '#7eb0d5', '#bd7ebe', '#8bd3c7', '#b2e061', '#ffb55a']

#color by the second element of each factor (the disaster type); start=1, end=2 selects that element.
#factors is passed as a plain list rather than a pandas Index
factor_colors = factor_cmap('x', palette = colors, factors = list(region_counts.columns), start=1, end=2)

#populate chart with data. Each bar color corresponds to a disaster type
type_by_region.vbar(x = 'x', top = 'counts', width=0.8, source = sources,
                    line_color = factor_colors,
                    fill_color = factor_colors)

#Adjust x axis labels for readability
type_by_region.xaxis.major_label_orientation = 1.5

#save Javascript code and html code and show plot in browser
js_by_region, html_by_region = components(type_by_region)
show(type_by_region)



In [None]:
# Print the Javascript code for the region bar chart so it can be copied and pasted into 'app.js'
print(js_by_region)

#NOTE: the output of this cell was cleared before pushing to GitHub.
#The printed output is html, which seems to stop GitHub from displaying the notebook

In [None]:
#Print the html <div> for the region bar chart; place it in the html file wherever the chart should appear on the webpage
print(html_by_region)

#NOTE: the output of this cell was cleared before pushing to GitHub.
#The printed output is html, which seems to stop GitHub from displaying the notebook