# Visualizations

## Importing libraries and reading data

In [15]:
import pandas as pd
from sqlalchemy import create_engine

from plotly.graph_objs import Bar
import plotly.express as px

In [12]:
# Load data: open the SQLite database (the three-slash URL form is a path
# relative to the current working directory) and read the whole
# `disaster_messages` table into a DataFrame.
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table(table_name='disaster_messages', con=engine)

In [48]:
def format_category_test(category):
    """Turn a snake_case category name into a display label.

    The string is passed through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased) and every underscore is then
    replaced by a single space.

    Args:
        category: Raw category name, e.g. ``'aid_related'``.

    Returns:
        The formatted label, e.g. ``'Aid related'``.
    """
    capitalized = category.capitalize()
    # Splitting on '_' and re-joining with ' ' swaps each underscore for a space.
    return ' '.join(capitalized.split('_'))

## Chart 1: Percentage of occurrences by category

In [13]:
# The first four columns are skipped; every remaining column is treated as a
# category column (presumably the leading ones hold message metadata — TODO confirm).
leading_non_category_columns = 4
categories_names = df.columns[leading_non_category_columns:]

In [49]:
# Build a two-column frame (category, mean) with one row per category column:
#   1. column-wise mean over all rows,
#   2. move the column names out of the index (`reset_index` names them
#      'index' and the values 0, hence the rename),
#   3. replace raw names with display labels,
#   4. sort ascending by mean.
category_mean_df = (
    df[categories_names]
    .mean(axis=0)
    .reset_index()
    .rename(columns={'index': 'category', 0: 'mean'})
    .assign(category=lambda frame: frame['category'].apply(format_category_test))
    .sort_values(by='mean')
)

In [50]:
# Horizontal bar chart with one bar per category, bar length = column mean.
# The title promises a percentage, but the plotted values are raw means
# (fractions, assuming the category columns are 0/1 indicators — TODO confirm),
# so the x-axis ticks are formatted as percentages and both axes get readable
# labels instead of the bare column names.
fig = px.bar(
    category_mean_df,
    x="mean",
    y="category",
    orientation='h',
    title='Percentage of occurrences by category',
    labels={'mean': 'Percentage of occurrences', 'category': 'Category'},
)
# d3-format '.0%' multiplies by 100 and appends '%' (0.2 -> '20%');
# the underlying data is left untouched.
fig.update_xaxes(tickformat='.0%')
fig.show()

## Chart 2: Categories correlation heatmap

In [None]:
# Same selection as in the Chart 1 section: skip the first four columns and
# treat every remaining column as a category column.
leading_non_category_columns = 4
categories_names = df.columns[leading_non_category_columns:]

In [39]:
# Column-wise totals for every category column, reshaped into a two-column
# frame (category, occurrences). `reset_index` names the former index 'index'
# and the summed values 0, hence the rename.
categories_df = (
    df[categories_names]
    .sum(axis=0)
    .reset_index()
    .rename(columns={'index': 'category', 0: 'occurrences'})
)

In [42]:
# Keep the ten rows with the largest totals, then pull out their raw
# (unformatted) category names as an array for column selection later on.
most_common_categories_df = categories_df.nlargest(n=10, columns='occurrences')
most_common_categories = most_common_categories_df['category'].values
most_common_categories

array(['related', 'aid_related', 'weather_related', 'direct_report',
       'request', 'other_aid', 'food', 'earthquake', 'storm', 'shelter'],
      dtype=object)

In [53]:
# Select the most common category columns and relabel each one with its
# display name; `rename` applies the callable to every column label and
# returns a new frame, leaving `df` untouched.
heatmap_df = df[most_common_categories].rename(columns=format_category_test)

In [54]:
# Pairwise correlation between the selected category columns
# (pandas' default method).
corr_df = heatmap_df.corr()
# Any undefined (NaN) correlation is shown as 0 rather than left blank.
corr_df = corr_df.fillna(0)
corr_df

Unnamed: 0,Related,Aid related,Weather related,Direct report,Request,Other aid,Food,Earthquake,Storm,Shelter
Related,1.0,0.437321,0.322959,0.254788,0.235897,0.202302,0.184215,0.167154,0.166703,0.161804
Aid related,0.437321,1.0,0.209272,0.374864,0.448024,0.462594,0.421236,0.080541,0.060199,0.369989
Weather related,0.322959,0.209272,1.0,0.079145,-0.014546,0.169982,-0.015033,0.517571,0.516175,0.091481
Direct report,0.254788,0.374864,0.079145,1.0,0.647937,0.192838,0.297884,0.028416,0.070764,0.234496
Request,0.235897,0.448024,-0.014546,0.647937,1.0,0.243933,0.351183,0.006972,-0.045664,0.280603
Other aid,0.202302,0.462594,0.169982,0.192838,0.243933,1.0,0.036507,0.104353,0.058593,0.067187
Food,0.184215,0.421236,-0.015033,0.297884,0.351183,0.036507,1.0,-0.05646,-0.000995,0.22941
Earthquake,0.167154,0.080541,0.517571,0.028416,0.006972,0.104353,-0.05646,1.0,-0.018365,0.021372
Storm,0.166703,0.060199,0.516175,0.070764,-0.045664,0.058593,-0.000995,-0.018365,1.0,0.05475
Shelter,0.161804,0.369989,0.091481,0.234496,0.280603,0.067187,0.22941,0.021372,0.05475,1.0


In [44]:
# NOTE(review): this reassigns `heatmap_df` to a rounded correlation matrix
# computed from the raw (unformatted) column names. The plotting cell that
# follows reads `corr_df`, not `heatmap_df`, so this looks like a leftover
# from an earlier iteration — confirm nothing else depends on it before removing.
heatmap_df = df[most_common_categories].corr().fillna(0).round(2)

In [61]:
# Heatmap of the correlation matrix, rounded to two decimals, using the
# 'BuPu' continuous color scale.
fig = px.imshow(
    img=corr_df.round(2),
    color_continuous_scale='BuPu',
    title='Categories correlation heatmap',
)
fig.show()