# 1. Read Dataset

In [21]:
import pandas as pd
import os

# Both translated review exports are read from the same folder.
base_dir = '../dataset/phase 2/'
file_path_1, file_path_2 = (
    os.path.join(base_dir, csv_name)
    for csv_name in ('df_translated_google_play.csv', 'df_translated_app_store.csv')
)

# df_1 holds the Google Play reviews, df_2 the App Store reviews.
df_1, df_2 = (pd.read_csv(csv_path) for csv_path in (file_path_1, file_path_2))

# 2. Combine Dataset From Play Store and App Store


## 2.1. App Name Matching

In [22]:
# Rename apps whose titles differ between the two frames so that the same app
# carries one identical name in both. Values not listed are left untouched.
df_1['app'] = df_1['app'].replace({
    'Hazards - Red Cross': 'Hazards Red Cross',
})

df_2['app'] = df_2['app'].replace({
    'GeoNet Quake': 'GeoNet',
    'Hazards – Red Cross': 'Hazards Red Cross',  # this title uses an en dash, not a hyphen
    'Disaster Alert (PDC Global)': 'Disaster Alert',
    'Earthquake+ Alerts Map & Info': 'Earthquake + Alerts Map & Info',
})

## 2.2. Concatenate Play Store and App Store Datasets

In [23]:
# Stack the shared review columns of both frames; ignore_index=True already
# gives the result a fresh 0..n-1 index.
df = pd.concat(
    [store_df[['app', 'content', 'score']] for store_df in (df_1, df_2)],
    ignore_index=True,
)

# 3. Filter Natural Disaster App Dataset

In [24]:
# Apps this analysis treats as "pure" natural-disaster apps.
# Every entry must be followed by a comma: Python silently concatenates adjacent
# string literals, so a missing comma fuses two app names into one that never matches.
pure_natural_disaster_app_name = [
    'Earthquake Alert!', 'My Earthquake Alerts - Map', 'Earthquakes Tracker', 'Yurekuru Call',
    'Wind Map Hurricane Tracker 3D', 'global storms', 'FEMA',
    'Volcanoes & Earthquakes', 'Hazards Near Me NSW', 'Disaster Alert',
    'Tropical Hurricane Tracker', 'GeoNet', 'My Hurricane Tracker & Alerts',
    'Emergency: Severe Weather App', 'Hurricane Tracker', 'Hazards Red Cross',
    'NINA - Die Warn-App des BBK', 'SeaStorm Hurricane Tracker', 'National evacuation center guide',
    'My Hurricane Tracker Pro', 'Alert SA', 'Floods Near Me NSW', 'Safety tips',
    'Earthquake', 'Earthquake + Alerts Map & Info', 'Natural Disaster Monitor',
    'Earthquakes Today', 'FloodAlert Waterlevel Alerts', 'NERV Disaster Prevention',
    'SES Assistance QLD', 'Hurricane & Typhoon Track',
    'QuakeFeed Earthquake Tracker', 'LastQuake', 'VIC Fires', 'PREP',
    '112 India',
]

# The broader natural-disaster group: every pure app plus the additional apps
# listed here. Building it from the pure list keeps the two lists in sync.
natural_disaster_app_name = pure_natural_disaster_app_name + [
    'VicEmergency', 'CodeRED Mobile Alert', 'myAlerts', 'SD Emergency',
    'Emergency', 'Alertswiss', 'Alert2Me - Emergency Alerts', 'BD 999',
    'KwiKam (Quicking Services)', 'Emergency Ready App', 'Anhaar',
]

# Reviews of the pure apps only.
df_pure_natural_disaster = df[df.app.isin(pure_natural_disaster_app_name)]

# Split all reviews into the broader natural-disaster group and everything else.
df_natural_disaster = df[df.app.isin(natural_disaster_app_name)]
df_other_emergency = df[~df.app.isin(natural_disaster_app_name)]

# 4. Cleansing Dataset

In [25]:
def cleansing_dataset(df):
    """Return a cleaned copy of a review frame.

    Steps, in order:
      1. drop rows that repeat the same ('content', 'app') pair, keeping the first;
      2. drop rows whose 'content' is missing;
      3. keep only reviews with more than 4 whitespace-separated words;
      4. keep only the 'app', 'content' and 'score' columns;
      5. drop rows whose 'score' equals 0 and renumber the index from 0.

    The input frame is not modified.
    """
    deduplicated = df.drop_duplicates(subset=['content', 'app'])
    with_text = deduplicated.dropna(subset=['content'])

    # str() guards against non-string content values before splitting on whitespace.
    n_words = with_text['content'].map(lambda text: len(str(text).split()))
    long_enough = with_text[n_words > 4]

    trimmed = long_enough[['app', 'content', 'score']]
    return trimmed[trimmed.score != 0].reset_index(drop=True)

# Run the same cleansing over both review groups and rebind each name to its cleaned frame.
df_natural_disaster, df_other_emergency = map(
    cleansing_dataset, (df_natural_disaster, df_other_emergency)
)

In [26]:
# Show the natural-disaster review frame; as the cell's last expression it is rendered as the output.
df_natural_disaster

Unnamed: 0,app,content,score
0,Disaster Alert,Working as a Public Health Nurse I get to resp...,5
1,Disaster Alert,Nice to have before traveling to unknown terri...,5
2,Disaster Alert,I like! I'm trying to find anything about tsun...,5
3,Disaster Alert,good to have but what options are expected in ...,5
4,Disaster Alert,Shows hazards all right but refuses to send no...,2
...,...,...,...
28156,Earthquake,It works quite well even anticipates some othe...,5
28157,Earthquake,This application is very good.,5
28158,Earthquake,Data from earthquakes in Chile in the last 24 ...,2
28159,Earthquake,This is as good as earthquake apps can go. Thi...,5


In [27]:
# Show the review frame of all remaining apps; as the cell's last expression it is rendered as the output.
df_other_emergency

Unnamed: 0,app,content,score
0,Emergency Alert,Use as part of CRT. Would be lost without this...,5
1,Emergency Alert,I've been using this app for many years and ne...,5
2,Emergency Alert,worked great until I updated the app. now it d...,3
3,Emergency Alert,Works Great very granular filters. I just wish...,5
4,Emergency Alert,Worked great for years then stopped working li...,5
...,...,...,...
46185,My Earthquake Alerts & Feed,Jau i uwas myself to use uthe this iup appeara...,5
46186,My Earthquake Alerts & Feed,Could color code the events to indicate time o...,3
46187,My Earthquake Alerts & Feed,Do they put South America and North America as...,1
46188,My Earthquake Alerts & Feed,This is a great app.Its is very informative an...,5


# 5. Exploratory Data Analysis

## 5.1. Count App Based on Emergency Type

In [32]:
# Number of distinct app names in each group.
# NOTE(review): df_pure_natural_disaster is filtered from the un-cleansed frame, while
# df_natural_disaster and df_other_emergency have been through cleansing_dataset —
# confirm the counts below are meant to be compared/subtracted across that difference.
total_pure_natural_disaster_app, total_natural_disaster_app, total_other_emergency_app = (
    len(set(frame['app']))
    for frame in (df_pure_natural_disaster, df_natural_disaster, df_other_emergency)
)

print(
    "Total Pure Natural Disaster APP = ",
    total_pure_natural_disaster_app,
)
# Apps in the natural-disaster group that are not in the pure group.
print(
    "Total General Emergency App that has natural disaster feature = ",
    total_natural_disaster_app - total_pure_natural_disaster_app,
)
print(
    "Total Natural Disaster APP after Adding General Emergency APP that has natural disaster feature = ",
    total_natural_disaster_app,
)
# The two cleansed groups are disjoint by construction, so their sum is the overall app count.
print(
    "Total All APP = ",
    total_other_emergency_app + total_natural_disaster_app,
)

Total Pure Natural Disaster APP =  34
Total General Emergency App that has natural disaster feature =  11
Total Natural Disaster APP after Adding General Emergency APP that has natural disaster feature =  45
Total All APP =  99


## 5.2. Total Reviews

In [34]:
# len() of the frame is the number of review rows, and df_natural_disaster covers the
# whole natural-disaster group (not only the pure apps), so the label says exactly that.
print("Total Natural Disaster APP Reviews = ", len(df_natural_disaster))

Total Pure Natural Disaster APP =  28161
