In [1]:
import math
import pandas as pd
import pycountry_convert as pc
from datetime import datetime
from apyori import apriori

In [2]:
# Muat dataset gempa bumi
earthquake = pd.read_csv('significant_earthquake.csv', parse_dates=['Date'], dayfirst=True)
earthquake

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,1965-02-01 00:00:00,13:44:18,19.2460,145.6160,Earthquake,131.60,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,1965-04-01 00:00:00,11:29:49,1.8630,127.3520,Earthquake,80.00,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,1965-05-01 00:00:00,18:05:58,-20.5790,-173.9720,Earthquake,20.00,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,1965-08-01 00:00:00,18:49:43,-59.0760,-23.5570,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,1965-09-01 00:00:00,13:32:50,11.9380,126.4270,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23407,2016-12-28 00:00:00,08:22:12,38.3917,-118.8941,Earthquake,12.30,1.2,40.0,5.6,ML,...,18.0,42.47,0.120,,0.1898,NN00570710,NN,NN,NN,Reviewed
23408,2016-12-28 00:00:00,09:13:47,38.3777,-118.8957,Earthquake,8.80,2.0,33.0,5.5,ML,...,18.0,48.58,0.129,,0.2187,NN00570744,NN,NN,NN,Reviewed
23409,2016-12-28 00:00:00,12:38:51,36.9179,140.4262,Earthquake,10.00,1.8,,5.9,MWW,...,,91.00,0.992,4.8,1.5200,US10007NAF,US,US,US,Reviewed
23410,2016-12-29 00:00:00,22:30:19,-9.0283,118.6639,Earthquake,79.00,1.8,,6.3,MWW,...,,26.00,3.553,6.0,1.4300,US10007NL0,US,US,US,Reviewed


In [3]:
# Muat tabel translasi koordinat ke negara
country_lat_long = pd.read_csv('world_country_and_usa_states_latitude_and_longitude_values.csv')
country_lat_long.drop(axis=1, columns=['usa_state_code', 'usa_state_latitude', 'usa_state_longitude', 'usa_state'], inplace=True)

# Silly Pandas
country_lat_long.loc[country_lat_long['country_code'] != country_lat_long['country_code'], 'country_code'] = 'NA'
country_lat_long.dropna(axis=0, how='any', inplace=True)

for i in country_lat_long.iloc:
    if not (i['latitude'] == i['latitude']) or not (i['longitude'] == i['longitude']):
        print(i)
    try:
        x = math.radians(i['latitude']) + math.radians(i['longitude'])
        y = len(i['country_code'])
    except TypeError:
        print(i)


In [4]:
# Beberapa fungsi pembantu.

# https://stackoverflow.com/a/15737218
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a)) 
    return c

def what_continent(lat, long):
    shortest = math.inf
    continent = None
    for country in country_lat_long.iloc:
        dist = haversine(lat, long, country['latitude'], country['longitude'])
        if dist < shortest:
            try:
                continent = pc.country_alpha2_to_continent_code(country['country_code'])
                shortest = dist
            except KeyError:
                pass
    return pc.convert_continent_code_to_continent_name(continent)

def get_earthquake_textual_depth(depth):
    if depth >= 300:
        return 'Deep'
    elif depth >= 70:
        return 'Intermediate'
    else:
        return 'Shallow'

In [5]:
# Pada dataset ini, ada 4 jenis: Earthquake, Explosion, Nuclear Explosion, dan Rock Burst
# Kita hanya tertarik pada data jenis Earthquake dan yang memiliki data kekuatan
earthquake_only = earthquake[(earthquake['Type'] == 'Earthquake') & (earthquake['Magnitude'] == earthquake['Magnitude'])]
earthquake_only

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,1965-02-01 00:00:00,13:44:18,19.2460,145.6160,Earthquake,131.60,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,1965-04-01 00:00:00,11:29:49,1.8630,127.3520,Earthquake,80.00,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,1965-05-01 00:00:00,18:05:58,-20.5790,-173.9720,Earthquake,20.00,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,1965-08-01 00:00:00,18:49:43,-59.0760,-23.5570,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,1965-09-01 00:00:00,13:32:50,11.9380,126.4270,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23407,2016-12-28 00:00:00,08:22:12,38.3917,-118.8941,Earthquake,12.30,1.2,40.0,5.6,ML,...,18.0,42.47,0.120,,0.1898,NN00570710,NN,NN,NN,Reviewed
23408,2016-12-28 00:00:00,09:13:47,38.3777,-118.8957,Earthquake,8.80,2.0,33.0,5.5,ML,...,18.0,48.58,0.129,,0.2187,NN00570744,NN,NN,NN,Reviewed
23409,2016-12-28 00:00:00,12:38:51,36.9179,140.4262,Earthquake,10.00,1.8,,5.9,MWW,...,,91.00,0.992,4.8,1.5200,US10007NAF,US,US,US,Reviewed
23410,2016-12-29 00:00:00,22:30:19,-9.0283,118.6639,Earthquake,79.00,1.8,,6.3,MWW,...,,26.00,3.553,6.0,1.4300,US10007NL0,US,US,US,Reviewed


In [6]:
for i in country_lat_long.iloc:
    try:
        x = math.radians(i['latitude']) + math.radians(i['longitude'])
    except TypeError:
        print(i, type(i['latitude']), type(i['longitude']))

In [7]:
# Sederhanakan data.
# Data gempa yang akan diambil hanyalah bulan & tahun, benua, kedalaman, dan kekuatannya dalam bilangan bulat.
new_earthquake_data_list = []

for data in earthquake_only.iloc:
    new_earthquake_data_list.append([
        int(data['Date'].strftime('%Y%m')),
        what_continent(data['Latitude'], data['Longitude']),
        get_earthquake_textual_depth(data['Depth']),
        int(data['Magnitude'])
    ])

In [8]:
# Kita akan beroperasi pada bulan & tahun kemudian menggunakan kombinasi
# 1. benua & kedalaman
# 2. benua & kekuatan
# 3. kedalaman & kekuatan

year_earthquake_index = []
earthquake_transactions = [[], [], []]

for data in new_earthquake_data_list:
    # Cari index bulan & tahun
    try:
        index = year_earthquake_index.index(data[0])
    except ValueError:
        index = len(year_earthquake_index)
        year_earthquake_index.append(data[0])
        earthquake_transactions[0].append([])
        earthquake_transactions[1].append([])
        earthquake_transactions[2].append([])
    # Proses berdasarkan kombinasi diatas
    earthquake_transactions[0][index].append(data[1] + ' ' + data[2])
    earthquake_transactions[1][index].append(data[1] + ' ' + str(data[3]))
    earthquake_transactions[2][index].append(data[2] + ' ' + str(data[3]))

In [9]:
# Jalankan algoritma apriori
earthquake_apriori = [apriori(x) for x in earthquake_transactions]
earthquake_apriori_result = [list(x) for x in earthquake_apriori]

In [10]:
# Ubah ke bentuk dataframe untuk ditampilkan simpulannya dan agar mudah disimpan ke CSV
earthquake_result_df = []

for array_result in earthquake_apriori_result:
    data = []
    for result in array_result:
        for relation_list in result.ordered_statistics:
            data.append([
                "{%s}" % ','.join(relation_list.items_base),
                "{%s}" % ','.join(relation_list.items_add),
                result.support,
                relation_list.confidence,
                relation_list.lift
            ])
    earthquake_result_df.append(pd.DataFrame(data, columns=['lhs', 'rhs', 'support', 'confidence', 'lift']))

In [11]:
# Tuliskan hasil kedalam Excel
with pd.ExcelWriter('earthquake_association_rules_apriori.xlsx') as writer:
    earthquake_result_df[0].to_excel(writer, sheet_name='Continent & Depth')
    earthquake_result_df[1].to_excel(writer, sheet_name='Continent & Magnitude')
    earthquake_result_df[2].to_excel(writer, sheet_name='Depth & Magnitude')

In [12]:
# Kesimpulan dari kombinasi benua & kedalaman
earthquake_result_df[0]

Unnamed: 0,lhs,rhs,support,confidence,lift
0,{},{Africa Shallow},0.556090,0.556090,1.000000
1,{},{Antarctica Shallow},0.370192,0.370192,1.000000
2,{},{Asia Deep},0.453526,0.453526,1.000000
3,{},{Asia Intermediate},0.756410,0.756410,1.000000
4,{},{Asia Shallow},0.998397,0.998397,1.000000
...,...,...,...,...,...
141260,"{South America Intermediate,Asia Intermediate,...",{Oceania Deep},0.131410,0.773585,1.107149
141261,"{Oceania Deep,South America Intermediate,Asia ...",{North America Shallow},0.131410,0.921348,1.003353
141262,"{Oceania Deep,South America Intermediate,Asia ...",{Europe Shallow},0.131410,0.463277,0.957234
141263,"{Oceania Deep,South America Intermediate,Asia ...",{Asia Shallow},0.131410,1.000000,1.001605


In [13]:
# Kesimpulan dari kombinasi benua & kekuatan
earthquake_result_df[1]

Unnamed: 0,lhs,rhs,support,confidence,lift
0,{},{Africa 5},0.448718,0.448718,1.000000
1,{},{Africa 6},0.200321,0.200321,1.000000
2,{},{Antarctica 5},0.259615,0.259615,1.000000
3,{},{Antarctica 6},0.128205,0.128205,1.000000
4,{},{Asia 5},0.998397,0.998397,1.000000
...,...,...,...,...,...
97784,"{South America 5,North America 5,Oceania 6,Nor...",{Oceania 5},0.107372,1.000000,1.000000
97785,"{South America 5,North America 5,Oceania 5,Oce...",{North America 6},0.107372,0.663366,1.075170
97786,"{South America 5,Oceania 5,Oceania 6,North Ame...",{North America 5},0.107372,0.870130,1.007349
97787,"{South America 5,North America 5,Oceania 5,Oce...",{Asia 6},0.107372,0.943662,1.010026


In [14]:
# Kesimpulan dari kombinasi kedalaman & kedalaman
earthquake_result_df[2]

Unnamed: 0,lhs,rhs,support,confidence,lift
0,{},{Deep 5},0.709936,0.709936,1.000000
1,{},{Deep 6},0.445513,0.445513,1.000000
2,{},{Intermediate 5},0.974359,0.974359,1.000000
3,{},{Intermediate 6},0.775641,0.775641,1.000000
4,{},{Intermediate 7},0.136218,0.136218,1.000000
...,...,...,...,...,...
2200,"{Deep 5,Shallow 6,Shallow 7,Intermediate 5,Dee...",{Shallow 5},0.144231,1.000000,1.000000
2201,"{Deep 5,Shallow 6,Shallow 7,Intermediate 5,Sha...",{Intermediate 6},0.144231,0.810811,1.045343
2202,"{Deep 5,Shallow 6,Shallow 7,Shallow 5,Deep 6,I...",{Intermediate 5},0.144231,0.967742,0.993209
2203,"{Deep 5,Shallow 6,Shallow 7,Intermediate 5,Sha...",{Deep 6},0.144231,0.491803,1.103904
