In [None]:
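# Code source (tutorial) => https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/
# Dataset used by that tutorial => http://archive.ics.uci.edu/ml/datasets/Online+Retail
# NOTE: this notebook itself loads "Life Expectancy Data.csv", not the Online Retail dataset linked above.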


In [125]:
# Langkah 1: Mengimpor library yang diperlukan
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [126]:
# Step 2: Load and explore the data
# Read the CSV (path is relative to the working directory) into a DataFrame
data = pd.read_csv("Life Expectancy Data.csv")
# Show the first rows as a quick look at the loaded table
data.head()

Unnamed: 0,Country,Year,Status,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,...,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
0,Afghanistan,2015,Developing,65.0,263.0,62,0.01,71.279624,65.0,1154,...,6.0,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1
1,Afghanistan,2014,Developing,59.9,271.0,64,0.01,73.523582,62.0,492,...,58.0,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0
2,Afghanistan,2013,Developing,59.9,268.0,66,0.01,73.219243,64.0,430,...,62.0,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9
3,Afghanistan,2012,Developing,59.5,272.0,69,0.01,78.184215,67.0,2787,...,67.0,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8
4,Afghanistan,2011,Developing,59.2,275.0,71,0.01,7.097109,68.0,3013,...,68.0,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5


In [127]:
# Inspect the column labels; note in the output that several of them carry
# leading/trailing spaces (e.g. 'Life expectancy '), which later cells rely on.
data.columns

Index(['Country', 'Year', 'Status', 'Life expectancy ', 'Adult Mortality',
       'infant deaths', 'Alcohol', 'percentage expenditure', 'Hepatitis B',
       'Measles ', ' BMI ', 'under-five deaths ', 'Polio', 'Total expenditure',
       'Diphtheria ', ' HIV/AIDS', 'GDP', 'Population',
       ' thinness  1-19 years', ' thinness 5-9 years',
       'Income composition of resources', 'Schooling'],
      dtype='object')

In [128]:
# List the distinct values of the 'Country' column
data.Country.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia (Plurinational State of)', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Brunei Darussalam', 'Bulgaria',
       'Burkina Faso', 'Burundi', "Côte d'Ivoire", 'Cabo Verde',
       'Cambodia', 'Cameroon', 'Canada', 'Central African Republic',
       'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
       'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus',
       'Czechia', "Democratic People's Republic of Korea",
       'Democratic Republic of the Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia',
       'Georgia', 'Germany'

In [129]:
# Step 3: Clean the data
# Trim surrounding whitespace from the 'Status' labels
data['Status'] = data['Status'].str.strip()

# Drop the rows that have no 'Life expectancy ' value (row-wise drop is the
# default), then store that column as strings.
data.dropna(subset=['Life expectancy '], inplace=True)
data['Life expectancy '] = data['Life expectancy '].astype(str)

In [130]:
# Step 4: Split the data by country
def build_country_basket(frame, country):
    """Pivot one country's rows into a basket table.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing the 'Country', 'Life expectancy ', 'Year' and
        'infant deaths' columns.
    country : str
        Value of the 'Country' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Life expectancy ' value, one column per 'Year',
        each cell holding the summed 'infant deaths' for that pair; pairs that
        do not occur are filled with 0.
    """
    return (frame[frame['Country'] == country]
            .groupby(['Life expectancy ', 'Year'])['infant deaths']
            .sum().unstack().reset_index().fillna(0)
            .set_index('Life expectancy '))


# Same pipeline for every country; only the filter value changes.
basket_Afghanistan = build_country_basket(data, "Afghanistan")
basket_Argentina = build_country_basket(data, "Argentina")
basket_Azerbaijan = build_country_basket(data, "Azerbaijan")

In [131]:
# Step 5: Encode the data ("hot encoding")
# Define the function that turns a basket cell into the 0/1 flag fed to apriori
def hot_encode(x):
    """Map a numeric cell value to a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Cell value to encode.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0. Values strictly between 0 and 1 and
        NaN also yield 0, so the function never falls through and returns
        ``None``.
    """
    return 1 if x >= 1 else 0

In [132]:
# Encode each basket as 0/1 flags.
# hot_encode is applied element-wise through Series.map, column by column,
# instead of DataFrame.applymap, which pandas deprecated in 2.1; the values
# produced are the same.
basket_Afghanistan = basket_Afghanistan.apply(lambda col: col.map(hot_encode))
basket_Argentina = basket_Argentina.apply(lambda col: col.map(hot_encode))
basket_Azerbaijan = basket_Azerbaijan.apply(lambda col: col.map(hot_encode))

In [133]:
# Step 6: Build the model and analyse the results
# 1) Afghanistan
# Mine frequent itemsets, derive rules with lift >= 1, and rank them by
# confidence and then lift (both descending).
frq_items = apriori(basket_Afghanistan, min_support=0.05, use_colnames=True)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2005),(2006),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
1,(2006),(2005),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
2,(2013),(2014),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
3,(2014),(2013),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf


In [134]:
# 2) Argentina
# Mine frequent itemsets, derive rules with lift >= 1, and rank them by
# confidence and then lift (both descending).
frq_items = apriori(basket_Argentina, min_support=0.01, use_colnames=True)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2000),(2002),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
1,(2002),(2000),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
2,(2000),(2003),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
3,(2003),(2000),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
4,(2002),(2003),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
5,(2003),(2002),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
6,"(2000, 2002)",(2003),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
7,"(2000, 2003)",(2002),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
8,"(2002, 2003)",(2000),0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf
9,(2000),"(2002, 2003)",0.071429,0.071429,0.071429,1.0,14.0,0.066327,inf


In [135]:
# 3) Azerbaijan
# Mine frequent itemsets, derive rules with lift >= 1, and rank them by
# confidence and then lift (both descending).
frq_items = apriori(basket_Azerbaijan, min_support=0.05, use_colnames=True)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules.head(6)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2002),(2003),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
1,(2003),(2002),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
2,(2004),(2005),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
3,(2005),(2004),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
4,(2008),(2007),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
5,(2007),(2008),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
