In [53]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [54]:
# Read the country-level COVID-19 table; the path is relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer="countrycovid.csv")
# Preview the first five rows as a quick check on the parse.
data.head(n=5)

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.5,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.0,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.6,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa


In [55]:
# List the column labels to confirm what the CSV parse produced.
data.columns

Index(['Country', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New cases',
       'New deaths', 'New recovered', 'Deaths / 100 Cases',
       'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'Confirmed last week', '1 week change', '1 week % increase',
       'WHO Region'],
      dtype='object')

In [56]:
# Distinct values of the 'Country' column, in order of first appearance.
data['Country'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Greenland',
       'Grenada', 'Guatemala', 'Guinea', 'G

In [72]:
# Clean the table in one non-mutating chain (no inplace=True), rebinding `data`
# once at the end:
#   * drop rows that have no 'Confirmed' value,
#   * trim surrounding whitespace from country names so equality filters match,
#   * store 'Confirmed' as text, since it is used below as a grouping key/index.
data = (
    data
    .dropna(subset=['Confirmed'])
    .assign(
        Country=lambda df: df['Country'].str.strip(),
        Confirmed=lambda df: df['Confirmed'].astype('str'),
    )
)

In [73]:
def _country_basket(frame, country):
    """Pivot one country's rows into a Confirmed x Deaths table of summed Recovered.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with 'Country', 'Confirmed', 'Deaths' and 'Recovered' columns.
    country : str
        Exact value to match in the 'Country' column.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Confirmed', with one column per distinct 'Deaths' value and
        cells holding the summed 'Recovered' (0 where a combination is absent).
    """
    return (
        frame[frame['Country'] == country]
        .groupby(['Confirmed', 'Deaths'])['Recovered']
        .sum()
        .unstack()
        .reset_index()
        .fillna(0)
        .set_index('Confirmed')
    )


# One basket per country of interest; only the country name differs.
basket_indonesia = _country_basket(data, "Indonesia")
basket_malaysia = _country_basket(data, "Malaysia")
basket_canada = _country_basket(data, "Canada")
basket_japan = _country_basket(data, "Japan")

# NOTE(review): the Japan basket displayed below is a single row by a single
# column. If the dataset really holds one row per country, every basket will be
# 1 x 1 and rule mining has nothing to associate -- confirm the intended input.
basket_japan

Deaths,998
Confirmed,Unnamed: 1_level_1
31142,21970


In [74]:
def hot_encode(x):
    """Collapse a numeric cell into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        A single cell value from a basket table.

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.

    Notes
    -----
    Testing only ``x <= 0`` and ``x >= 1`` leaves a gap: a value strictly
    between 0 and 1, or NaN, matches neither condition and yields an implicit
    ``None``. A single ``x > 0`` test covers every input.
    """
    # NaN fails the comparison, so it is treated as "absent" (0).
    return 1 if x > 0 else 0

In [75]:
# Apply hot_encode cell-by-cell to every basket and rebind each name to its
# encoded frame. The input tuple is built before any name is reassigned.
basket_indonesia, basket_malaysia, basket_canada, basket_japan = [
    basket.applymap(hot_encode)
    for basket in (basket_indonesia, basket_malaysia, basket_canada, basket_japan)
]
# basket_encoded is kept as an alias of the last encoded frame (Japan).
basket_encoded = basket_japan
basket_canada

Deaths,8944
Confirmed,Unnamed: 1_level_1
116458,0


In [76]:
# Frequent itemsets for the Indonesia basket at a minimum support of 0.05.
frq_items = apriori(basket_indonesia, use_colnames=True, min_support=0.05)
# Build rules using a lift threshold of 1, then rank by confidence and lift,
# both in descending order.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction]
Index: []


In [77]:
# Frequent itemsets for the Malaysia basket at a minimum support of 0.05.
frq_items = apriori(basket_malaysia, use_colnames=True, min_support=0.05)
# Build rules using a lift threshold of 1, then rank by confidence and lift,
# both in descending order.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction]
Index: []


In [78]:
# Frequent itemsets for the Japan basket at a minimum support of 0.05.
frq_items = apriori(basket_japan, use_colnames=True, min_support=0.05)
# Build rules using a lift threshold of 1, then rank by confidence and lift,
# both in descending order.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction]
Index: []
