In [2]:
# NOTE(review): this installs apyori, but the cells below import apriori and
# association_rules from mlxtend -- confirm whether this install is still needed.
!pip install apyori

Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5954 sha256=f7c1a5e20b9c0544d23f4b6fd96ad2282c9445ca0ac5e79790c9d0b93bd34fd3
  Stored in directory: /root/.cache/pip/wheels/77/3d/a6/d317a6fb32be58a602b1e8c6b5d6f31f79322da554cad2a5ea
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [13]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [15]:
# Read the Titanic records from the CSV file into a DataFrame.
Titanic = pd.read_csv(filepath_or_buffer="/content/TitanicData.csv")

In [16]:
# Display the loaded DataFrame (the notebook shows a truncated preview of its rows).
Titanic

Unnamed: 0,Class,Sex,Age,Survived
1,3rd,Male,Child,No
2,3rd,Male,Child,No
3,3rd,Male,Child,No
4,3rd,Male,Child,No
5,3rd,Male,Child,No
...,...,...,...,...
2197,Crew,Female,Adult,Yes
2198,Crew,Female,Adult,Yes
2199,Crew,Female,Adult,Yes
2200,Crew,Female,Adult,Yes


We need to convert our dataset into a transaction-like format, because association rule mining algorithms such as Apriori are designed to work on sets of items, not on individual columns like "Class" or "Sex".

In [17]:
# Convert the data into a transaction-like format: each row is treated as a set
# of attribute items. Every value is prefixed with its column name so that items
# from different columns stay distinct (e.g. 'Class_3rd', 'Sex_Male').
for column in ['Class', 'Sex', 'Age', 'Survived']:
    prefix = f'{column}_'
    values = Titanic[column].astype(str)
    # Only add the prefix where it is not present yet, so re-running this cell
    # does not stack prefixes (e.g. 'Class_Class_3rd').
    Titanic[column] = values.where(values.str.startswith(prefix), prefix + values)

In [18]:
# Build the transaction table by keeping only the attribute columns that act as items.
item_columns = ['Class', 'Sex', 'Age', 'Survived']
transactions = Titanic[item_columns]

Next, we one-hot encode the transactions to prepare the dataset for mlxtend. Mlxtend ("machine learning extensions") is a Python library that complements scikit-learn with additional functionality, such as association rule mining (Apriori).

In [19]:
# One-hot encode the transactions: stack every cell into one long Series of
# items, create one indicator column per distinct item, then collapse back to a
# single row per transaction (level 0 of the stacked index is the row label).
# `.any()` yields boolean indicators rather than integer counts; mlxtend's
# apriori is meant to receive True/False (or 0/1) indicator columns.
one_hot_Titanic = (
    pd.get_dummies(transactions.stack())
    .groupby(level=0)
    .any()
)


In [20]:
# Mine the frequent itemsets with Apriori, using a minimum support of 0.005 and
# labelling the itemsets with the one-hot column names.
frequent_itemsets = apriori(
    df=one_hot_Titanic,
    min_support=0.005,
    use_colnames=True,
)



In [21]:
# Derive association rules from the frequent itemsets, keeping only rules whose
# confidence reaches the 0.8 threshold.
rules = association_rules(
    df=frequent_itemsets,
    metric='confidence',
    min_threshold=0.8,
)

In [22]:
# Keep rules with at least one item on each side, i.e. a total length of at least 2.
has_antecedent = rules['antecedents'].map(len) >= 1
has_consequent = rules['consequents'].map(len) >= 1
rules = rules[has_antecedent & has_consequent]

In [23]:
# Redundancy check used to prune the rule set
def is_redundant(r, rules_Titanic):
    """Tell whether rule ``r`` is made redundant by another rule.

    ``r`` is a single rule (a row with 'antecedents', 'consequents' and 'lift'),
    and ``rules_Titanic`` is the DataFrame of rules it is compared against.

    Returns True when some row with a different index label has antecedents
    that are a subset of ``r``'s antecedents, consequents that are a subset of
    ``r``'s consequents, and a lift greater than or equal to ``r``'s lift.
    Returns False otherwise.
    """
    def dominates(label, other):
        # A rule is never compared against itself (same index label).
        return bool(
            label != r.name
            and other['lift'] >= r['lift']
            and other['antecedents'].issubset(r['antecedents'])
            and other['consequents'].issubset(r['consequents'])
        )

    return any(dominates(label, other) for label, other in rules_Titanic.iterrows())

# Flag every rule that is_redundant reports as redundant with respect to the
# full rule set, then keep only the unflagged ones.
redundant_mask = rules.apply(is_redundant, axis=1, args=(rules,))
non_redundant_rules = rules[~redundant_mask]

In [24]:
# Rank the remaining rules from highest to lowest lift.
sorted_rules = non_redundant_rules.sort_values('lift', ascending=False)

In [25]:
# Print each rule with its support, confidence and lift, followed by a separator line.
for idx, rule in sorted_rules.iterrows():
    print(
        f"Rule: {set(rule['antecedents'])} => {set(rule['consequents'])}",
        f"Support: {rule['support']:.3f}, Confidence: {rule['confidence']:.3f}, Lift: {rule['lift']:.3f}",
        "-----",
        sep="\n",
    )

Rule: {'Survived_Yes', 'Age_Adult', 'Class_2nd'} => {'Sex_Female'}
Support: 0.036, Confidence: 0.851, Lift: 3.986
-----
Rule: {'Class_1st', 'Sex_Female'} => {'Survived_Yes', 'Age_Adult'}
Support: 0.064, Confidence: 0.966, Lift: 3.249
-----
Rule: {'Survived_No', 'Age_Child'} => {'Class_3rd'}
Support: 0.024, Confidence: 1.000, Lift: 3.118
-----
Rule: {'Age_Child', 'Class_2nd'} => {'Survived_Yes'}
Support: 0.011, Confidence: 1.000, Lift: 3.096
-----
Rule: {'Class_1st', 'Sex_Female'} => {'Survived_Yes'}
Support: 0.064, Confidence: 0.972, Lift: 3.010
-----
Rule: {'Class_Crew', 'Sex_Female'} => {'Survived_Yes', 'Age_Adult'}
Support: 0.009, Confidence: 0.870, Lift: 2.926
-----
Rule: {'Sex_Female', 'Class_2nd'} => {'Survived_Yes'}
Support: 0.042, Confidence: 0.877, Lift: 2.716
-----
Rule: {'Class_Crew', 'Sex_Female'} => {'Survived_Yes'}
Support: 0.009, Confidence: 0.870, Lift: 2.692
-----
Rule: {'Survived_No', 'Sex_Female'} => {'Class_3rd'}
Support: 0.048, Confidence: 0.841, Lift: 2.623
-----
