##### Importing all files

In [1]:
import numpy as np
import pandas as pd
from apyori import apriori

##### Reading dataset

In [3]:
data = pd.read_csv(r'..\\Inputs\\diabetes.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [4]:
num_records = len(data)
print(num_records)

768


##### Preprocessing data

In [5]:
data.drop(data[data.BMI == 0].index, inplace=True)
data.shape

(757, 9)

In [6]:
data.drop(data[data.BloodPressure == 0].index, inplace=True)
data.shape

(729, 9)

In [7]:
num_records = len(data)
records = []
for i in range(num_records):
    records.append([str(data.values[i,j]) for j in range(9)])

##### Applying Apriori Model

In [8]:
association_rules = apriori(records,min_support = 0.0053, min_confidence = 0.20, min_lift = 3, min_length = 2)
association_results = list(association_rules)

In [9]:
print(len(association_results))

87


##### Association Rules

In [10]:
print(association_results[0])

RelationRecord(items=frozenset({'11.0', '80.0'}), support=0.0054869684499314125, ordered_statistics=[OrderedStatistic(items_base=frozenset({'11.0'}), items_add=frozenset({'80.0'}), confidence=0.25, lift=4.142045454545455)])


##### Finding Results

In [12]:
results = []
for item in association_results:
    pair = item[0]
    items = [x for x in pair]
    value0 = str(items[0])
    value1 = str(items[1])
    value2 = str(item[1])[:7]
    value3 = str(item[2][0][2])[:7]
    value4 = str(item[2][0][3])[:7]
    
    rows = (value0,value1,value2,value3,value4)
    results.append(rows)
    Label = ['Title1', 'Title2', 'Support', 'Confidence', 'Lift']
    diabetic = pd.DataFrame.from_records(results,columns=Label)
print(diabetic)

   Title1 Title2  Support Confidence     Lift
0    11.0   80.0  0.00548       0.25  4.14204
1   112.0   24.0  0.00548    0.28571  3.41451
2   115.0   28.0  0.00548    0.28571  3.65413
3    12.0   62.0  0.00548       0.25  4.67307
4    31.0  125.0  0.00548    0.23529  4.08403
..    ...    ...      ...        ...      ...
82   64.0    2.0  0.00548    0.28571  5.34065
83    2.0   22.0  0.00548    0.57142  4.95918
84    2.0   22.0  0.00548    0.79999     7.29
85    2.0   26.0  0.00548    0.44444  7.53488
86    8.0   74.0  0.00548    0.66666  17.9999

[87 rows x 5 columns]
