## Handling Missing Attributes and Rule Induction on Iris Dataset !!

In [1]:
import time
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 200)
from pandas.api.types import is_numeric_dtype
from functools import reduce
from pandas.api.types import is_numeric_dtype
from itertools import groupby

In [2]:
df = pd.read_csv('../data/Iris/test.csv')

In [3]:
df = df.drop(['Unnamed: 0', 'sort_col'], axis=1)

In [4]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.4,3.4,1.7,0.2,Iris-setosa
1,5.8,2.6,4,1.2,Iris-versicolor
2,5.2,4.1,1.5,0.1,Iris-setosa
3,6.1,3,4.6,1.4,Iris-versicolor
4,6.3,?,?,1.5,Iris-viginica
5,?,2.8,4.7,1.2,Iris-versicolor
6,5.1,3.4,1.5,0.2,Iris-setosa
7,5,3.4,1.6,0.4,Iris-setosa
8,5,2.3,3.3,1.0,Iris-versicolor
9,6.7,2.5,5.8,1.8,Iris-viginica


### Filling up the missing attributes with Lost Value Interpretation !!

#### Defining Goal Set !!

In [5]:
df_headers = list(df)
concept = df_headers[-1]
concept

'class'

In [6]:
#all unique concepts
concept_list = df[concept].unique()
concept_list

array(['Iris-setosa', 'Iris-versicolor', 'Iris-viginica'], dtype=object)

In [7]:
#calculating cases by concepts and making sets
U = [] #universal list containing all cases
temp_list = []
goal_list = []
for item in concept_list:
    for index, row in df.iterrows():
        U.append(index+1)
        if row[concept] == item:
            temp_list.append(index)
    goal_list.append(temp_list)
    temp_list = []

In [8]:
print(goal_list,)

[[0, 2, 6, 7], [1, 3, 5, 8], [4, 9]]


#### Building Case List !!

In [9]:
attributes = list(df)
attributes

['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

In [10]:
case_list = []

In [11]:
def discretize(numeric_col):
    print(numeric_col)
    #Sorting the values of numeric column
    sort_col = df[numeric_col].sort_values()
   
    df['sort_col'] = sort_col.values
    point_list = df['sort_col'].unique()
    point_list = list(point_list)
    if '?' in point_list:
        point_list.remove('?')
    print(point_list)
    
    #Finding average between each two points
    avg_list = []
    for i in range(len(point_list)-1):
        avg = (float(point_list[i]) + float(point_list[i+1]))/2
        avg_list.append(round(float(avg),1))
        
    print (avg_list)
    #Performing the discretization and adding the cases
    for i in avg_list:
        case = str(numeric_col) + "," + str(round(float(point_list[0]),1)) + ".." + str(i)
        case2 = str(numeric_col) + "," + str(i) + ".." + str(round(float(point_list[len(point_list)-1]),1))
        case_list.append(case)
        case_list.append(case2)

In [12]:
#Discretization considering upto 2 decimal point
for item in attributes[:-1]:
        discretize(item)

sepal_length
['5', '5.1', '5.2', '5.4', '5.8', '6.1', '6.3', '6.7']
[5.0, 5.2, 5.3, 5.6, 5.9, 6.2, 6.5]
sepal_width
['2.3', '2.5', '2.6', '2.8', '3', '3.4', '4.1']
[2.4, 2.5, 2.7, 2.9, 3.2, 3.8]
petal_length
['1.5', '1.6', '1.7', '3.3', '4', '4.6', '4.7', '5.8']
[1.6, 1.6, 2.5, 3.6, 4.3, 4.7, 5.2]
petal_width
[0.1, 0.2, 0.4, 1.0, 1.2, 1.4, 1.5, 1.8]
[0.2, 0.3, 0.7, 1.1, 1.3, 1.4, 1.6]


In [13]:
case_list

['sepal_length,5.0..5.0',
 'sepal_length,5.0..6.7',
 'sepal_length,5.0..5.2',
 'sepal_length,5.2..6.7',
 'sepal_length,5.0..5.3',
 'sepal_length,5.3..6.7',
 'sepal_length,5.0..5.6',
 'sepal_length,5.6..6.7',
 'sepal_length,5.0..5.9',
 'sepal_length,5.9..6.7',
 'sepal_length,5.0..6.2',
 'sepal_length,6.2..6.7',
 'sepal_length,5.0..6.5',
 'sepal_length,6.5..6.7',
 'sepal_width,2.3..2.4',
 'sepal_width,2.4..4.1',
 'sepal_width,2.3..2.5',
 'sepal_width,2.5..4.1',
 'sepal_width,2.3..2.7',
 'sepal_width,2.7..4.1',
 'sepal_width,2.3..2.9',
 'sepal_width,2.9..4.1',
 'sepal_width,2.3..3.2',
 'sepal_width,3.2..4.1',
 'sepal_width,2.3..3.8',
 'sepal_width,3.8..4.1',
 'petal_length,1.5..1.6',
 'petal_length,1.6..5.8',
 'petal_length,1.5..1.6',
 'petal_length,1.6..5.8',
 'petal_length,1.5..2.5',
 'petal_length,2.5..5.8',
 'petal_length,1.5..3.6',
 'petal_length,3.6..5.8',
 'petal_length,1.5..4.3',
 'petal_length,4.3..5.8',
 'petal_length,1.5..4.7',
 'petal_length,4.7..5.8',
 'petal_length,1.5..5.2'

#### Handle Missing Attributes !!

In [14]:
#For Lost values, that case wouldn't be included in any block

In [15]:
df_headers = list(df)
concept = df_headers[-2]
concept_list = df[concept].unique()

In [16]:
concept_list

array(['Iris-setosa', 'Iris-versicolor', 'Iris-viginica'], dtype=object)

In [17]:
#calculating cases by concepts and making sets
temp_list = []
goal_list = []
for item in concept_list:
    for index, row in df.iterrows():
        if row[concept] == item:
            temp_list.append(index+1)
    goal_list.append(temp_list)
    temp_list = []

In [18]:
print(goal_list,)

[[1, 3, 7, 8], [2, 4, 6, 9], [5, 10]]


In [19]:
temp_list = []
att_val_list = []
for item in case_list:
    a,b = item.split(",") #a = attribute and b = value
  
    if ".." in b:
        start,end = b.split("..")
        for index, row in df.iterrows():
            if row[a] is not '?':
                if float(row[a]) >= float(start) and float(row[a]) <= float(end):
                    temp_list.append(index+1)
        
        att_val_list.append(temp_list)
        temp_list = []
        
    else:
        for index, row in df.iterrows():
            if type(row[a]) == list:
                tmp_list = row[a]
                for case in tmp_list:
                    if float(case) == float(b):
                        temp_list.append(index+1)
                        
            if float(row[a]) == float(b):
                temp_list.append(index+1)
       
        att_val_list.append(temp_list)
        temp_list = []

In [20]:
#Print two evenly distributed list side by side
fmt = '%-8s%-20s%s'

for i, (case, att_val) in enumerate(zip(case_list, att_val_list)):
    print(fmt % (i, case, att_val))

0       sepal_length,5.0..5.0[8, 9]
1       sepal_length,5.0..6.7[1, 2, 3, 4, 5, 7, 8, 9, 10]
2       sepal_length,5.0..5.2[3, 7, 8, 9]
3       sepal_length,5.2..6.7[1, 2, 3, 4, 5, 10]
4       sepal_length,5.0..5.3[3, 7, 8, 9]
5       sepal_length,5.3..6.7[1, 2, 4, 5, 10]
6       sepal_length,5.0..5.6[1, 3, 7, 8, 9]
7       sepal_length,5.6..6.7[2, 4, 5, 10]
8       sepal_length,5.0..5.9[1, 2, 3, 7, 8, 9]
9       sepal_length,5.9..6.7[4, 5, 10]
10      sepal_length,5.0..6.2[1, 2, 3, 4, 7, 8, 9]
11      sepal_length,6.2..6.7[5, 10]
12      sepal_length,5.0..6.5[1, 2, 3, 4, 5, 7, 8, 9]
13      sepal_length,6.5..6.7[10]
14      sepal_width,2.3..2.4[9]
15      sepal_width,2.4..4.1[1, 2, 3, 4, 6, 7, 8, 10]
16      sepal_width,2.3..2.5[9, 10]
17      sepal_width,2.5..4.1[1, 2, 3, 4, 6, 7, 8, 10]
18      sepal_width,2.3..2.7[2, 9, 10]
19      sepal_width,2.7..4.1[1, 3, 4, 6, 7, 8]
20      sepal_width,2.3..2.9[2, 6, 9, 10]
21      sepal_width,2.9..4.1[1, 3, 4, 7, 8]
22      sepal_width,2.3..3.

In [21]:
#Creating data for case and att-value list
data = {'Cases': case_list, 'att_val': att_val_list}

In [22]:
df2 = pd.DataFrame(data)

In [23]:
#Cases and corresponding att-value pairs
df2

Unnamed: 0,Cases,att_val
0,"sepal_length,5.0..5.0","[8, 9]"
1,"sepal_length,5.0..6.7","[1, 2, 3, 4, 5, 7, 8, 9, 10]"
2,"sepal_length,5.0..5.2","[3, 7, 8, 9]"
3,"sepal_length,5.2..6.7","[1, 2, 3, 4, 5, 10]"
4,"sepal_length,5.0..5.3","[3, 7, 8, 9]"
5,"sepal_length,5.3..6.7","[1, 2, 4, 5, 10]"
6,"sepal_length,5.0..5.6","[1, 3, 7, 8, 9]"
7,"sepal_length,5.6..6.7","[2, 4, 5, 10]"
8,"sepal_length,5.0..5.9","[1, 2, 3, 7, 8, 9]"
9,"sepal_length,5.9..6.7","[4, 5, 10]"


### Start of Lower/Upper & Probabilistic Approximations !!

#### Building Chacteristic Set !!

In [24]:
attributes = list(df)
del attributes[-1]

In [25]:
attributes

['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

In [26]:
case_list = []
#Loop through all the attributes except last 2 - Concept and sort_col
for item in attributes[:-1]:
    print(item)
    
    #check for non numeric columns
    if not is_numeric_dtype(df[item]):
        temp = df[item].unique()
        for i in temp:
            if i == '?' or i == '-':
                continue
            else:
                case = item + "," + i
                case_list.append(case)

sepal_length
sepal_width
petal_length
petal_width


In [27]:
case_list

['sepal_length,5.4',
 'sepal_length,5.8',
 'sepal_length,5.2',
 'sepal_length,6.1',
 'sepal_length,6.3',
 'sepal_length,5.1',
 'sepal_length,5',
 'sepal_length,6.7',
 'sepal_width,3.4',
 'sepal_width,2.6',
 'sepal_width,4.1',
 'sepal_width,3',
 'sepal_width,2.8',
 'sepal_width,2.3',
 'sepal_width,2.5',
 'petal_length,1.7',
 'petal_length,4',
 'petal_length,1.5',
 'petal_length,4.6',
 'petal_length,4.7',
 'petal_length,1.6',
 'petal_length,3.3',
 'petal_length,5.8']

In [28]:
temp_list = []
att_val_list = []
for item in case_list:
    a,b = item.split(",") #a = attribute and b = value
    if ".." in b:
        start,end = b.split("..")
        for index, row in df.iterrows():
            if row[a] >= int(start) and row[a] <= int(end):
                temp_list.append(index+1)
        print(temp_list)
        att_val_list.append(temp_list)
        temp_list = []
        
    else:
        for index, row in df.iterrows():
            if type(row[a]) == list:
                tmp_list = row[a]
                for case in tmp_list:
                    if case == b:
                        temp_list.append(index+1)
                        
            if row[a] == b:
                temp_list.append(index+1)
        print(temp_list)
        att_val_list.append(temp_list)
        temp_list = []

[1]
[2]
[3]
[4]
[5]
[7]
[8, 9]
[10]
[1, 7, 8]
[2]
[3]
[4]
[6]
[9]
[10]
[1]
[2]
[3, 7]
[4]
[6]
[8]
[9]
[10]


In [29]:
#Print two evenly distributed list side by side
fmt = '%-8s%-20s%s'

for i, (case, att_val) in enumerate(zip(case_list, att_val_list)):
    print(fmt % (i, case, att_val))

0       sepal_length,5.4    [1]
1       sepal_length,5.8    [2]
2       sepal_length,5.2    [3]
3       sepal_length,6.1    [4]
4       sepal_length,6.3    [5]
5       sepal_length,5.1    [7]
6       sepal_length,5      [8, 9]
7       sepal_length,6.7    [10]
8       sepal_width,3.4     [1, 7, 8]
9       sepal_width,2.6     [2]
10      sepal_width,4.1     [3]
11      sepal_width,3       [4]
12      sepal_width,2.8     [6]
13      sepal_width,2.3     [9]
14      sepal_width,2.5     [10]
15      petal_length,1.7    [1]
16      petal_length,4      [2]
17      petal_length,1.5    [3, 7]
18      petal_length,4.6    [4]
19      petal_length,4.7    [6]
20      petal_length,1.6    [8]
21      petal_length,3.3    [9]
22      petal_length,5.8    [10]


In [30]:
U = set(U)
print(U,)

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}


In [31]:
# Creating dictionary combining case_list and att_val list
block = dict(zip(case_list, att_val_list))
block

{'sepal_length,5.4': [1],
 'sepal_length,5.8': [2],
 'sepal_length,5.2': [3],
 'sepal_length,6.1': [4],
 'sepal_length,6.3': [5],
 'sepal_length,5.1': [7],
 'sepal_length,5': [8, 9],
 'sepal_length,6.7': [10],
 'sepal_width,3.4': [1, 7, 8],
 'sepal_width,2.6': [2],
 'sepal_width,4.1': [3],
 'sepal_width,3': [4],
 'sepal_width,2.8': [6],
 'sepal_width,2.3': [9],
 'sepal_width,2.5': [10],
 'petal_length,1.7': [1],
 'petal_length,4': [2],
 'petal_length,1.5': [3, 7],
 'petal_length,4.6': [4],
 'petal_length,4.7': [6],
 'petal_length,1.6': [8],
 'petal_length,3.3': [9],
 'petal_length,5.8': [10]}

In [32]:
dic = {}
for index, row in df.iterrows():
    tmp_set = set()
    final_union = []
    char_list = []
    char_list_2 = []
    final_union_set = []
  
    for cols in attributes[:-1]:
        #If the value for corresponding attribute is a list then create all of the att-value pairs
        if type(df.loc[index,cols]) == list:
            print("When values are list") #########
            for item in df.loc[index,cols]:
                block_key = cols + "," + item
                char_list.append(block_key) #char_list has all att-val cases
                print(char_list) #########
                
            union_set = set()
            #Compute union of att-concept value case
            for item in char_list:
                union_set = union_set.union(set(block[item]))
            
            print("Union Set: ", union_set) #########
            final_union.append(union_set)
        
        else:
            print("When value is single") #########
            block_key = cols + "," + str(df.loc[index,cols])
            char_list_2.append(block_key) #char_list_2 has all single cases
            print(char_list_2) ##########
   
    #Compute instersection for this current row for Characteristics set
    
    print("final_union: ", final_union) ########
    if len(final_union):
        final_union_set = list(reduce(set.intersection, [set(item) for item in final_union]))
        
    
    print("Final Union Set: ", final_union_set) ##########
    
    for item in char_list_2:
        if item in block:
            print("When item is found as key in the block") ########
            if tmp_set == set():
                #Copy over the current set elements to B
                for i in range(len(block[item])):
                    tmp_set.add(block[item][i])
                    
            tmp_set = tmp_set.intersection(set(block[item]))
            print(tmp_set) ########
        
        #If item not in block
        else:
            print("When item is not found as key in the block") ########
            print(tmp_set) ########
            print(U) #########
            if tmp_set == set():
                tmp_set = U
                
            tmp_set = tmp_set.intersection(U)
            print(tmp_set) ########
    
    print("Final tmp_set: ", tmp_set) #########
    final_union_set = set(final_union_set)
    
    if final_union_set == set():
        tmp_set = tmp_set
    else:
        tmp_set = tmp_set.intersection(final_union_set)
        
    print("This is the final value: ", tmp_set) ###########
        
    key = ('K_%d' % (index+1))
    print(key) #########
    dic[key] = tmp_set
    print("\n") ##########

When value is single
['sepal_length,5.4']
When value is single
['sepal_length,5.4', 'sepal_width,3.4']
When value is single
['sepal_length,5.4', 'sepal_width,3.4', 'petal_length,1.7']
When value is single
['sepal_length,5.4', 'sepal_width,3.4', 'petal_length,1.7', 'petal_width,0.2']
final_union:  []
Final Union Set:  []
When item is found as key in the block
{1}
When item is found as key in the block
{1}
When item is found as key in the block
{1}
When item is not found as key in the block
{1}
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
{1}
Final tmp_set:  {1}
This is the final value:  {1}
K_1


When value is single
['sepal_length,5.8']
When value is single
['sepal_length,5.8', 'sepal_width,2.6']
When value is single
['sepal_length,5.8', 'sepal_width,2.6', 'petal_length,4']
When value is single
['sepal_length,5.8', 'sepal_width,2.6', 'petal_length,4', 'petal_width,1.2']
final_union:  []
Final Union Set:  []
When item is found as key in the block
{2}
When item is found as key in the block
{2}
When i

In [33]:
dic

{'K_1': {1},
 'K_2': {2},
 'K_3': {3},
 'K_4': {4},
 'K_5': {5},
 'K_6': {6},
 'K_7': {7},
 'K_8': {8},
 'K_9': {9},
 'K_10': {10}}

#### Lower Approximation !!

In [34]:
def lowerApproximation(charac_set,concept):
    #set to contain lower approximations
    lower = set()
    
    #Check for each element of the concept
    for item in concept:
        key = 'K_%d' % (item)
        set_value = charac_set[key]
  
        if set_value.issubset(set(concept)):
            lower = lower.union(set_value)
          
    return lower

In [35]:
lower_approximations = {}
for item in goal_list:
    #Key is the string converted list so as to add as dictionary key
    lower_approximations[str(item)] = lowerApproximation(dic,item)

In [36]:
lower_approximations

{'[1, 3, 7, 8]': {1, 3, 7, 8},
 '[2, 4, 6, 9]': {2, 4, 6, 9},
 '[5, 10]': {5, 10}}

In [37]:
lower_goal_list=list(lower_approximations.values())
print(lower_goal_list)

[{8, 1, 3, 7}, {9, 2, 4, 6}, {10, 5}]


#### Upper Approximation !!

In [38]:
def upperApproximation(charac_set,concept):
    #set to contain lower approximations
    upper = set()
    
    #Check for each element of the concept
    for item in concept:
        key = 'K_%d' % (item)
        set_value = charac_set[key]
        upper = upper.union(set_value)
          
    return upper

In [39]:
upper_approximations = {}
for item in goal_list:
    #Key is the string converted list so as to add as dictionary key
    upper_approximations[str(item)] = upperApproximation(dic,item)

In [40]:
upper_approximations

{'[1, 3, 7, 8]': {1, 3, 7, 8},
 '[2, 4, 6, 9]': {2, 4, 6, 9},
 '[5, 10]': {5, 10}}

In [41]:
upper_goal_list=list(lower_approximations.values())
print(upper_goal_list)

[{8, 1, 3, 7}, {9, 2, 4, 6}, {10, 5}]


#### Middle Approximations !!

In [42]:
first_column = list(dic.keys())

In [43]:
second_column = list(dic.values())

In [44]:
prob_approx = pd.DataFrame(
    {'charset_name': first_column,
     'charset_value': second_column
    })

In [45]:
prob_approx

Unnamed: 0,charset_name,charset_value
0,K_1,{1}
1,K_2,{2}
2,K_3,{3}
3,K_4,{4}
4,K_5,{5}
5,K_6,{6}
6,K_7,{7}
7,K_8,{8}
8,K_9,{9}
9,K_10,{10}


In [46]:
def probabilisticApproximation(concept):
    prob = []
    for index, row in prob_approx.iterrows():
        probability_conditional = len(row['charset_value'].intersection(set(concept))) / len(row['charset_value'])
        prob.append(round(probability_conditional,2))
    return prob

In [47]:
cond_prob = probabilisticApproximation(goal_list[0]) #Need to put the goal

In [48]:
prob_approx['cond_probability'] = cond_prob

In [49]:
prob_approx

Unnamed: 0,charset_name,charset_value,cond_probability
0,K_1,{1},1.0
1,K_2,{2},0.0
2,K_3,{3},1.0
3,K_4,{4},0.0
4,K_5,{5},0.0
5,K_6,{6},0.0
6,K_7,{7},1.0
7,K_8,{8},1.0
8,K_9,{9},0.0
9,K_10,{10},0.0


In [50]:
def findMiddleApprox(concept):
    probapprox = set()
    for index, row in prob_approx.iterrows():
        part1,part2 = row['charset_name'].split("_")
       
        if row['cond_probability'] >= 0.50:
            if int(part2) in concept:
#                 print(row['charset_value'])
                probapprox = probapprox.union(row['charset_value'])
    return probapprox   

In [51]:
middle_approximations = {}
for item in goal_list:
    print(item)
    #Key is the string converted list so as to add as dictionary key
    middle_approximations[str(item)] = findMiddleApprox(item)

[1, 3, 7, 8]
[2, 4, 6, 9]
[5, 10]


In [52]:
middle_approximations

{'[1, 3, 7, 8]': {1, 3, 7, 8}, '[2, 4, 6, 9]': set(), '[5, 10]': set()}

In [53]:
middle_goal_list=list(lower_approximations.values())
print(middle_goal_list)

[{8, 1, 3, 7}, {9, 2, 4, 6}, {10, 5}]


In [54]:
df2

Unnamed: 0,Cases,att_val
0,"sepal_length,5.0..5.0","[8, 9]"
1,"sepal_length,5.0..6.7","[1, 2, 3, 4, 5, 7, 8, 9, 10]"
2,"sepal_length,5.0..5.2","[3, 7, 8, 9]"
3,"sepal_length,5.2..6.7","[1, 2, 3, 4, 5, 10]"
4,"sepal_length,5.0..5.3","[3, 7, 8, 9]"
5,"sepal_length,5.3..6.7","[1, 2, 4, 5, 10]"
6,"sepal_length,5.0..5.6","[1, 3, 7, 8, 9]"
7,"sepal_length,5.6..6.7","[2, 4, 5, 10]"
8,"sepal_length,5.0..5.9","[1, 2, 3, 7, 8, 9]"
9,"sepal_length,5.9..6.7","[4, 5, 10]"


In [55]:
df3=df2

In [56]:
import pylab as pl

In [57]:
def findGoalIntersect(goal):
    goalIntersect = []
    
    for index, row in df3.iterrows():
        #List containing intersection of (a,v) pairs and goal
        goalIntersect.append(set(row['att_val']).intersection(set(goal)))
          
    #Check if goal_intersect column exists
    if 'goal_intersect' in df3:
        df3['goal_intersect'] = goalIntersect
    else:
        #Insert new column with the recent iteration
        df3.insert(2, 'goal_intersect', goalIntersect)

In [63]:
def findCases(df3):
  
    #Find the cases with maximum goal coverage
    m = max(df3['goal_intersect'], key=len)
    print(m)
    possible_cases = [i for i, j in enumerate(df3['goal_intersect'].tolist()) if j == m]
    print(possible_cases)
    
    #Index of the case covering max goal and having min no. of elements
    new_df = df3.iloc[possible_cases,:]
    print(new_df)
   
    m1 = min(new_df['att_val'], key=len)
    print(m1)
    print(new_df.keys())
    final_case = [i for i, j in enumerate(new_df['att_val'].tolist()) if j == m1]
    
    print(final_case)
    return final_case[0]

In [59]:
def combineInterval(test_condition):
    
    test_num = [] #This will contain the conditions having interval
    test_str = [] #This will contain the conditions having no interval
    
    #Loop through to seprate contions having intervals and no intervals
    for item in test_condition:
        if ".." in item:
            test_num.append(item)
        else:
            test_str.append(item)
   
    #Group the conditions having interval based on same attributes
    grouped = [list(g) for k, g in groupby(test_num, lambda s: s.partition(',')[0])]
    
    final_list = []
    
    #Actually combining the intervals
    for list1 in grouped:
        greatest = 0
        smallest = 0
        for item in list1:
            part1,part2 = item.split(",")
            start,stop = part2.split("..")
            start = float(start)
            stop = float(stop)

            if greatest == 0 and smallest == 0:
                greatest = start
                smallest = stop

            if start > greatest:
                greatest = float(start)

            if stop < smallest:
                smallest = float(stop)

        con_tmp = part1 + "," + str(greatest) + ".." + str(smallest)
        final_list.append(con_tmp)  
            
    actual_condition = final_list + test_str
    
    return actual_condition

In [60]:
def dropCondition(condition,current_goal):
    
    for item in range(0,len(condition)):
        temp_att_val = []
        temp_cond = condition.copy() #use list.copy() as equal operator simply copies over the reference
        temp_cond.remove(condition[item])
       
        #temp_cond contains the elements after removing the current element
        for i in temp_cond:
            if i is not None:
                location = df3.index[df3['Cases'] == i].tolist()
                element = df3['att_val'].loc[location[0]]
                temp_att_val.append(set(element))
          
        #temp_att_val contains the actual value set of the corresponding cases
        #Find the intersection if the set has more than one element, otherwise no need
        if len(temp_att_val) > 1:
            intersection = set.intersection(*temp_att_val)
        
            #if the set still remains a subset of the original goal after removing current element
            #then set the current element to None as we want to drop this later
            if intersection.issubset(current_goal):
                condition[item] = None
    
    condition = [x for x in condition if x is not None]
   
    return condition   

In [61]:
def stepAlgo(df3,selected_case,current_goal,B,condition,concept_curr):
    
    rule_set = []
    original_goal = current_goal.copy()
    
    while current_goal != set():

        #Check if the selected case is a subset of the current goal

        #List of current case
        A = df3['att_val'].loc[selected_case] 

        if B == set():
            #Copy over the current set elements to B
            for i in range(len(A)):
                B.add(A[i])

        #Elements of intersection of current and previous set
        A = set(A).intersection(B)
        B = A

        #Check if intersecting elements are subset of Goal
        if A.issubset(current_goal):
            print("SUBSET")
            #Current goal is updated after discarding the already covered goal by new rule
            current_goal = set(current_goal) - A

            #Extract the current case
            curr_case = df3['Cases'].loc[selected_case]
            #Add the conditions of a Rule
            condition.append(curr_case)
            
            #Check for possibility of dropping conditions
            if len(condition) > 1:
                condition = dropCondition(condition,original_goal)
            
            #Combine the interval
            if len(condition) > 1:
                condition = combineInterval(condition)
            

            #Join conditions
            cond = ""
            for item in condition:
                cond = cond + "(" + str(item) + ")" + " & "

            cond = cond[:-2] + "->"
            rule = cond + " (" + concept + "," + concept_curr + ")"
            rule_set.append(rule)

            #Reset everythng and continue for covering rest of the goal
            condition = []
            B = set()
            findGoalIntersect(current_goal)
            selected_case = findCases(df3)


        #If not a subset of current goal
        else:
            print("NOT")
            
            #Assign empty set for the selected case for next iteration
            df3['goal_intersect'].loc[selected_case] = set()
           
            #Extract the current case
            curr_case = df3['Cases'].loc[selected_case]
            #Add the case to the condition list
            condition.append(curr_case)
            
#             print(condition)
            #Check for Range overlapping of the remaining cases
            if ".." in curr_case:
                print(curr_case)
                second_part = curr_case.split(',')[1]
                start = float(second_part.split('..')[0])
                end = float(second_part.split('..')[1])
                for index, row in df3.iterrows():
                    if ".." in row['Cases']:
                        part2 = row['Cases'].split(',')[1]
                        start1 = float(part2.split('..')[0])
                        end1 = float(part2.split('..')[1])

                        #Assign blank set for cases with overlapping ranges
                        if set((pl.frange(start,end))).issubset(pl.frange(start1,end1)) == True:
                            row['goal_intersect'] = set()
            
           
            selected_case = findCases(df3)
            print(selected_case)
                        
    return rule_set

In [64]:
# #concept_list and goal_list has 1:1 mapping
# final_rules = []
# start_time = time.time()

# #Running algorithm for all the goals - Lower Approximation/concepts
# for i in range(0,len(lower_goal_list)):
#     findGoalIntersect(list(lower_goal_list[i]))
#     condition = []
#     B = set()
#     selected_case = findCases(df3)
    
#     rule_set = stepAlgo(df3,selected_case,lower_goal_list[i],B,condition,concept_list[i])
#     final_rules.append(rule_set)
    
# elapsed_time = time.time() - start_time

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  




[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

52   petal_width,0.1..1.6   [1, 2, 3, 4, 5, 6, 7, 8, 9]   {8, 1, 3, 7}
[1, 3, 7, 8]
Index(['Cases', 'att_val', 'goal_intersect'], dtype='object')
[9, 10, 15]
9
NOT
sepal_length,5.9..6.7
{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_wi

NOT
sepal_length,5.9..6.7
{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6      

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

{8, 1, 3, 7}
[1, 6, 8, 10, 12, 15, 17, 19, 21, 23, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52]
                    Cases                       att_val goal_intersect
1   sepal_length,5.0..6.7  [1, 2, 3, 4, 5, 7, 8, 9, 10]   {8, 1, 3, 7}
6   sepal_length,5.0..5.6               [1, 3, 7, 8, 9]   {8, 1, 3, 7}
8   sepal_length,5.0..5.9            [1, 2, 3, 7, 8, 9]   {8, 1, 3, 7}
10  sepal_length,5.0..6.2         [1, 2, 3, 4, 7, 8, 9]   {8, 1, 3, 7}
12  sepal_length,5.0..6.5      [1, 2, 3, 4, 5, 7, 8, 9]   {8, 1, 3, 7}
15   sepal_width,2.4..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
17   sepal_width,2.5..4.1     [1, 2, 3, 4, 6, 7, 8, 10]   {8, 1, 3, 7}
19   sepal_width,2.7..4.1            [1, 3, 4, 6, 7, 8]   {8, 1, 3, 7}
21   sepal_width,2.9..4.1               [1, 3, 4, 7, 8]   {8, 1, 3, 7}
23   sepal_width,3.2..4.1                  [1, 3, 7, 8]   {8, 1, 3, 7}
30  petal_length,1.5..2.5                  [1, 3, 7, 8]   {8, 1, 3, 7}
32  petal_length,1.5..3.6               [1, 3, 7, 8, 9]  

KeyboardInterrupt: 

In [None]:
final_rules

In [None]:
lower_goal_list

In [65]:
#There is a problem in findcases... i
#t's picking the index of new_df as the final cases 
#which should actually be the matched cases index of df3
#Check on that