# Candidate Elimination Learning Algorithm

In [29]:
# Code by Bhavy Kharbanda
# Sap Id: 500082531

In [30]:
# Reference for the video of this algorithm.
# https://www.i2tutorials.com/machine-learning-tutorial/machine-learning-candidate-elimination-learning-algorithm/
# https://www.youtube.com/watch?v=O2wYwFOMQ24

In [31]:
# The key idea in the CANDIDATE-ELIMINATlON Algo is to output a description of the set of all hypotheses consistent with the training examples. 

In [32]:
# Candidate Elimination Algorithm:
# 1. Initialize both specific and general hypotheses.  
# S = < ‘ϕ’, ‘ϕ’, ‘ϕ’, ….., ‘ϕ’ >
# G = < ‘?’, ‘?’, ‘?’, ….., ’?’>

# Depending on the number of attributes.
# 2. Take the next example, if the taken example is positive make a specific hypothesis to general.

# 3. If the taken example is negative make the general hypothesis to a more specific hypothesis.

In [33]:
# Import libraries
import numpy as np 
import pandas as pd

In [34]:
# Importing the dataset
data = pd.read_csv('Datasets\EnjoySport.csv')

# Specifying the column

instances = np.array(data.iloc[:,0:-1])
print("\nInstances are:\n",instances)

target = np.array(data.iloc[:,-1])
print("\nTarget Values are: ",target)


Instances are:
 [['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
 ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
 ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
 ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]

Target Values are:  ['Yes' 'Yes' 'No' 'Yes']


In [35]:
# Function to learn the algorithm:
def learn(instances, target): 
    specific_h = instances[0].copy()
    print("\nInitialization of specific_hypothesis and genearal_hypothesis: ")
    print("\nSpecific Hypothesis: ", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("\nGeneral Hypothesis: ",general_h)  

    # Loop to traverse in the instances given
    for i, h in enumerate(instances):
        print("\nInstance", i+1, ": ")
        print(h)
        if target[i] == "Yes":
            print("Instance is Positive ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    specific_h[x] ='?'                     
                    general_h[x][x] ='?'
                   
        if target[i] == "No":            
            print("Instance is Negative ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    general_h[x][x] = specific_h[x]                
                else:                    
                    general_h[x][x] = '?'        
        
        print("Specific Bundary now:  ", specific_h)         
        print("Generic Boundary now:  ",  general_h)
        print("\n")

    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]    
    for i in indices:   
        general_h.remove(['?', '?', '?', '?', '?', '?']) 
    return specific_h, general_h 

In [36]:
# Fetching the function's results
specific_result, general_result = learn(instances, target)


Initialization of specific_hypothesis and genearal_hypothesis: 

Specific Hypothesis:  ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']

General Hypothesis:  [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Instance 1 : 
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
Instance is Positive 
Specific Bundary now:   ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
Generic Boundary now:   [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]



Instance 2 : 
['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
Instance is Positive 
Specific Bundary now:   ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
Generic Boundary now:   [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'

In [37]:
# Printing the final hypothesis: 
print("\nFinal Specific Hypothesis: ", specific_result)
print("\nFinal General Hypothesis: ", general_result)


Final Specific Hypothesis:  ['Sunny' 'Warm' '?' 'Strong' '?' '?']

Final General Hypothesis:  [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]


### Working on another dataset

In [38]:
# Importing the dataset
data = pd.read_csv('Datasets\Shape_dataset.csv')

# Specifying the column

instances = np.array(data.iloc[:,0:-1])
print("\nInstances are:\n",instances)

target = np.array(data.iloc[:,-1])
print("\nTarget Values are: ",target)


Instances are:
 [['Big' 'Red' 'Circle']
 ['Small' 'Red' 'Triangle']
 ['Small' 'Red' 'Circle']
 ['Big' 'Blue' 'Circle']
 ['Small' 'Blue' 'Circle']]

Target Values are:  ['No' 'No' 'Yes' 'Yes' 'Yes']


In [43]:
# Function to learn the algorithm:
def learn(instances, target): 
    specific_h = instances[0].copy()
    print("\nInitialization of specific_hypothesis and genearal_hypothesis: ")
    print("\nSpecific Hypothesis: ", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("\nGeneral Hypothesis: ",general_h)  

    # Loop to traverse in the instances given
    for i, h in enumerate(instances):
        print("\nInstance", i+1, ": ")
        print(h)
        if target[i] == "Yes":
            print("Instance is Positive ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    specific_h[x] ='?'                     
                    general_h[x][x] ='?'
                   
        if target[i] == "No":            
            print("Instance is Negative ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    general_h[x][x] = specific_h[x]                
                else:                    
                    general_h[x][x] = '?'        
        
        print("Specific Bundary now:  ", specific_h)         
        print("Generic Boundary now:  ",  general_h)
        print("\n")

    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]    
    for i in indices:   
        general_h.remove(['?', '?', '?', '?', '?', '?']) 
    return specific_h, general_h 

In [40]:
# Fetching the function's results
specific_result, general_result = learn(instances, target)


Initialization of specific_hypothesis and genearal_hypothesis: 

Specific Hypothesis:  ['Big' 'Red' 'Circle']

General Hypothesis:  [['?', '?', '?'], ['?', '?', '?'], ['?', '?', '?']]

Instance 1 : 
['Big' 'Red' 'Circle']
Instance is Negative 
Specific Bundary now:   ['Big' 'Red' 'Circle']
Generic Boundary now:   [['?', '?', '?'], ['?', '?', '?'], ['?', '?', '?']]



Instance 2 : 
['Small' 'Red' 'Triangle']
Instance is Negative 
Specific Bundary now:   ['Big' 'Red' 'Circle']
Generic Boundary now:   [['Big', '?', '?'], ['?', '?', '?'], ['?', '?', 'Circle']]



Instance 3 : 
['Small' 'Red' 'Circle']
Instance is Positive 
Specific Bundary now:   ['?' 'Red' 'Circle']
Generic Boundary now:   [['?', '?', '?'], ['?', '?', '?'], ['?', '?', 'Circle']]



Instance 4 : 
['Big' 'Blue' 'Circle']
Instance is Positive 
Specific Bundary now:   ['?' '?' 'Circle']
Generic Boundary now:   [['?', '?', '?'], ['?', '?', '?'], ['?', '?', 'Circle']]



Instance 5 : 
['Small' 'Blue' 'Circle']
Instance is Posi

In [41]:
# Printing the final hypothesis: 
print("\nFinal Specific Hypothesis: ", specific_result)
print("\nFinal General Hypothesis: ", general_result)


Final Specific Hypothesis:  ['?' '?' 'Circle']

Final General Hypothesis:  [['?', '?', '?'], ['?', '?', '?'], ['?', '?', 'Circle']]


### Working on third dataset

In [47]:
# Importing the dataset
data = pd.read_csv('Datasets\Price_dataset.csv')

# Specifying the column

instances = np.array(data.iloc[:,0:-1])
print("\nInstances are:\n",instances)

target = np.array(data.iloc[:,-1])
print("\nTarget Values are: ",target)


Instances are:
 [['Some' 'Small' 'No' 'Affordable' 'One']
 ['Many' 'Big' 'No' 'Expensive' 'Many']
 ['Many' 'Medium' 'No' 'Expensive' 'Few']
 ['Many' 'Small' 'No' 'Affordable' 'Many']]

Target Values are:  ['No' 'Yes' 'Yes' 'Yes']


In [48]:
# Function to learn the algorithm:
def learn(instances, target): 
    specific_h = instances[0].copy()
    print("\nInitialization of specific_hypothesis and genearal_hypothesis: ")
    print("\nSpecific Hypothesis: ", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("\nGeneral Hypothesis: ",general_h)  

    # Loop to traverse in the instances given
    for i, h in enumerate(instances):
        print("\nInstance", i+1, ": ")
        print(h)
        if target[i] == "Yes":
            print("Instance is Positive ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    specific_h[x] ='?'                     
                    general_h[x][x] ='?'
                   
        if target[i] == "No":            
            print("Instance is Negative ")
            for x in range(len(specific_h)): 
                if h[x]!= specific_h[x]:                    
                    general_h[x][x] = specific_h[x]                
                else:                    
                    general_h[x][x] = '?'        
        
        print("Specific Bundary now:  ", specific_h)         
        print("Generic Boundary now:  ",  general_h)
        print("\n")

    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]    
    for i in indices:   
        general_h.remove(['?', '?', '?', '?', '?', '?']) 
    return specific_h, general_h 

In [49]:
# Fetching the function's results
specific_result, general_result = learn(instances, target)


Initialization of specific_hypothesis and genearal_hypothesis: 

Specific Hypothesis:  ['Some' 'Small' 'No' 'Affordable' 'One']

General Hypothesis:  [['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?']]

Instance 1 : 
['Some' 'Small' 'No' 'Affordable' 'One']
Instance is Negative 
Specific Bundary now:   ['Some' 'Small' 'No' 'Affordable' 'One']
Generic Boundary now:   [['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?']]



Instance 2 : 
['Many' 'Big' 'No' 'Expensive' 'Many']
Instance is Positive 
Specific Bundary now:   ['?' '?' 'No' '?' '?']
Generic Boundary now:   [['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?']]



Instance 3 : 
['Many' 'Medium' 'No' 'Expensive' 'Few']
Instance is Positive 
Specific Bundary now:   ['?' '?' 'No' '?' '?'

In [50]:
# Printing the final hypothesis: 
print("\nFinal Specific Hypothesis: ", specific_result)
print("\nFinal General Hypothesis: ", general_result)


Final Specific Hypothesis:  ['?' '?' 'No' '?' '?']

Final General Hypothesis:  [['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?']]
