# Candidate-Elimination Algorithm

For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination Algorithm to output a description of the set
of all hypotheses consistent with the training examples.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the EnjoySport training examples; the relative path is resolved
# against the current working directory.
df = pd.read_csv(filepath_or_buffer="enjoysport.csv")

In [3]:
# Show the loaded training examples as the cell's rich output.
df

Unnamed: 0,sky,air_temp,humidity,wind,water,forecast,enjoy_sport
0,sunny,warm,normal,strong,warm,same,yes
1,sunny,warm,high,strong,warm,same,yes
2,rainy,cold,high,strong,warm,change,no
3,sunny,warm,high,strong,cool,change,yes


In [4]:
# Attribute matrix: every column except the last, one row per training example.
concept = df.iloc[:, :-1].to_numpy(copy=True)
# Class labels: the last column only.
target = df.iloc[:, -1].to_numpy(copy=True)

In [5]:
# Inspect the attribute matrix (all columns of df except the last).
concept

array([['sunny', 'warm', 'normal', 'strong', 'warm', 'same'],
       ['sunny', 'warm', 'high', 'strong', 'warm', 'same'],
       ['rainy', 'cold', 'high', 'strong', 'warm', 'change'],
       ['sunny', 'warm', 'high', 'strong', 'cool', 'change']],
      dtype=object)

In [6]:
# Inspect the label vector (last column of df).
target

array(['yes', 'yes', 'no', 'yes'], dtype=object)

In [7]:
def learn(concepts, target, positive_label="yes", negative_label="no", verbose=True):
    """Run a simplified Candidate-Elimination pass over the training examples.

    A single specific hypothesis is maintained together with one
    general-boundary candidate per attribute (candidate ``x`` can only ever
    constrain attribute ``x``). The value ``"?"`` means "any value is
    acceptable" for that attribute.

    Parameters
    ----------
    concepts : sequence of rows
        Training examples, one row of attribute values per example. Rows must
        support integer indexing and ``.copy()`` (NumPy rows and lists do).
    target : sequence
        Class label of each example, aligned with ``concepts``.
    positive_label : optional
        Label that marks a positive example (default ``"yes"``).
    negative_label : optional
        Label that marks a negative example (default ``"no"``). Examples
        carrying any other label do not change either hypothesis.
    verbose : bool, optional
        When true (the default) print both hypotheses after every example.

    Returns
    -------
    specific_h : copy of a row of ``concepts``
        The specific hypothesis, generalised with ``"?"`` where the positive
        examples disagree.
    general_h : list of list
        The general candidates that still constrain at least one attribute;
        candidates consisting only of ``"?"`` are dropped.

    Raises
    ------
    ValueError
        If no example is labelled ``positive_label`` (this includes empty
        input), because the specific hypothesis cannot be seeded.
    """
    # The specific hypothesis has to start from a *positive* example. Seeding
    # it with a negative first row would make it, and every generalisation of
    # it, cover that negative example.
    seed_index = next(
        (idx for idx in range(len(concepts)) if target[idx] == positive_label),
        None,
    )
    if seed_index is None:
        raise ValueError(
            f"learn() needs at least one example labelled {positive_label!r} "
            "to seed the specific hypothesis"
        )

    # Copy the seed row so the caller's training data is never modified.
    specific_h = concepts[seed_index].copy()
    n_attributes = len(specific_h)
    # One fully general candidate per attribute; each row is a distinct list.
    general_h = [["?"] * n_attributes for _ in range(n_attributes)]

    # Loop over each training sample.
    for i, h in enumerate(concepts):
        label = target[i]

        if label == positive_label:
            # Positive example: drop every constraint it violates, both in the
            # specific hypothesis and in the matching general candidate.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = "?"
                    general_h[x][x] = "?"

        elif label == negative_label:
            # Negative example: an attribute on which it differs from the
            # specific hypothesis can exclude it, so candidate x takes that
            # value. An attribute on which it agrees cannot exclude it, so
            # candidate x is reset to fully general.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = "?"

        if verbose:
            print("Iter ",i,"\n specific : ",specific_h,"\n general:\n",general_h)

    # Remove completely general candidates, i.e. those where every attribute is "?".
    fully_general = ["?"] * n_attributes
    general_h = [candidate for candidate in general_h if candidate != fully_general]

    return specific_h, general_h

In [8]:
# Run the learner on the training data: `s` receives the specific hypothesis and
# `g` the list of general hypotheses. A per-example trace is printed as a side effect.
s , g = learn(concept,target)

Iter  0 
 specific :  ['sunny' 'warm' 'normal' 'strong' 'warm' 'same'] 
 general:
 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Iter  1 
 specific :  ['sunny' 'warm' '?' 'strong' 'warm' 'same'] 
 general:
 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Iter  2 
 specific :  ['sunny' 'warm' '?' 'strong' 'warm' 'same'] 
 general:
 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
Iter  3 
 specific :  ['sunny' 'warm' '?' 'strong' '?' '?'] 
 general:
 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

In [9]:
# Specific hypothesis returned by learn() for the training data.
s

array(['sunny', 'warm', '?', 'strong', '?', '?'], dtype=object)

In [10]:
# General hypotheses returned by learn(), shown as a plain list.
list(g)

[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]

# END