# CANDIDATE-ELIMINATION Learning Algorithm in Concept Learning

_Finding the set of all hypotheses consistent with the training examples._

In [2]:
# imports required packages

import pandas as pd
import numpy as np

## Preparing Data

In [4]:
# Loads the EnjoySport training examples from a CSV file (the path is relative to the working directory)

data = pd.read_csv("../Data/enjoysport.csv")

In [5]:
# Displays the full table of training examples, including the EnjoySport target column

display(data)

Unnamed: 0,Sky,AirTemp,Humidity,Wind,Water,Forecast,EnjoySport
0,Sunny,Warm,Normal,Strong,Warm,Same,Yes
1,Sunny,Warm,High,Strong,Warm,Same,Yes
2,Rainy,Cold,High,Strong,Warm,Change,No
3,Sunny,Warm,High,Strong,Cool,Change,Yes


In [6]:
# Takes a copy of the full table (target column still included).
# NOTE(review): X is rebound to the feature-only array in the next cell before this copy is used.

X = data.copy()

In [7]:
# X represents the set of instances over which the concept is defined:
# every column except the last one (the target), converted to a NumPy array

X = np.array(data.iloc[:,0:-1])

In [8]:
# Shows the training instances (without the target column)

display(X)

array([['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'],
       ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'],
       ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change'],
       ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']],
      dtype=object)

In [9]:
# Separates the target labels (last column) into their own array
target = np.array(data.iloc[:,-1])

In [10]:
# Shows the target labels
display(target)

array(['Yes', 'Yes', 'No', 'Yes'], dtype=object)

## Applying Candidate-Elimination Algorithm to Get Specific & General Hypothesis Boundary Sets

_**Pseudocode for the Candidate-Elimination algorithm**_

1. Initialize G to the set of maximally general hypotheses in H
2. Initialize S to the set of maximally specific hypotheses in H
3. For each training example d, do
    - If d is a positive example
        - Remove from G any hypothesis inconsistent with d
        - For each hypothesis s in S that is not consistent with d
            - Remove s from S
            - Add to S all minimal generalizations h of s such that
                - h is consistent with d, and some member of G is more general than h
            - Remove from S any hypothesis that is more general than another hypothesis in S
    - If d is a negative example
        - Remove from S any hypothesis inconsistent with d
        - For each hypothesis g in G that is not consistent with d
            - Remove g from G
            - Add to G all minimal specializations h of g such that
                - h is consistent with d, and some member of S is more specific than h
            - Remove from G any hypothesis that is less general than another hypothesis in G

In [13]:
class CandidateElimination():
    """
    Encapsulates a simplified Candidate-Elimination algorithm for conjunctive hypotheses.

    A hypothesis is a list with one slot per attribute. A slot holds either a
    concrete attribute value, ``ANY`` ('?', every value is acceptable) or
    ``EMPTY`` ('Φ', no value is acceptable).
    """

    # Placeholder for "no value accepted" (initial state of the specific boundary)
    EMPTY = 'Φ'
    # Placeholder for "any value accepted"
    ANY = '?'

    def fit(self, X, target):
        """
        Finding specific and general boundary sets of hypotheses consistent with training examples.

        The progress after every training example is printed to stdout.

        Parameters
        ----------
        X: array-like of shape (n_examples, n_attributes)
            instances of training examples; a 2-D NumPy array, a DataFrame or a
            list of rows are all accepted (the input is converted with
            ``np.asarray`` and is not modified)
        target: array-like of length n_examples
            the label against each instance; "Yes" marks a positive example and
            "No" a negative one. Any other label leaves both boundaries
            unchanged (the progress line still tags it as [NEGATIVE]).

        Returns
        -------
        specific_h: list
            the single hypothesis forming the specific boundary
        general_h: list of lists
            hypotheses of the general boundary; hypotheses made of '?' only are
            dropped, so an empty list means no specialization was retained

        Notes
        -----
        Attribute domains are unknown to this simplified implementation, so a
        negative example can only specialize a slot for which the specific
        boundary already holds a value. A negative example seen before the first
        positive one therefore leaves the general boundary untouched instead of
        writing the 'Φ' placeholder into it.
        """

        # Converts to positional NumPy arrays so that rows (not DataFrame column
        # labels) are iterated and labels are looked up by position
        X = np.asarray(X, dtype=object)
        target = np.asarray(target, dtype=object)
        n_attributes = X.shape[1]

        # Initializes boundary for specific hypothesis
        specific_h = [self.EMPTY] * n_attributes

        # Initializes boundary for general hypotheses: one candidate per attribute,
        # candidate j may only ever be specialized on attribute j
        general_h = [[self.ANY] * n_attributes for _ in range(n_attributes)]

        print("\nInitialization:\nSpecific Boundary: {}\nGeneral Boundary: {}".format(
            specific_h, general_h))

        # Iterates through the example instances
        for row, x in enumerate(X):
            label = target[row]
            print("\nAfter Instance #", row+1 , ":", x, "[POSITIVE]" if label == "Yes" else "[NEGATIVE]")

            if label == "Yes":
                # Generalizes the specific boundary just enough to cover x and
                # withdraws any specialization of G that would now exclude x
                for j, attr in enumerate(x):
                    if specific_h[j] == self.EMPTY:
                        specific_h[j] = attr
                    elif attr != specific_h[j]:
                        specific_h[j] = self.ANY
                        general_h[j][j] = self.ANY

            elif label == "No":
                # Specializes G on every attribute where x differs from the
                # specific boundary; resets the slot otherwise
                for j, attr in enumerate(x):
                    constraint = specific_h[j]
                    # 'Φ' is only an initialization marker and must never leak into G
                    if constraint != self.EMPTY and attr != constraint:
                        general_h[j][j] = constraint
                    else:
                        general_h[j][j] = self.ANY

            print("Specific Boundary:", specific_h)
            print("Generic Boundary:", general_h)

        # Removes most general hypotheses from the general boundary set
        most_general_h = [self.ANY] * n_attributes
        general_h = [h for h in general_h if h != most_general_h]

        return specific_h, general_h

In [14]:
# Instantiates the algorithm and fits it to the training examples

candidate_elimination = CandidateElimination()

specific_boundary, general_boundary = candidate_elimination.fit(X, target)


Initialization:
Specific Boundary: ['Φ', 'Φ', 'Φ', 'Φ', 'Φ', 'Φ']
General Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

After Instance # 1 : ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same'] [POSITIVE]
Specific Boundary: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

After Instance # 2 : ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same'] [POSITIVE]
Specific Boundary: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'

In [15]:
# Shows both the specific and the general boundary sets returned by fit

print("Hypothesis in Specific Boundary:", specific_boundary, "\n")
print("Hypotheses in General Boundary:", general_boundary)

Hypothesis in Specific Boundary: ['Sunny', 'Warm', '?', 'Strong', '?', '?'] 

Hypotheses in General Boundary: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
