### PROGRAM 03: CANDIDATE ELIMINATION ALGORITHM

In [1]:
import pandas as pd
import numpy as np
from pprint import pprint

# Relative path to the EnjoySport training data used by this notebook.
enjoy_sport_data = "datasets/enjoy_sport.csv"

# Load the CSV, using its first column as the row index so that it is not
# treated as a feature.
# NOTE(review): the recorded output of the later cells shows values with a
# leading space (e.g. ' Sunny'), so the file presumably has a space after each
# comma; consider read_csv(..., skipinitialspace=True) if that is unwanted.
df = pd.read_csv(enjoy_sport_data, index_col=0)
df  # notebook display of the loaded frame

Unnamed: 0,sky,airTemp,humidity,wind,water,forecast,enjoySport
0,Sunny,Warm,Normal,Strong,Warm,Same,1
1,Sunny,Warm,High,Strong,Warm,Same,1
2,Rainy,Cold,High,Strong,Warm,Change,0
3,Sunny,Warm,High,Strong,Cool,Change,1


In [2]:
# Attribute matrix: every column of df except the last one, as a numpy array
# with one row per training example.
concepts = np.array(df.iloc[:, :-1])
concepts  # notebook display

array([[' Sunny', ' Warm', ' Normal', ' Strong', ' Warm', ' Same'],
       [' Sunny', ' Warm', ' High', ' Strong', ' Warm', ' Same'],
       [' Rainy', ' Cold', ' High', ' Strong', ' Warm', ' Change'],
       [' Sunny', ' Warm', ' High', ' Strong', ' Cool', ' Change']],
      dtype=object)

In [3]:
# Labels: the last column of df as a numpy array. learn() treats 1 as a
# positive example and 0 as a negative example.
target = np.array(df.iloc[:,-1])
target  # notebook display

array([1, 1, 0, 1], dtype=int64)

In [4]:
def learn(concepts, target):
    """Derive a specific and a general hypothesis from labelled examples.

    The specific hypothesis starts as the first positive example and has an
    attribute replaced by '?' whenever a positive example disagrees with it.
    The general hypothesis is kept as ``num_features`` rows of '?'; only the
    diagonal entry ``[i][i]`` of each row is ever updated. Rows that are still
    all '?' at the end are dropped from the result.

    The initial hypotheses are printed before the examples are processed.

    Args:
        concepts: Sequence of examples, each a sequence of attribute values.
            Rows must support ``.copy()`` and item assignment (for example
            lists or rows of a numpy array). The input is not modified.
        target: Sequence of labels aligned with ``concepts``. ``1`` marks a
            positive example and ``0`` a negative one; any other label is
            ignored.

    Returns:
        A ``(specific_hypothesis, general_hypothesis)`` tuple. The specific
        hypothesis has the same type as a row of ``concepts``; the general
        hypothesis is a list of lists.

    Raises:
        IndexError: If ``concepts`` is empty.
        StopIteration: If ``target`` contains no positive (``1``) label.
    """
    num_features = len(concepts[0])

    # Initializing general and specific hypothesis
    print("Initializing...\n")

    # Getting the first row with target as 1 for specific hypothesis
    first_positive = next(
        concepts[idx]
        for idx, output in enumerate(target)
        if output == 1
    )
    # Work on a copy: the row is a view into (or the same object as) the
    # caller's data, and the loop below overwrites entries with '?'.
    specific_hypothesis = first_positive.copy()

    # One independent row per attribute; building each row separately avoids
    # all rows aliasing the same list.
    general_hypothesis = [['?'] * num_features for _ in range(num_features)]

    print("Specific hypothesis: ", specific_hypothesis)
    print("General hypothesis: ")
    pprint(general_hypothesis)

    for row, output in zip(concepts, target):
        if output == 1:
            # Positive example: generalise every attribute that disagrees.
            for i in range(num_features):
                if row[i] != specific_hypothesis[i]:
                    specific_hypothesis[i] = '?'
                    general_hypothesis[i][i] = '?'

        elif output == 0:
            # Negative example: constrain the attributes on which it differs
            # from the specific hypothesis, and relax the others.
            for i in range(num_features):
                if row[i] != specific_hypothesis[i]:
                    general_hypothesis[i][i] = specific_hypothesis[i]
                else:
                    general_hypothesis[i][i] = '?'

    # Filtering out hypothesis with all '?' in general_hypothesis list
    all_wildcards = ['?'] * num_features
    general_hypothesis = [h for h in general_hypothesis if h != all_wildcards]

    return specific_hypothesis, general_hypothesis

In [5]:
# Run the learner on the attribute matrix and labels prepared in the cells above.
specific_hypothesis, general_hypothesis = learn(concepts, target)

# Report the result; the narrow pprint width keeps one hypothesis per line.
print("\n\nFinal Hypothesis\n")
print("Specific hypothesis: ", specific_hypothesis)
print("General hypothesis: ")
pprint(general_hypothesis, width=40)

Initializing...

Specific hypothesis:  [' Sunny' ' Warm' ' Normal' ' Strong' ' Warm' ' Same']
General hypothesis: 
[['?', '?', '?', '?', '?', '?'],
 ['?', '?', '?', '?', '?', '?'],
 ['?', '?', '?', '?', '?', '?'],
 ['?', '?', '?', '?', '?', '?'],
 ['?', '?', '?', '?', '?', '?'],
 ['?', '?', '?', '?', '?', '?']]


Final Hypothesis

Specific hypothesis:  [' Sunny' ' Warm' '?' ' Strong' '?' '?']
General hypothesis: 
[[' Sunny', '?', '?', '?', '?', '?'],
 ['?', ' Warm', '?', '?', '?', '?']]
