## **Candidate Elimination Algorithm**

### Data pre-processing

In [1]:
import pandas as pd

In [2]:
# Load the training examples; the relative path is resolved against the
# current working directory.
data = pd.read_csv("data1.csv")

# Bare expression so the notebook renders the loaded frame as the cell output.
data

Unnamed: 0,Sky,Temperature,Humid,Wind,Water,Forest,Output
0,sunny,warm,normal,strong,warm,same,yes
1,sunny,warm,high,strong,warm,same,yes
2,rainy,cold,high,strong,warm,change,no
3,sunny,warm,high,strong,cool,change,yes


In [3]:
# Attribute matrix: every column except the last one, one row per example.
features = data.iloc[:, :-1].to_numpy()

features

array([['sunny', 'warm', 'normal', 'strong', 'warm', 'same'],
       ['sunny', 'warm', 'high', 'strong', 'warm', 'same'],
       ['rainy', 'cold', 'high', 'strong', 'warm', 'change'],
       ['sunny', 'warm', 'high', 'strong', 'cool', 'change']],
      dtype=object)

In [4]:
# Labels: the last column, flattened into a plain Python list (one per example).
target = list(data.iloc[:, -1])

target

['yes', 'yes', 'no', 'yes']

### Algorithm

#### Initializing S and G

In [5]:
# Specific boundary: start from the first training example.
# Copy the row instead of aliasing it: `features[0]` is a NumPy view, and
# generalizeS() writes '?' into S in place, so without the copy the training
# matrix itself would be overwritten while learning.
# NOTE(review): this presumes the first example is a positive one -- confirm
# for other datasets.
S = features[0].copy()

# General boundary: one all-'?' hypothesis per attribute, so that row i can be
# specialised on attribute i (specializeG() addresses G[i][i]).
G = [['?'] * len(features[0]) for _ in range(len(features[0]))]

print("S:", S)
print("G:", G)

S: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
G: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]


#### Generalize S

In [6]:
def generalizeS(S: list, instance: list)->list:
    """Generalise the specific hypothesis so that it covers `instance`.

    Every position of `S` that disagrees with `instance` is replaced by the
    wildcard '?'; agreeing positions keep their value.

    Args:
        S: current specific hypothesis; it is modified in place.
        instance: attribute values of one training example, indexed like `S`.

    Returns:
        The same `S` object that was passed in, after the update.
    """
    for pos, observed in enumerate(instance):
        if S[pos] != observed:
            S[pos] = '?'
        else:
            S[pos] = observed
    return S

#### Check if a hypothesis in G is consistent with S

In [7]:
def isConsistentWithS(S: list, instance: list) -> bool:
    """Tell whether the hypothesis `instance` agrees with the specific hypothesis `S`.

    A position of `instance` holding '?' places no constraint. Any other value
    must be equal to the value `S` holds at the same position; in particular a
    concrete value in `instance` never matches a '?' in `S`.

    Args:
        S: specific hypothesis to compare against.
        instance: hypothesis being checked, indexed like `S`.

    Returns:
        True when every constrained position of `instance` matches `S`,
        False as soon as one does not.
    """
    for pos, constraint in enumerate(instance):
        specific_value = S[pos]
        if constraint == '?':
            # Wildcard in the checked hypothesis: nothing to verify here.
            continue
        if specific_value == '?' or specific_value != constraint:
            return False
    return True

#### Specialize G

In [8]:
def specializeG(S: list, G: list[list], instance: list)->list[list]:
    """Specialise the general hypotheses against one negative example.

    Row `i` of `G` is treated as the hypothesis for attribute `i`: its `i`-th
    slot is set to `S[i]` when `S[i]` differs from the negative example's value
    at `i`, and reset to '?' when the two are equal.

    Args:
        S: current specific hypothesis.
        G: list of general hypotheses, addressed positionally (row i <-> attribute i);
            the rows are modified in place.
        instance: attribute values of the negative example.

    Returns:
        The same `G` object that was passed in, after the update.
    """
    for attr, hypothesis in enumerate(G):
        hypothesis[attr] = S[attr] if S[attr] != instance[attr] else '?'
    return G

#### Remove Inconsistent Hypotheses from G

In [9]:
def cleanUpG(S: list, G: list[list])-> list[list]:
    """Prune `G` in place, dropping hypotheses that carry no usable constraint.

    A hypothesis is removed when isConsistentWithS() rejects it against `S`,
    or when it consists solely of '?' wildcards.

    Args:
        S: specific hypothesis the general hypotheses are checked against.
        G: list of general hypotheses; entries are removed from this list.

    Returns:
        The same `G` object that was passed in, after pruning.
    """
    position = 0
    while position < len(G):
        hypothesis = G[position]
        all_wildcards = ['?'] * len(G[0])
        if isConsistentWithS(S, hypothesis) and hypothesis != all_wildcards:
            # Keep this one and move on to the next slot.
            position += 1
            continue
        # Removal shifts the remaining entries left, so `position` stays put.
        G.remove(hypothesis)
    return G

#### Candidate Elimination Algorithm

In [10]:
def candidateElimination(S: list, G: list, features: list[list], target: list):
    """Run the notebook's simplified candidate-elimination pass over the training set.

    Positive examples ('yes') generalise the specific hypothesis via
    generalizeS(); negative examples ('no') specialise the general hypotheses
    via specializeG(). Labels are compared case-insensitively and any other
    label is skipped. The general boundary is pruned with cleanUpG() once all
    examples have been processed.

    Args:
        S: initial specific hypothesis (a list or NumPy array of attribute values).
        G: initial general hypotheses, expected to hold one row per attribute
            (row i <-> attribute i), which is how specializeG() addresses them.
        features: attribute values, one row per training example.
        target: labels, one per row of `features`.

    Returns:
        A `(S, G)` tuple with the learned specific hypothesis and the pruned
        list of general hypotheses. The arguments themselves are left untouched.
    """
    # Work on private copies: the helpers update S and G in place, and S may be
    # a view into `features`, so mutating the arguments would corrupt the
    # caller's data and make a second call start from a different state.
    S = S.copy()
    G = [list(hypothesis) for hypothesis in G]

    # Attributes whose single-attribute hypothesis failed to exclude some
    # negative example. They must stay unconstrained from then on; a later
    # specializeG() call would otherwise re-introduce the constraint.
    ruled_out = set()

    for i in range(len(target)):
        label = target[i].lower()
        if label == 'yes':
            S = generalizeS(S, features[i])
        elif label == 'no':
            G = specializeG(S, G, features[i])
            # G is deliberately NOT pruned here: removing rows would shift the
            # remaining ones, and specializeG() relies on row i describing
            # attribute i. Dead rows are blanked instead and pruned at the end.
            for attr, hypothesis in enumerate(G):
                if attr in ruled_out or hypothesis[attr] == '?':
                    hypothesis[attr] = '?'
                    ruled_out.add(attr)

    # Single pruning pass against the final S removes the blanked rows and any
    # hypothesis that is no longer consistent with S.
    cleanUpG(S, G)
    return S, G


#### Training the model

In [11]:
# Learn both boundaries from the full training set and print one per line.
SH, GH = candidateElimination(S, G, features, target)
print(SH, GH, sep="\n")

['sunny' 'warm' '?' 'strong' '?' '?']
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
