In [13]:
# Candidate Elimination Algorithm for EnjoySport dataset (with CSV)

import pandas as pd

# Step 1: Read the training examples from the CSV file
data = pd.read_csv("enjoysport.csv")
print("Dataset Loaded:\n", data, "\n")

# Step 2: Set up the two version-space boundaries.
# Every column except the last one is counted as an attribute; the last
# column is read as the class label by the loop further down.
n_features = len(data.columns) - 1
# S: most specific hypothesis ("Ø" in every attribute position)
S = ["Ø" for _ in range(n_features)]
# G: list holding the single most general hypothesis ("?" everywhere)
G = [["?" for _ in range(n_features)]]

def more_general(h1, h2):
    """Return True if hypothesis h1 is more general than or equal to h2.

    Both hypotheses are sequences of attribute constraints that are compared
    position by position (``zip`` stops at the shorter one).  A constraint is
    ``"?"`` (any value is accepted), ``"Ø"`` (no value is accepted) or a
    concrete attribute value.

    At a single position h1 is at least as general as h2 when
      * h1 holds ``"?"``, or
      * h2 holds ``"Ø"`` -- the empty constraint accepts nothing, so every
        constraint is at least as general as it, or
      * both hold the same constraint.

    Args:
        h1: candidate for the more general hypothesis.
        h2: hypothesis to compare against.

    Returns:
        True when the condition above holds at every position.
    """
    return all(
        x == "?" or y == "Ø" or x == y
        for x, y in zip(h1, h2)
    )

# Step 3: Candidate Elimination Algorithm
def _covers(hypothesis, example):
    """Return True if every constraint in hypothesis accepts example.

    A constraint accepts a value when it is "?" or equal to that value.
    """
    return all(c == "?" or c == v for c, v in zip(hypothesis, example))


# Distinct values seen for each attribute, in order of first appearance.
# Built once up front so the specialisation order (and therefore the printed
# G) is the same on every run instead of depending on set iteration order.
attribute_values = [
    list(dict.fromkeys(data.iloc[:, j])) for j in range(n_features)
]

# Number the steps by position so the printout does not depend on the
# DataFrame's index labels being integers.
for step, (_, row) in enumerate(data.iterrows(), start=1):
    # Positional access via .iloc: the row is indexed by column name, and
    # plain integer keys such as row[-1] are deprecated for such a Series.
    instance = row.iloc[:-1].tolist()
    label = row.iloc[-1]

    if label == "Yes":  # Positive example
        # Generalize S minimally to cover instance
        for j, value in enumerate(instance):
            if S[j] == "Ø":
                S[j] = value
            elif S[j] != value:
                S[j] = "?"
        # Remove from G any hypothesis that does not cover the instance
        G = [g for g in G if _covers(g, instance)]

    else:  # Negative example
        new_G = []
        for g in G:
            if not _covers(g, instance):
                # g already excludes the negative instance: keep it as is.
                if g not in new_G:
                    new_G.append(g)
                continue
            # Specialize g minimally to exclude the negative instance:
            # replace one "?" with any other value of that attribute.
            for j in range(n_features):
                if g[j] != "?":
                    continue
                for val in attribute_values[j]:
                    if val == instance[j]:
                        continue
                    new_hypothesis = g.copy()
                    new_hypothesis[j] = val
                    # Keep only specializations that still sit above S.
                    if (more_general(new_hypothesis, S)
                            and new_hypothesis not in new_G):
                        new_G.append(new_hypothesis)
        # G is the *general* boundary: drop any hypothesis that is strictly
        # less general than another member.
        G = [
            h for h in new_G
            if not any(other != h and more_general(other, h)
                       for other in new_G)
        ]

    print(f"Step {step} → Instance: {instance}, Label: {label}")
    print("S:", S)
    print("G:", G)
    print("--------------------------------------------------")

# Step 4: Final Output
print("\nFinal Specific Boundary (S):", S)
print("Final General Boundary (G):", G)


Dataset Loaded:
      Sky AirTemp Humidity    Wind Water Forecast EnjoySport
0  Sunny    Warm   Normal  Strong  Warm     Same        Yes
1  Sunny    Warm     High  Strong  Warm     Same        Yes
2  Rainy    Cold     High  Strong  Warm   Change         No
3  Sunny    Warm     High  Strong  Cold   Change        Yes 

Step 1 → Instance: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'], Label: Yes
S: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
G: [['?', '?', '?', '?', '?', '?']]
--------------------------------------------------
Step 2 → Instance: ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'], Label: Yes
S: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
G: [['?', '?', '?', '?', '?', '?']]
--------------------------------------------------
Step 3 → Instance: ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change'], Label: No
S: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?'

  label = row[-1]
