In [6]:
import pandas as pd

# Step 1: Create the Training Dataset
data = [
    ["Sunny", "Warm", "Normal", "Strong", "Warm", "Same", "Yes"],
    ["Sunny", "Warm", "High", "Strong", "Warm", "Same", "Yes"],
    ["Rainy", "Cold", "High", "Strong", "Warm", "Change", "No"],
    ["Sunny", "Warm", "High", "Strong", "Cool", "Change", "Yes"],
    ["Overcast", "Hot", "High", "Weak", "Cool", "Same", "Yes"],
    ["Rainy", "Warm", "High", "Strong", "Warm", "Same", "No"],
    ["Sunny", "Warm", "Normal", "Strong", "Warm", "Same", "Yes"],
    ["Sunny", "Hot", "High", "Weak", "Cool", "Change", "Yes"],
    ["Overcast", "Cold", "Normal", "Weak", "Warm", "Same", "Yes"],
    ["Rainy", "Cold", "High", "Weak", "Cool", "Change", "No"]
]

# Convert to DataFrame
columns = ["Sky", "Temp", "Humidity", "Wind", "Water", "Forecast", "PlayTennis"]
df = pd.DataFrame(data, columns=columns)

# Candidate Elimination Algorithm
def candidate_elimination(training_data):
    num_attributes = len(training_data.columns) - 1  # Number of attributes

    # Initialize Specific Hypothesis (S) as the most specific
    S = ["∅"] * num_attributes  

    # Initialize General Hypothesis (G) as the most general
    G = [["?"] * num_attributes]  

    for _, row in training_data.iterrows():
        instance = row.iloc[:-1].tolist()  # Extract feature values
        label = row.iloc[-1]  # Extract class label

        if label == "Yes":  # Positive Example
            if S[0] == "∅":
                S = instance  # Initialize S with the first positive example
            else:
                for i in range(num_attributes):
                    if S[i] != instance[i]:  
                        S[i] = "?"  # Generalize S minimally

            # Remove inconsistent hypotheses from G
            G = [g for g in G if all(g[k] == "?" or g[k] == instance[k] for k in range(num_attributes))]

        else:  # Negative Example
            new_G = []
            for g in G:
                for i in range(num_attributes):
                    if g[i] == "?":  # Specializing G
                        new_g = g.copy()
                        new_g[i] = instance[i]
                        if new_g not in new_G:
                            new_G.append(new_g)
            G.extend(new_G)

            # Remove overly specific hypotheses from G
            G = [g for g in G if any(g[k] == "?" for k in range(num_attributes))]

    return S, G

# Run the Algorithm and Print Results
S_final, G_final = candidate_elimination(df)
print("Final Specific Hypothesis (S):", S_final)
print("Final General Hypotheses (G):", G_final)


Final Specific Hypothesis (S): ['?', '?', '?', '?', '?', '?']
Final General Hypotheses (G): [['?', '?', '?', '?', '?', '?'], ['Rainy', '?', '?', '?', '?', '?'], ['?', 'Cold', '?', '?', '?', '?'], ['?', '?', 'High', '?', '?', '?'], ['?', '?', '?', 'Weak', '?', '?'], ['?', '?', '?', '?', 'Cool', '?'], ['?', '?', '?', '?', '?', 'Change']]
