In [4]:
import pandas as pd

# Step 1: Create the Training Dataset
data = [
    ["Sunny", "Warm", "Normal", "Strong", "Warm", "Same", "Yes"],
    ["Sunny", "Warm", "High", "Strong", "Warm", "Same", "Yes"],
    ["Rainy", "Cold", "High", "Strong", "Warm", "Change", "No"],
    ["Sunny", "Warm", "High", "Strong", "Cool", "Change", "Yes"],
    ["Overcast", "Hot", "High", "Weak", "Cool", "Same", "Yes"],
    ["Rainy", "Warm", "High", "Strong", "Warm", "Same", "No"],
    ["Sunny", "Warm", "Normal", "Strong", "Warm", "Same", "Yes"],
    ["Sunny", "Hot", "High", "Weak", "Cool", "Change", "Yes"],
    ["Overcast", "Cold", "Normal", "Weak", "Warm", "Same", "Yes"],
    ["Rainy", "Cold", "High", "Weak", "Cool", "Change", "No"]
]

# Convert to DataFrame
columns = ["Sky", "Temp", "Humidity", "Wind", "Water", "Forecast", "PlayTennis"]
df = pd.DataFrame(data, columns=columns)

# Candidate Elimination Algorithm
def candidate_elimination(training_data):
    num_attributes = len(training_data.columns) - 1  # Number of attributes

    # Initialize S (Specific Hypothesis) to the most specific
    S = ["∅"] * num_attributes  

    # Initialize G (General Hypothesis) to the most general
    G = [["?"] * num_attributes]  

    for _, row in training_data.iterrows():
        instance = row.iloc[:-1].tolist()  # Extract feature values
        label = row.iloc[-1]  # Extract class label

        if label == "Yes":  # Positive Example
            if S[0] == "∅":
                S = instance  # Initialize S with the first positive example
            else:
                for i in range(num_attributes):
                    if S[i] != instance[i]:  
                        S[i] = "?"  # Generalize S minimally

            # Remove inconsistent hypotheses from G
            G = [g for g in G if all(g[k] == "?" or g[k] == instance[k] for k in range(num_attributes))]

        else:  # Negative Example
            new_G = []
            for g in G:
                for i in range(num_attributes):
                    if g[i] == "?":  
                        new_g = g.copy()
                        new_g[i] = instance[i]  # Specializing G to avoid this negative example
                        if new_g not in new_G:
                            new_G.append(new_g)
            G.extend(new_G)

            # Keep only general hypotheses that do not classify negative examples as positive
            G = [g for g in G if any(g[k] == "?" for k in range(num_attributes))]

    return S, G

# Run the Algorithm and Print Results
S_final, G_final = candidate_elimination(df)
print("Final Specific Hypothesis (S):", S_final)
print("Final General Hypotheses (G):", G_final)


Final Specific Hypothesis (S): ['?', '?', '?', '?', '?', '?']
Final General Hypotheses (G): [['?', '?', '?', '?', '?', '?'], ['Rainy', '?', '?', '?', '?', '?'], ['?', 'Cold', '?', '?', '?', '?'], ['?', '?', 'High', '?', '?', '?'], ['?', '?', '?', 'Weak', '?', '?'], ['?', '?', '?', '?', 'Cool', '?'], ['?', '?', '?', '?', '?', 'Change']]


In [2]:
import numpy as np
import pandas as pd

# Sample training data: (Sky, Temperature, Humidity, Wind, Play Outside?)
data = [
    ["Sunny", "Warm", "Normal", "Strong", "Yes"],
    ["Sunny", "Warm", "High", "Strong", "Yes"],
    ["Rainy", "Cold", "High", "Strong", "No"],
    ["Sunny", "Warm", "Normal", "Weak", "Yes"]
]

# Convert data to a NumPy array
df = np.array(data)

# Extract features (X) and target values (Y)
X = df[:, :-1]  # All columns except last (features)
Y = df[:, -1]   # Last column (labels)

# Initialize S (Most Specific Hypothesis)
S = X[0].copy()  # Start with the first positive example

# Initialize G (Most General Hypothesis)
G = [["?" for _ in range(len(S))] for _ in range(len(S))]  # Fully generalized hypothesis

# Function to update S and G based on the training data
def candidate_elimination(X, Y):
    global S, G

    for i, sample in enumerate(X):
        if Y[i] == "Yes":  # If positive example
            for j in range(len(S)):
                if S[j] != sample[j]:  # If different, generalize S
                    S[j] = "?"
            G = [g for g in G if all(g[j] == "?" or g[j] == S[j] for j in range(len(S)))]  # Remove invalid G hypotheses

        elif Y[i] == "No":  # If negative example
            new_G = []
            for g in G:
                for j in range(len(S)):
                    if g[j] == "?":
                        if S[j] != sample[j]:  # Specialize G only where needed
                            new_hypothesis = g.copy()
                            new_hypothesis[j] = S[j]
                            new_G.append(new_hypothesis)
            G = new_G  # Update G
            
    return S, G

# Run Candidate Elimination
S_final, G_final = candidate_elimination(X, Y)

# Print Final S and G
print("Final Specific Hypothesis (S):", S_final)
print("Final General Hypothesis (G):", G_final)


Final Specific Hypothesis (S): ['Sunny' 'Warm' '?' '?']
Final General Hypothesis (G): [['Sunny', '?', '?', '?'], ['?', 'Warm', '?', '?'], ['?', '?', '?', '?'], ['Sunny', '?', '?', '?'], ['?', 'Warm', '?', '?'], ['?', '?', '?', '?'], ['Sunny', '?', '?', '?'], ['?', 'Warm', '?', '?'], ['?', '?', '?', '?'], ['Sunny', '?', '?', '?'], ['?', 'Warm', '?', '?'], ['?', '?', '?', '?']]
