In [2]:
import pandas as pd
def generalize(hypothesis, example):
    """Minimally generalize ``hypothesis`` in place so that it accepts ``example``.

    Each position of ``hypothesis`` holds either a concrete attribute value,
    ``'?'`` (any value is accepted) or ``'ϕ'`` (no value is accepted).

    Args:
        hypothesis: Mutable list of constraints; it is modified in place.
        example: Sequence of attribute values, at least as long as
            ``hypothesis``.

    Returns:
        The same ``hypothesis`` list object, after the update.
    """
    for i, constraint in enumerate(hypothesis):
        if constraint == 'ϕ':
            # The smallest step up from "accept nothing" is to accept exactly
            # the observed value; jumping straight to '?' would over-generalize.
            hypothesis[i] = example[i]
        elif constraint != example[i]:
            # Two different concrete values can only be reconciled by '?'.
            hypothesis[i] = '?'
    return hypothesis
def specialize(hypothesis, example, attributes):
    """Return the one-step specializations of ``hypothesis`` that reject ``example``.

    For every position where ``hypothesis`` holds ``'?'``, one new hypothesis
    is produced per known attribute value that differs from the example's
    value at that position. ``hypothesis`` itself is left untouched.

    Args:
        hypothesis: List of constraints (concrete values or ``'?'``).
        example: Attribute values of the example to be excluded.
        attributes: ``attributes[i]`` lists the known values of attribute ``i``.

    Returns:
        A list of new hypothesis lists, ordered by position and then by the
        order of values in ``attributes``.
    """
    refinements = []
    for pos, slot in enumerate(hypothesis):
        if slot != '?':
            # Already-constrained positions cannot be tightened any further.
            continue
        for candidate in attributes[pos]:
            if candidate == example[pos]:
                # Pinning the example's own value would still accept it.
                continue
            refined = hypothesis[:]
            refined[pos] = candidate
            refinements.append(refined)
    return refinements
def _covers(hypothesis, example):
    """Return True when every constraint of ``hypothesis`` accepts ``example``."""
    return all(h == '?' or h == e for h, e in zip(hypothesis, example))


def _at_least_as_general(general, specific):
    """Return True when ``general`` accepts everything ``specific`` accepts."""
    # A 'ϕ' constraint accepts nothing, so any constraint is at least as general.
    return all(g == '?' or g == s or s == 'ϕ' for g, s in zip(general, specific))


def _generalize_to_cover(hypothesis, example):
    """Return the minimal generalization of ``hypothesis`` that accepts ``example``."""
    return [
        e if h == 'ϕ' else (h if h == e else '?')
        for h, e in zip(hypothesis, example)
    ]


def _minimal_specializations(hypothesis, example, attributes):
    """Return the one-step specializations of ``hypothesis`` that reject ``example``."""
    return [
        hypothesis[:i] + [value] + hypothesis[i + 1:]
        for i, constraint in enumerate(hypothesis)
        if constraint == '?'
        for value in attributes[i]
        if value != example[i]
    ]


def _keep_maximally_general(hypotheses):
    """Drop duplicates and every hypothesis that another one generalizes."""
    unique = []
    for h in hypotheses:
        if h not in unique:
            unique.append(h)
    return [
        h for h in unique
        if not any(o != h and _at_least_as_general(o, h) for o in unique)
    ]


def candidate_elimination_algorithm(data, positive_label="Yes", negative_label="No"):
    """Compute the specific (S) and general (G) boundaries of the version space.

    The last column of ``data`` is read as the class label and every other
    column as an attribute. Rows whose label equals neither ``positive_label``
    nor ``negative_label`` are ignored.

    Args:
        data: pandas DataFrame holding the training examples.
        positive_label: Label value that marks a positive example.
        negative_label: Label value that marks a negative example.

    Returns:
        A tuple ``(S, G)`` of lists of hypotheses. Each hypothesis is a list
        with one entry per attribute: a concrete value, ``'?'`` (any value) or
        ``'ϕ'`` (no value). Both lists are empty when no conjunctive
        hypothesis is consistent with the examples.
    """
    n_attributes = len(data.columns) - 1
    attributes = [data[col].unique().tolist() for col in data.columns[:-1]]
    S = [['ϕ'] * n_attributes]  # Most specific hypothesis
    G = [['?'] * n_attributes]  # Most general hypothesis

    for _, row in data.iterrows():
        # Convert to a plain list first: integer keys on a label-indexed
        # Series (row[-1]) are deprecated positional lookups in pandas.
        values = row.tolist()
        example, label = values[:-1], values[-1]

        if label == positive_label:
            # General hypotheses that reject a positive example are wrong.
            G = [g for g in G if _covers(g, example)]
            S = [_generalize_to_cover(s, example) for s in S]
        elif label == negative_label:
            # Specific hypotheses that accept a negative example are wrong.
            S = [s for s in S if not _covers(s, example)]
            refined = []
            for g in G:
                if not _covers(g, example):
                    # Already rejects the example; tightening it further
                    # would needlessly shrink the general boundary.
                    refined.append(g)
                    continue
                # Keep only specializations that still admit the positives
                # summarized by S.
                refined.extend(
                    h for h in _minimal_specializations(g, example, attributes)
                    if any(_at_least_as_general(h, s) for s in S)
                )
            G = _keep_maximally_general(refined)

    # S never holds more than one hypothesis here, so only G needs pruning.
    return S, _keep_maximally_general(G)
# Load the training table and learn both version-space boundaries from it.
data = pd.read_csv("agri.csv")
S, G = candidate_elimination_algorithm(data)
# Print each boundary under its heading, one hypothesis per line.
for heading, boundary in (
    ("Most Specific Hypothesis (S):", S),
    ("\nMost General Hypothesis (G):", G),
):
    print(heading)
    for hypothesis in boundary:
        print(hypothesis)


  label = row[-1]


Most Specific Hypothesis (S):
['ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ']

Most General Hypothesis (G):
['?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?']


In [3]:
def read_csv(filename, delimiter=',', encoding=None):
    """Read a delimited text file into a header row and a list of data rows.

    Surrounding whitespace is stripped from each line and blank lines are
    skipped. Fields are parsed with the standard ``csv`` module, so a quoted
    field may contain the delimiter.

    Args:
        filename: Path of the file to read.
        delimiter: Single-character field separator.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        A tuple ``(headers, rows)``: ``headers`` is the first non-blank line
        split into fields, ``rows`` is a list with one list of string fields
        per remaining non-blank line. Both are empty when the file has no
        non-blank lines.
    """
    import csv

    with open(filename, 'r', encoding=encoding) as file:
        stripped_lines = (line.strip() for line in file)
        # Blank lines (typically a trailing newline) would otherwise turn
        # into bogus single-field rows.
        records = list(
            csv.reader((line for line in stripped_lines if line), delimiter=delimiter)
        )

    if not records:
        return [], []
    return records[0], records[1:]

def generalize(hypothesis, example):
    """Minimally generalize ``hypothesis`` in place so that it accepts ``example``.

    Each position of ``hypothesis`` holds either a concrete attribute value,
    ``'?'`` (any value is accepted) or ``'ϕ'`` (no value is accepted).

    Args:
        hypothesis: Mutable list of constraints; it is modified in place.
        example: Sequence of attribute values, at least as long as
            ``hypothesis``.

    Returns:
        The same ``hypothesis`` list object, after the update.
    """
    for i, constraint in enumerate(hypothesis):
        if constraint == 'ϕ':
            # "Accept nothing" widens to exactly the observed value, not to
            # the wildcard, so the result stays as specific as possible.
            hypothesis[i] = example[i]
        elif constraint != example[i]:
            # Conflicting concrete values can only be reconciled by '?'.
            hypothesis[i] = '?'
    return hypothesis

def specialize(hypothesis, example, attributes):
    """Build every one-step specialization of ``hypothesis`` that excludes ``example``.

    Only positions currently set to ``'?'`` are tightened. Each of them is
    pinned, in turn, to every known value other than the example's value at
    that position. The input ``hypothesis`` is not modified.

    Args:
        hypothesis: List of constraints (concrete values or ``'?'``).
        example: Attribute values of the example to be excluded.
        attributes: ``attributes[i]`` lists the known values of attribute ``i``.

    Returns:
        A list of new hypothesis lists, ordered by position and then by the
        order of values in ``attributes``.
    """
    def pinned(position, value):
        # Copy first so the caller's hypothesis stays untouched.
        clone = hypothesis[:]
        clone[position] = value
        return clone

    return [
        pinned(position, value)
        for position, slot in enumerate(hypothesis)
        if slot == '?'
        for value in attributes[position]
        if value != example[position]
    ]

def _covers(hypothesis, example):
    """Return True when every constraint of ``hypothesis`` accepts ``example``."""
    return all(h == '?' or h == e for h, e in zip(hypothesis, example))


def _at_least_as_general(general, specific):
    """Return True when ``general`` accepts everything ``specific`` accepts."""
    # A 'ϕ' constraint accepts nothing, so any constraint is at least as general.
    return all(g == '?' or g == s or s == 'ϕ' for g, s in zip(general, specific))


def _generalize_to_cover(hypothesis, example):
    """Return the minimal generalization of ``hypothesis`` that accepts ``example``."""
    return [
        e if h == 'ϕ' else (h if h == e else '?')
        for h, e in zip(hypothesis, example)
    ]


def _minimal_specializations(hypothesis, example, attributes):
    """Return the one-step specializations of ``hypothesis`` that reject ``example``."""
    return [
        hypothesis[:i] + [value] + hypothesis[i + 1:]
        for i, constraint in enumerate(hypothesis)
        if constraint == '?'
        for value in attributes[i]
        if value != example[i]
    ]


def _keep_maximally_general(hypotheses):
    """Drop duplicates and every hypothesis that another one generalizes."""
    unique = []
    for h in hypotheses:
        if h not in unique:
            unique.append(h)
    return [
        h for h in unique
        if not any(o != h and _at_least_as_general(o, h) for o in unique)
    ]


def candidate_elimination_algorithm(headers, data, positive_label='e', negative_label="No"):
    """Compute the specific (S) and general (G) boundaries of the version space.

    The last field of every row is read as the class label and the other
    fields as attribute values. Rows whose field count differs from
    ``headers`` are skipped, as are rows whose label equals neither
    ``positive_label`` nor ``negative_label``.

    NOTE(review): the defaults ('e' / "No") are kept from the original code
    for backward compatibility but look like they come from two different
    label schemes - confirm against the dataset and pass explicit labels.

    Args:
        headers: List of column names; the last one names the label column.
        data: List of rows, each a list of field values.
        positive_label: Label value that marks a positive example.
        negative_label: Label value that marks a negative example.

    Returns:
        A tuple ``(S, G)`` of lists of hypotheses. Each hypothesis is a list
        with one entry per attribute: a concrete value, ``'?'`` (any value) or
        ``'ϕ'`` (no value). Both lists are empty when no conjunctive
        hypothesis is consistent with the examples.
    """
    n_attributes = len(headers) - 1
    # Blank or ragged lines carry no usable example.
    rows = [row for row in data if len(row) == len(headers)]

    # Collect the distinct values of each attribute in first-seen order.
    attributes = [[] for _ in range(n_attributes)]
    for row in rows:
        for i, value in enumerate(row[:-1]):
            if value not in attributes[i]:
                attributes[i].append(value)

    S = [['ϕ'] * n_attributes]  # Most specific hypothesis
    G = [['?'] * n_attributes]  # Most general hypothesis

    for row in rows:
        example, label = row[:-1], row[-1]

        if label == positive_label:
            # General hypotheses that reject a positive example are wrong.
            G = [g for g in G if _covers(g, example)]
            S = [_generalize_to_cover(s, example) for s in S]
        elif label == negative_label:
            # Specific hypotheses that accept a negative example are wrong.
            S = [s for s in S if not _covers(s, example)]
            refined = []
            for g in G:
                if not _covers(g, example):
                    # Already rejects the example; tightening it further
                    # would needlessly shrink the general boundary.
                    refined.append(g)
                    continue
                # Keep only specializations that still admit the positives
                # summarized by S.
                refined.extend(
                    h for h in _minimal_specializations(g, example, attributes)
                    if any(_at_least_as_general(h, s) for s in S)
                )
            G = _keep_maximally_general(refined)

    # S never holds more than one hypothesis here, so only G needs pruning.
    return S, _keep_maximally_general(G)

# Parse the training file and learn both version-space boundaries from it.
headers, data = read_csv("agri.csv")
S, G = candidate_elimination_algorithm(headers, data)

# Map each heading to its boundary set; dict order fixes the print order.
report = {
    "Most Specific Hypothesis (S):": S,
    "\nMost General Hypothesis (G):": G,
}
for heading, boundary in report.items():
    print(heading)
    for hypothesis in boundary:
        print(hypothesis)


Most Specific Hypothesis (S):
['ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ', 'ϕ']

Most General Hypothesis (G):
['?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?']
