In [6]:
import math
import csv


# Function to load a CSV file
def load_csv(filename):
    """Read a CSV file and separate its header row from the data rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple. ``headers`` is the first non-blank
        row of the file and ``dataset`` is the list of remaining non-blank
        rows; every row is a list of strings. Both are empty lists when the
        file contains no rows, so callers can test ``if not dataset``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # newline="" hands newline handling to the csv module, which is what the
    # csv documentation asks for (quoted fields may contain line breaks).
    with open(filename, "r", newline="") as file:
        # csv.reader yields [] for blank lines; such rows carry no label
        # column, so they are dropped here.
        rows = [row for row in csv.reader(file) if row]

    if not rows:
        return [], []

    headers, *dataset = rows
    return dataset, headers


# Node class for the decision tree
class Node:
    """A single vertex of the decision tree.

    Attributes:
        attribute: Name of the feature this node splits on (``None`` by
            default).
        children: List that starts empty; the tree builder appends
            ``(feature_value, subtree)`` pairs to it.
        answer: Class label stored at a leaf. The default empty string is
            falsy, which is how the rest of the code tells internal nodes
            from leaves.
    """

    def __init__(self, attribute=None, answer=""):
        self.attribute, self.answer = attribute, answer
        # A fresh list per instance so nodes never share children.
        self.children = list()

# Function to split data based on a column
def split_data(data, col):
    """Group rows by the value found in column ``col``.

    Args:
        data: List of rows (each row a list).
        col: Index of the column to partition on.

    Returns:
        A dict mapping each distinct value of column ``col`` to the list of
        rows that had that value, with column ``col`` removed from each row.
        Keys appear in order of first occurrence.
    """
    groups = {}
    for record in data:
        # Everything except the split column is carried into the subset.
        remainder = record[:col] + record[col + 1:]
        groups.setdefault(record[col], []).append(remainder)
    return groups

# Function to calculate entropy
def entropy(data):
    """Return the base-2 Shannon entropy of a sequence of labels.

    Args:
        data: Sequence of hashable labels.

    Returns:
        The entropy in bits; ``0`` when ``data`` is empty.
    """
    # Tally how often each label occurs.
    frequency = {}
    for label in data:
        frequency[label] = frequency.get(label, 0) + 1

    size = len(data)
    result = 0
    for occurrences in frequency.values():
        share = occurrences / size
        result -= share * math.log2(share)
    return result

# Function to calculate information gain
def information_gain(data, col):
    """Return the information gain from splitting ``data`` on column ``col``.

    The gain is the entropy of the labels (last element of every row) minus
    the size-weighted entropy of the labels in each subset produced by the
    split.

    Args:
        data: List of rows whose last element is the class label.
        col: Index of the column to evaluate.

    Returns:
        The entropy reduction achieved by the split.
    """
    baseline = entropy([record[-1] for record in data])
    n_rows = len(data)

    weighted = 0
    for part in split_data(data, col).values():
        weight = len(part) / n_rows
        weighted += weight * entropy([record[-1] for record in part])

    return baseline - weighted

# Function to build the decision tree
def build_tree(data, features):
    """Recursively build an ID3 decision tree.

    Args:
        data: Non-empty list of rows; the last element of every row is the
            class label and the preceding elements are attribute values.
        features: Column names aligned with the columns of ``data``.

    Returns:
        The root ``Node``. Leaves carry the class label in ``answer``;
        internal nodes carry the chosen column name in ``attribute`` and a
        list of ``(value, subtree)`` pairs in ``children``.

    Raises:
        IndexError: If ``data`` is empty.
    """
    labels = [row[-1] for row in data]
    if len(set(labels)) == 1:
        return Node(answer=labels[0])

    n_attributes = len(data[0]) - 1
    if n_attributes < 1:
        # Every attribute has been used but the labels still disagree
        # (identical attribute values with conflicting labels). There is
        # nothing left to split on, so fall back to the majority label;
        # ties go to the label seen first.
        majority = max(dict.fromkeys(labels), key=labels.count)
        return Node(answer=majority)

    gains = [information_gain(data, col) for col in range(n_attributes)]
    best_feature = gains.index(max(gains))
    tree = Node(attribute=features[best_feature])

    # split_data removes the chosen column from the rows, so the feature
    # names handed to the subtrees must drop it as well to stay aligned.
    remaining = features[:best_feature] + features[best_feature + 1:]
    for key, subset in split_data(data, best_feature).items():
        tree.children.append((key, build_tree(subset, remaining)))

    return tree

# Function to print the tree
def print_tree(node, level=0):
    """Print the tree rooted at ``node``, indenting by depth.

    Args:
        node: Tree node exposing ``answer``, ``attribute`` and ``children``.
        level: Number of spaces of indentation for this node's line.
    """
    indent = " " * level

    # A truthy answer marks a leaf: print the label and stop.
    if node.answer:
        print(indent, node.answer)
        return

    print(indent, node.attribute)
    branch_indent = " " * (level + 1)
    for value, child in node.children:
        print(branch_indent, value)
        print_tree(child, level + 2)

# Function to classify a new instance using the tree
def classify(node, instance, features):
    """Predict the label of ``instance`` by walking the tree from ``node``.

    Args:
        node: Root of the (sub)tree to descend.
        instance: List of attribute values aligned with ``features``.
        features: Column names aligned with ``instance``.

    Returns:
        The ``answer`` of the leaf that is reached, or ``None`` when the
        instance holds a value for which the current node has no branch.
    """
    while not node.answer:
        position = features.index(node.attribute)

        for value, child in node.children:
            if instance[position] == value:
                break
        else:
            # No branch matches this attribute value.
            return None

        # Subtrees were built on data with the split column removed, so the
        # instance and feature names drop it too before descending.
        instance = instance[:position] + instance[position + 1:]
        features = features[:position] + features[position + 1:]
        node = child

    return node.answer

# Main program
if __name__ == "__main__":
    # Load training data. A missing or unreadable file is reported through
    # the same "not available" message as an empty one instead of ending
    # the program with a traceback.
    try:
        train_data, features = load_csv("P4_data.csv")
    except OSError:
        train_data, features = [], []

    if not train_data:
        print("Training data not available. Exiting program.")
    else:
        # Build decision tree
        decision_tree = build_tree(train_data, features)

        print("The decision tree for the dataset using ID3 algorithm is")
        print_tree(decision_tree)

        # Load test data. This reads the same file as the training step, so
        # each instance still carries its label as the last element.
        test_data, _ = load_csv("P4_data.csv")
        for instance in test_data:
            print("The test instance :", instance)
            prediction = classify(decision_tree, instance, features)
            print("The predicted label :", prediction)


The decision tree for the dataset using ID3 algorithm is
 Outlook
  sunny
   Humidity
    high
     no
    normal
     yes
  overcast
   yes
  rain
   Wind
    weak
     yes
    strong
     no
The test instance : ['sunny', 'hot', 'high', 'weak', 'no']
The predicted label : no
The test instance : ['sunny', 'hot', 'high', 'strong', 'no']
The predicted label : no
The test instance : ['overcast', 'hot', 'high', 'weak', 'yes']
The predicted label : yes
The test instance : ['rain', 'mild', 'high', 'weak', 'yes']
The predicted label : yes
The test instance : ['rain', 'cool', 'normal', 'weak', 'yes']
The predicted label : yes
The test instance : ['rain', 'cool', 'normal', 'strong', 'no']
The predicted label : no
The test instance : ['overcast', 'cool', 'normal', 'strong', 'yes']
The predicted label : yes
The test instance : ['sunny', 'mild', 'high', 'weak', 'no']
The predicted label : no
The test instance : ['sunny', 'cool', 'normal', 'weak', 'yes']
The predicted label : yes
The test instance 