In [20]:
import math


def load_dataset(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()
        dataset = [line.strip().split(',') for line in lines]
        attributes = dataset[0][1:]  # Extract attribute names from the first row
        dataset = dataset[1:]  # Remove the first row (attribute names)
        return dataset, attributes


def create_tree(data, attributes):
    class_labels = [instance[-1] for instance in data]
    if len(set(class_labels)) == 1:
        return class_labels[0]
    if len(attributes) == 0:
        majority_class = get_majority_class(class_labels)
        return majority_class
    best_attribute = select_best_attribute(data, attributes)
    tree = {best_attribute: {}}
    attribute_values = get_attribute_values(data, best_attribute)
    for value in attribute_values:
        subset = get_instances_with_attribute_value(data, best_attribute, value)
        subset_attributes = attributes[:]
        subset_attributes.remove(best_attribute)
        tree[best_attribute][value] = create_tree(subset, subset_attributes)
    return tree


def select_best_attribute(data, attributes):
    best_attribute = None
    best_information_gain = -1.0
    for attribute in attributes:
        attribute_values = get_attribute_values(data, attribute)
        subset_entropy = 0.0
        for value in attribute_values:
            subset = get_instances_with_attribute_value(data, attribute, value)
            subset_probability = len(subset) / float(len(data))
            subset_entropy += subset_probability * calculate_entropy(subset)
        information_gain = calculate_entropy(data) - subset_entropy
        if information_gain > best_information_gain:
            best_information_gain = information_gain
            best_attribute = attribute
    return best_attribute


def get_attribute_values(data, attribute):
    attribute_index = -1
    for i in range(len(data[0])):
        if data[0][i] == attribute:
            attribute_index = i
            break
    attribute_values = []
    for instance in data[1:]:
        attribute_values.append(instance[attribute_index])
    return list(set(attribute_values))


def get_instances_with_attribute_value(data, attribute, value):
    attribute_index = -1
    for i in range(len(data[0])):
        if data[0][i] == attribute:
            attribute_index = i
            break
    instances = []
    for instance in data[1:]:
        if instance[attribute_index] == value:
            instances.append(instance)
    return instances


def calculate_entropy(data):
    class_labels = [instance[-1] for instance in data]
    label_counts = count_class_labels(class_labels)
    entropy = 0.0
    for count in label_counts.values():
        probability = count / len(data)
        entropy -= probability * math.log2(probability)
    return entropy


def count_class_labels(class_labels):
    label_counts = {}
    for label in class_labels:
        if label in label_counts:
            label_counts[label] += 1
        else:
            label_counts[label] = 1
    return label_counts


def get_majority_class(class_labels):
    label_counts = count_class_labels(class_labels)
    majority_class = max(label_counts, key=label_counts.get)
    return majority_class


# Load the dataset
dataset, attributes = load_dataset('student-mat.csv')

# Build the ID3 decision tree
tree = create_tree(dataset, attributes)

# Print the tree
print(tree)


{'sex': {'19': '19', '18': '18', '11': '11', '10': '10', '13': '13', '12': '12', '15': '15', '14': '14', '17': '17', '16': '16', '0': '0', '5': '5', '20': '20', '7': '7', '6': '6', '9': '9', '8': '8', '4': '4'}}


In [3]:
import math


def load_dataset(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()
        dataset = [line.strip().split(',') for line in lines]
        attributes = dataset[0][1:]  # Extract attribute names from the first row
        dataset = dataset[1:]  # Remove the first row (attribute names)
        return dataset, attributes


def create_tree(data, attributes):
    class_labels = [instance[-1] for instance in data]
    if len(set(class_labels)) == 1:
        return class_labels[0]
    if len(attributes) == 0:
        majority_class = get_majority_class(class_labels)
        return majority_class
    best_attribute = select_best_attribute(data, attributes)
    tree = {best_attribute: {}}
    attribute_values = get_attribute_values(data, best_attribute)
    for value in attribute_values:
        subset = get_instances_with_attribute_value(data, best_attribute, value)
        subset_attributes = attributes[:]
        subset_attributes.remove(best_attribute)
        tree[best_attribute][value] = create_tree(subset, subset_attributes)
    return tree


def select_best_attribute(data, attributes):
    best_attribute = None
    best_information_gain = -1.0
    for attribute in attributes:
        attribute_values = get_attribute_values(data, attribute)
        subset_entropy = 0.0
        for value in attribute_values:
            subset = get_instances_with_attribute_value(data, attribute, value)
            subset_probability = len(subset) / float(len(data))
            subset_entropy += subset_probability * calculate_entropy(subset)
        information_gain = calculate_entropy(data) - subset_entropy
        if information_gain > best_information_gain:
            best_information_gain = information_gain
            best_attribute = attribute
    return best_attribute


def get_attribute_values(data, attribute):
    attribute_index = -1
    for i in range(len(data[0])):
        if data[0][i] == attribute:
            attribute_index = i
            break
    attribute_values = []
    for instance in data[1:]:
        attribute_values.append(instance[attribute_index])
    return list(set(attribute_values))


def get_instances_with_attribute_value(data, attribute, value):
    attribute_index = -1
    for i in range(len(data[0])):
        if data[0][i] == attribute:
            attribute_index = i
            break
    instances = []
    for instance in data[1:]:
        if instance[attribute_index] == value:
            instances.append(instance)
    return instances


def calculate_entropy(data):
    class_labels = [instance[-1] for instance in data]
    label_counts = count_class_labels(class_labels)
    entropy = 0.0
    for count in label_counts.values():
        probability = count / len(data)
        entropy -= probability * math.log2(probability)
    return entropy


def count_class_labels(class_labels):
    label_counts = {}
    for label in class_labels:
        if label in label_counts:
            label_counts[label] += 1
        else:
            label_counts[label] = 1
    return label_counts


def get_majority_class(class_labels):
    label_counts = count_class_labels(class_labels)
    majority_class = max(label_counts, key=label_counts.get)
    return majority_class


# Load the dataset
dataset, attributes = load_dataset('StudentsPerformance.csv')

# Build the ID3 decision tree
tree = create_tree(dataset, attributes)

# Print the tree
print(tree)


{'"race/ethnicity"': {'"95"': '"95"', '"42"': '"42"', '"81"': '"81"', '"73"': '"73"', '"22"': '"22"', '"78"': '"78"', '"55"': '"55"', '"65"': '"65"', '"89"': '"89"', '"30"': '"30"', '"96"': '"96"', '"49"': '"49"', '"38"': '"38"', '"41"': '"41"', '"80"': '"80"', '"70"': '"70"', '"79"': '"79"', '"56"': '"56"', '"88"': '"88"', '"97"': '"97"', '"48"': '"48"', '"39"': '"39"', '"64"': '"64"', '"40"': '"40"', '"15"': '"15"', '"87"': '"87"', '"71"': '"71"', '"47"': '"47"', '"76"': '"76"', '"57"': '"57"', '"36"': '"36"', '"86"': '"86"', '"98"': '"98"', '"90"': '"90"', '"67"': '"67"', '"37"': '"37"', '"46"': '"46"', '"50"': '"50"', '"77"': '"77"', '"27"': '"27"', '"58"': '"58"', '"85"': '"85"', '"99"': '"99"', '"66"': '"66"', '"91"': '"91"', '"61"': '"61"', '"34"': '"34"', '"10"': '"10"', '"45"': '"45"', '"84"': '"84"', '"59"': '"59"', '"74"': '"74"', '"51"': '"51"', '"69"': '"69"', '"92"': '"92"', '"35"': '"35"', '"60"': '"60"', '"44"': '"44"', '"19"': '"19"', '"83"': '"83"', '"75"': '"75"', '"

In [2]:
import math


def load_dataset(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()
        dataset = [line.strip().split(',') for line in lines]
        attributes = dataset[0][:-1]  # Extract attribute names from the first row
        dataset = dataset[1:]  # Remove the first row (attribute names)
        return dataset, attributes


def create_tree(data, attributes):
    class_labels = [instance[-1] for instance in data]
    if len(set(class_labels)) == 1:
        return class_labels[0]
    if len(attributes) == 0:
        majority_class = get_majority_class(class_labels)
        return majority_class
    best_attribute, ranked_attributes = select_best_attribute(data, attributes)
    tree = {best_attribute: {}}
    for value in ranked_attributes:
        subset = get_instances_with_attribute_value(data, best_attribute, value)
        subset_attributes = attributes[:]
        subset_attributes.remove(best_attribute)
        tree[best_attribute][value] = create_tree(subset, subset_attributes)
    return tree


def select_best_attribute(data, attributes):
    best_attribute = None
    best_information_gain = -1.0
    ranked_attributes = []
    for attribute in attributes:
        attribute_values = get_attribute_values(data, attribute)
        subset_entropy = 0.0
        for value in attribute_values:
            subset = get_instances_with_attribute_value(data, attribute, value)
            subset_probability = len(subset) / float(len(data))
            subset_entropy += subset_probability * calculate_entropy(subset)
        information_gain = calculate_entropy(data) - subset_entropy
        ranked_attributes.append((attribute, information_gain))
    ranked_attributes.sort(key=lambda x: x[1], reverse=True)
    return ranked_attributes[0][0], [attr[0] for attr in ranked_attributes]


def get_attribute_values(data, attribute):
    attribute_index = attributes.index(attribute)
    attribute_values = []
    for instance in data:
        attribute_values.append(instance[attribute_index])
    return list(set(attribute_values))


def get_instances_with_attribute_value(data, attribute, value):
    attribute_index = attributes.index(attribute)
    instances = []
    for instance in data:
        if instance[attribute_index] == value:
            instances.append(instance)
    return instances


def calculate_entropy(data):
    class_labels = [instance[-1] for instance in data]
    label_counts = count_class_labels(class_labels)
    entropy = 0.0
    for count in label_counts.values():
        probability = count / len(data)
        entropy -= probability * math.log2(probability)
    return entropy


def count_class_labels(class_labels):
    label_counts = {}
    for label in class_labels:
        if label in label_counts:
            label_counts[label] += 1
        else:
            label_counts[label] = 1
    return label_counts


def get_majority_class(class_labels):
    if len(class_labels) == 0:
        return None
    label_counts = count_class_labels(class_labels)
    majority_class = max(label_counts, key=label_counts.get)
    return majority_class

# Load the dataset
dataset, attributes = load_dataset('iris.csv')

# Build the ID3 decision tree
tree = create_tree(dataset, attributes)

# Print the tree
print(tree)


{'sepal_length': {'sepal_length': {'petal_length': {'petal_length': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}, 'petal_width': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}, 'sepal_width': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}}}, 'sepal_width': {'petal_length': {'petal_length': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}, 'petal_width': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}, 'sepal_width': {'petal_width': {'petal_width': {'sepal_width': {'sepal_width': None}}, 'sepal_width': {'sepal_width': {'sepal_width': None}}}}}}, 'petal_length': {'petal_length': {'petal_leng