##**Import Required Libraries**

In [1]:
import numpy as np
import math
import csv

##**Read Data from CSV**

In [11]:
def read_data(filename):
    with open(filename, 'r') as f:
        data = list(csv.reader(f))
    return data[0], np.array(data[1:])

##**Calculate Entropy**

In [7]:
def entropy(S):
    counts = np.unique(S, return_counts=True)[1] / len(S)
    return -np.sum(counts * np.log2(counts))

##**Compute Gain Ratio**

In [12]:
def gain_ratio(data, col):
    items, dict = {item: data[data[:, col] == item] for item in np.unique(data[:, col])}, {}
    total_size = len(data)
    iv = -np.sum([len(sub) / total_size * np.log2(len(sub) / total_size) for sub in dict.values()])
    return (entropy(data[:, -1]) - np.sum([len(sub) / total_size * entropy(sub[:, -1]) for sub in dict.values()])) / iv

##**Define Node Class**



In [13]:
class Node:
    def __init__(self, answer=""):
        self.attribute = answer
        self.children = []
        self.answer = answer

##**Build Decision Tree**

In [14]:
def create_node(data, metadata):
    if len(np.unique(data[:, -1])) == 1:
        return Node(np.unique(data[:, -1])[0])

    gains = [gain_ratio(data, col) for col in range(data.shape[1] - 1)]
    best_col = np.argmax(gains)
    node = Node(metadata[best_col])
    metadata = np.delete(metadata, best_col)
    items, dict = {item: data[data[:, best_col] == item] for item in np.unique(data[:, best_col])}, {}

    for item in dict:
        node.children.append((item, create_node(dict[item], metadata)))
    return node

##**Print the Decision Tree**

In [15]:
def print_tree(node, level=0):
    print("  " * level, node.answer or node.attribute)
    for value, child in node.children:
        print("  " * (level + 1), value)
        print_tree(child, level + 2)

##**Run the Algorithm**

In [16]:
# Load Data
metadata, traindata = read_data("/content/drive/MyDrive/enjoysport.csv")

# Create Decision Tree
node = create_node(traindata, metadata)

# Print Decision Tree
print_tree(node)


 sky


  return (entropy(data[:, -1]) - np.sum([len(sub) / total_size * entropy(sub[:, -1]) for sub in dict.values()])) / iv


##**Final program**

In [17]:
import numpy as np
import math
import csv

# Read and return data from CSV
# The function now accepts a filename as an argument
def read_data(filename):
    with open(filename, 'r') as f:
        data = list(csv.reader(f))
    return data[0], np.array(data[1:])


# Calculate entropy
def entropy(S):
    counts = np.unique(S, return_counts=True)[1] / len(S)
    return -np.sum(counts * np.log2(counts))

# Subtables and gain ratio calculation
def gain_ratio(data, col):
    items, dict = {item: data[data[:, col] == item] for item in np.unique(data[:, col])}, {}
    total_size = len(data)
    iv = -np.sum([len(sub) / total_size * np.log2(len(sub) / total_size) for sub in dict.values()])
    return (entropy(data[:, -1]) - np.sum([len(sub) / total_size * entropy(sub[:, -1]) for sub in dict.values()])) / iv

# Create decision tree node
def create_node(data, metadata):
    if len(np.unique(data[:, -1])) == 1:
        return Node(np.unique(data[:, -1])[0])

    gains = [gain_ratio(data, col) for col in range(data.shape[1] - 1)]
    best_col = np.argmax(gains)
    node = Node(metadata[best_col])
    metadata = np.delete(metadata, best_col)
    items, dict = {item: data[data[:, best_col] == item] for item in np.unique(data[:, best_col])}, {}

    for item in dict:
        node.children.append((item, create_node(dict[item], metadata)))
    return node

# Print the decision tree
def print_tree(node, level=0):
    print("  " * level, node.answer or node.attribute)
    for value, child in node.children:
        print("  " * (level + 1), value)
        print_tree(child, level + 2)

class Node:
    def __init__(self, answer=""):
        self.attribute = answer
        self.children = []
        self.answer = answer

# Main
metadata, traindata = read_data("/content/drive/MyDrive/enjoysport.csv") # Call read_data with filename
node = create_node(traindata, metadata)
print_tree(node)

 sky


  return (entropy(data[:, -1]) - np.sum([len(sub) / total_size * entropy(sub[:, -1]) for sub in dict.values()])) / iv
