In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split


In [None]:
# Node class
class Node:
    """One vertex of a binary decision tree.

    The constructor stores its five arguments unchanged as attributes:

    value     -- payload of the node (the predicted label on leaves).
    left      -- child node for the "left" branch, or None.
    right     -- child node for the "right" branch, or None.
    feature   -- index of the feature tested at this node, or None.
    threshold -- cut point compared against that feature, or None.
    """

    def __init__(self, value=None, left=None, right=None, feature=None, threshold=None):
        # Split description.
        self.feature, self.threshold = feature, threshold
        # Sub-trees hanging below this node.
        self.left, self.right = left, right
        # Payload carried by the node.
        self.value = value

    def is_leaf_node(self):
        """Return True when the node has neither a left nor a right child."""
        has_child = self.left or self.right
        return not has_child

# decision tree implementation
class Decision_Tree:
    """Binary decision-tree classifier grown greedily with axis-aligned splits.

    Every internal node tests ``x[feature] <= threshold``: samples that pass
    go to the left child, the others to the right child. A leaf predicts the
    most frequent class among the training samples that reached it.

    Parameters
    ----------
    max_depth : int or None, default=5
        Nodes at this depth are turned into leaves. ``None`` disables the
        depth limit.
    min_samples_split : int, default=2
        Minimum number of samples a node must hold to be split; nodes with
        fewer samples become leaves.
    criterion : {"entropy", "gini"}, default="entropy"
        Impurity measure used to score candidate splits.

    Notes
    -----
    Class labels must be non-negative integers because class counts are
    computed with ``np.bincount``.
    """

    def __init__(self, max_depth=5, min_samples_split=2, criterion="entropy"):
        if criterion not in ("entropy", "gini"):
            raise ValueError(
                f"criterion must be 'entropy' or 'gini', got {criterion!r}"
            )
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion
        self.root = None  # set by fit()

    def common_label(self, y):
        """Return the most frequent label in ``y`` (``None`` if ``y`` is empty).

        Ties are resolved in favour of the smallest label, because
        ``np.argmax`` returns the first maximum of the count vector.
        """
        if len(y) == 0:
            return None
        return np.argmax(np.bincount(y))

    def gini_impurity(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of ``y`` (0 if empty)."""
        if len(y) == 0:
            return 0
        probs = np.bincount(y) / len(y)
        return 1 - np.sum(probs ** 2)

    def entropy__(self, y):
        """Return the Shannon entropy of ``y`` in bits (0 if empty).

        Only classes that actually occur are included, so no epsilon is
        needed inside the logarithm and a pure node yields exactly 0.
        """
        if len(y) == 0:
            return 0
        counts = np.bincount(y)
        probs = counts[counts > 0] / len(y)
        # sum(p * log2(1 / p)) is the entropy written without a leading
        # minus sign, which avoids returning -0.0 for pure nodes.
        return np.sum(probs * np.log2(1.0 / probs))

    def _impurity(self, y):
        """Score ``y`` with the impurity measure selected by ``self.criterion``."""
        if self.criterion == "gini":
            return self.gini_impurity(y)
        return self.entropy__(y)

    def best_split_(self, X, y):
        """Search every (feature, threshold) pair for the largest impurity drop.

        Candidate thresholds are the distinct values of each feature column.
        A candidate that leaves either side empty is skipped.

        Returns
        -------
        tuple
            ``(feature_index, threshold)`` of the best split, or
            ``(None, None)`` when no candidate separates the samples.
        """
        best_feature, best_threshold, best_gain = None, None, -1
        parent_impurity = self._impurity(y)
        n_features = X.shape[1]

        for feature in range(n_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                y_left = y[column <= threshold]
                y_right = y[column > threshold]

                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                # Children impurities weighted by the share of samples they get.
                weighted_impurity = (
                    len(y_left) * self._impurity(y_left)
                    + len(y_right) * self._impurity(y_right)
                ) / len(y)
                gain = parent_impurity - weighted_impurity

                # Strict comparison: the first best candidate wins ties.
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold
        return (best_feature, best_threshold)

    def grow_tree_(self, X, y, depth=0):
        """Recursively build the subtree for ``(X, y)`` and return its root node.

        A leaf is produced when the depth limit is reached, when at most one
        class is present, when the node holds fewer than
        ``min_samples_split`` samples, or when no valid split exists.
        """
        n_samples = len(y)
        n_labels = len(np.unique(y))
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth

        # pre-pruning: a node holding exactly min_samples_split samples is
        # still allowed to split, hence the strict "<".
        if depth_exhausted or n_labels <= 1 or n_samples < self.min_samples_split:
            return Node(value=self.common_label(y))

        feature, threshold = self.best_split_(X, y)
        if feature is None:
            return Node(value=self.common_label(y))

        column = X[:, feature]
        left_mask = column <= threshold
        right_mask = column > threshold

        left = self.grow_tree_(X[left_mask], y[left_mask], depth + 1)
        right = self.grow_tree_(X[right_mask], y[right_mask], depth + 1)

        return Node(feature=feature, threshold=threshold, left=left, right=right)

    def traverse_tree(self, X, node):
        """Route one sample ``X`` from ``node`` down to a leaf; return its value.

        ``X`` is a single sample indexable by feature index.

        Raises
        ------
        RuntimeError
            If an empty (``None``) node is reached, e.g. the tree was never
            fitted.
        """
        while True:
            if node is None:
                raise RuntimeError(
                    "Reached an empty node; call fit(X, y) before predicting."
                )
            if node.is_leaf_node():
                return node.value
            node = node.left if X[node.feature] <= node.threshold else node.right

    def fit(self, X, y):
        """Grow the tree from training data.

        ``X`` is a 2-D array-like of shape (n_samples, n_features) and ``y`` a
        1-D array-like of non-negative integer labels. Both are converted
        with ``np.asarray`` so lists and DataFrames are accepted.
        """
        self.root = self.grow_tree_(np.asarray(X), np.asarray(y))

    def predict(self, X):
        """Return an array with the predicted label of every row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.root is None:
            raise RuntimeError(
                "Decision_Tree is not fitted yet; call fit(X, y) before predict(X)."
            )
        return np.array([self.traverse_tree(row, self.root) for row in np.asarray(X)])
        
    

In [None]:
from sklearn.tree import DecisionTreeClassifier


iris = datasets.load_iris()
X = iris.data  # shape: (150, 4)
y = iris.target  # shape: (150,)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit scikit-learn's reference DecisionTreeClassifier (the baseline).
# random_state is fixed because scikit-learn permutes the features at every
# split, so equally good splits could otherwise be chosen differently between
# runs. Note that scikit-learn scores splits with Gini impurity by default,
# whereas Decision_Tree.best_split_ scores them with entropy.
model = DecisionTreeClassifier(max_depth=1, min_samples_split=10, random_state=42)

model.fit(X_train, y_train)

preds = model.predict(X_test)

# Accuracy: fraction of held-out samples predicted correctly
acc = np.mean(preds == y_test)
print(f"Accuracy: {acc:.2f}")

In [None]:
# Iris data: 150 samples x 4 features, one integer class label per sample.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the hand-written Decision_Tree, limited to a single level of splits.
tree = Decision_Tree(max_depth=1, min_samples_split=10)
tree.fit(X_train, y_train)

# Label the held-out rows.
preds = tree.predict(X_test)

# Share of held-out rows whose predicted label matches the true one.
acc = np.mean(preds == y_test)
print(f"Accuracy: {acc:.2f}")