In [1]:
import pandas as pd
import numpy as np

# Load the data 
df = pd.read_csv('diabetes.csv')
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [2]:
# Function to calculate entropy
def entropy(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy

In [3]:
# Function to calculate Gini impurity
def gini_impurity(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    gini = 1 - np.sum(probabilities ** 2)
    return gini

In [4]:
# Function to split dataset based on a given feature and threshold
def split_dataset(X, y, feature_index, threshold):
    left_mask = X[:, feature_index] <= threshold  #train
    right_mask = ~left_mask   #test
    X_left, y_left = X[left_mask], y[left_mask]
    X_right, y_right = X[right_mask], y[right_mask]
    return X_left, y_left, X_right, y_right

In [11]:
# Function to find the best split based on information gain (entropy)
def find_best_split_entropy(X, y):
    best_entropy = float('inf')
    best_feature_index = None
    best_threshold = None
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index]) #array thresholds containing all the unique values 
        for threshold in thresholds: 
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            entropy_left = entropy(y_left)
            entropy_right = entropy(y_right)
            total_entropy = (len(y_left) / total_samples) * entropy_left + (len(y_right) / total_samples) * entropy_right
            if total_entropy < best_entropy:
                best_entropy = total_entropy
                best_feature_index = feature_index
                best_threshold = threshold

    return best_feature_index, best_threshold

In [12]:
# Function to find the best split based on Gini impurity
def find_best_split_gini(X, y):
    best_gini = float('inf')
    best_feature_index = None
    best_threshold = None
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            gini_left = gini_impurity(y_left)
            gini_right = gini_impurity(y_right)
            total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
            if total_gini < best_gini:
                best_gini = total_gini
                best_feature_index = feature_index
                best_threshold = threshold
    
    return best_feature_index, best_threshold



In [13]:
# Function to build decision tree recursively
def build_tree(X, y, max_depth, criterion='entropy', depth=0):
    if criterion == 'entropy':
        find_best_split = find_best_split_entropy
    elif criterion == 'gini':
        find_best_split = find_best_split_gini
    else:
        raise ValueError("Invalid criterion. Choose either 'entropy' or 'gini'.")
    
    best_feature_index, best_threshold = find_best_split(X, y)
    if best_feature_index is None or depth >= max_depth:
        return {'prediction': np.argmax(np.bincount(y))}
    
    X_left, y_left, X_right, y_right = split_dataset(X, y, best_feature_index, best_threshold)
    
    # Handle case when no samples left
    if len(y_left) == 0:
        left_subtree = {'prediction': np.argmax(np.bincount(y))}
    else:
        left_subtree = build_tree(X_left, y_left, max_depth, criterion, depth+1)
    
    if len(y_right) == 0:
        right_subtree = {'prediction': np.argmax(np.bincount(y))}
    else:
        right_subtree = build_tree(X_right, y_right, max_depth, criterion, depth+1)
    
    return {
        'feature_index': best_feature_index,
        'threshold': best_threshold,
        'left': left_subtree,
        'right': right_subtree
    }




In [14]:
# Function to make predictions using the decision tree
def predict(tree, X):
    if 'prediction' in tree:
        return np.array([tree['prediction']] * len(X))
    
    feature_index = tree['feature_index']
    threshold = tree['threshold']
    
    mask = X[:, feature_index] <= threshold
    X_left, X_right = X[mask], X[~mask]
    
    if len(X_left) == 0:
        return predict(tree['right'], X_right)
    elif len(X_right) == 0:
        return predict(tree['left'], X_left)
    else:
        predictions = np.empty(len(X), dtype=int)
        predictions[mask] = predict(tree['left'], X_left)
        predictions[~mask] = predict(tree['right'], X_right)
        return predictions



In [15]:
import numpy as np
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

# Define your train-test split function
def train_test_split_custom(X, y, test_size=0.2, random_state=42):
    if random_state is not None:
        np.random.seed(random_state)
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    split_index = int((1 - test_size) * len(y))
    X_train, X_test = X[indices[:split_index]], X[indices[split_index:]]
    y_train, y_test = y[indices[:split_index]], y[indices[split_index:]]
    return X_train, X_test, y_train, y_test

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split_custom(X, y, test_size=0.2, random_state=42)

# Build decision tree with entropy criterion
max_depth = 5
entropy_tree = build_tree(X_train, y_train, max_depth=max_depth, criterion='entropy')

# Build decision tree with Gini impurity criterion
gini_tree = build_tree(X_train, y_train, max_depth=max_depth, criterion='gini')

# Make predictions
entropy_pred = predict(entropy_tree, X_test)
gini_pred = predict(gini_tree, X_test)

# Calculate accuracies manually
entropy_accuracy = np.mean(entropy_pred == y_test)
gini_accuracy = np.mean(gini_pred == y_test)

print("Accuracy with entropy criterion:", entropy_accuracy)
print("Accuracy with Gini impurity criterion:", gini_accuracy)


Accuracy with entropy criterion: 0.7207792207792207
Accuracy with Gini impurity criterion: 0.7532467532467533


In [16]:
s=pd.DataFrame(entropy_pred)
s['1']=gini_pred
s['2']=y_test
s[s[0]==s['2']]    #111*100/154 entropy accuracy

Unnamed: 0,0,1,2
0,0,0,0
1,0,0,0
2,0,0,0
4,1,1,1
5,0,0,0
...,...,...,...
146,1,1,1
148,0,0,0
149,0,0,0
150,0,0,0


In [17]:
s

Unnamed: 0,0,1,2
0,0,0,0
1,0,0,0
2,0,0,0
3,1,1,0
4,1,1,1
...,...,...,...
149,0,0,0
150,0,0,0
151,0,1,1
152,0,0,1


In [18]:
s[s['1']==s['2']]  #116*100/154 gini accuracy

Unnamed: 0,0,1,2
0,0,0,0
1,0,0,0
2,0,0,0
4,1,1,1
5,0,0,0
...,...,...,...
148,0,0,0
149,0,0,0
150,0,0,0
151,0,1,1


In [19]:
def plot_custom_tree(tree, feature_names, class_names, depth=0):
    if 'prediction' in tree:
        print('\t' * depth, "Prediction:", class_names[tree['prediction']])
        return

    feature_index = tree['feature_index']
    threshold = tree['threshold']
    
    print('\t' * depth, "Feature:", feature_names[feature_index], ", Threshold:", threshold)
    
    print('\t' * depth, "Left:")
    plot_custom_tree(tree['left'], feature_names, class_names, depth + 1)
    
    print('\t' * depth, "Right:")
    plot_custom_tree(tree['right'], feature_names, class_names, depth + 1)

# Plot decision tree for entropy criterion
print("Decision Tree - Entropy Criterion:")
plot_custom_tree(entropy_tree, df.columns[:-1], ['0', '1'])

# Plot decision tree for Gini impurity criterion
print("\nDecision Tree - Gini Impurity Criterion:")
plot_custom_tree(gini_tree, df.columns[:-1], ['0', '1'])


Decision Tree - Entropy Criterion:
 Feature: Glucose , Threshold: 123.0
 Left:
	 Feature: BMI , Threshold: 26.8
	 Left:
		 Feature: Glucose , Threshold: 106.0
		 Left:
			 Feature: Pregnancies , Threshold: 0.0
			 Left:
				 Feature: Pregnancies , Threshold: 0.0
				 Left:
					 Prediction: 0
				 Right:
					 Prediction: 0
			 Right:
				 Feature: Pregnancies , Threshold: 1.0
				 Left:
					 Prediction: 0
				 Right:
					 Prediction: 0
		 Right:
			 Feature: BMI , Threshold: 22.9
			 Left:
				 Feature: Pregnancies , Threshold: 6.0
				 Left:
					 Prediction: 0
				 Right:
					 Prediction: 1
			 Right:
				 Feature: Pregnancies , Threshold: 0.0
				 Left:
					 Prediction: 0
				 Right:
					 Prediction: 0
	 Right:
		 Feature: Age , Threshold: 29.0
		 Left:
			 Feature: BMI , Threshold: 45.3
			 Left:
				 Feature: BMI , Threshold: 38.2
				 Left:
					 Prediction: 0
				 Right:
					 Prediction: 0
			 Right:
				 Feature: Glucose , Threshold: 119.0
				 Left:
					 Prediction: 1


In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data 
df = pd.read_csv('diabetes.csv')
# Function to calculate entropy
def calculate_entropy(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy

# Function to calculate Gini impurity
def calculate_gini_impurity(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    gini = 1 - np.sum(probabilities ** 2)
    return gini

# Function to split dataset based on a given feature and threshold
def split_dataset(X, y, feature_index, threshold):
    left_mask = X[:, feature_index] <= threshold
    right_mask = ~left_mask
    X_left, y_left = X[left_mask], y[left_mask]
    X_right, y_right = X[right_mask], y[right_mask]
    return X_left, y_left, X_right, y_right

# Function to find the best split based on information gain (entropy)
def find_best_split_entropy(X, y):
    best_entropy = float('inf')
    best_feature_index = None
    best_threshold = None
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            entropy_left = calculate_entropy(y_left)
            entropy_right = calculate_entropy(y_right)
            total_entropy = (len(y_left) / total_samples) * entropy_left + (len(y_right) / total_samples) * entropy_right
            if total_entropy < best_entropy:
                best_entropy = total_entropy
                best_feature_index = feature_index
                best_threshold = threshold
    
    return best_feature_index, best_threshold

# Function to find the best split based on Gini impurity
def find_best_split_gini(X, y):
    best_gini = float('inf')
    best_feature_index = None
    best_threshold = None
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            gini_left = calculate_gini_impurity(y_left)
            gini_right = calculate_gini_impurity(y_right)
            total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
            if total_gini < best_gini:
                best_gini = total_gini
                best_feature_index = feature_index
                best_threshold = threshold
    
    return best_feature_index, best_threshold

def build_tree_verbose(X, y, max_depth, criterion='entropy', depth=0):
    if criterion == 'entropy':
        find_best_split = find_best_split_entropy
        impurity_func = calculate_entropy
    elif criterion == 'gini':
        find_best_split = find_best_split_gini
        impurity_func = calculate_gini_impurity
    else:
        raise ValueError("Invalid criterion. Choose either 'entropy' or 'gini'.")
    
    print('\t' * depth, "Depth:", depth)
    impurity = impurity_func(y)
    print('\t' * depth, "Impurity (", criterion, "):", impurity)
    
    best_feature_index, best_threshold = find_best_split(X, y)
    if best_feature_index is None or depth >= max_depth:
        if len(y) == 0:
            print('\t' * depth, "Reached leaf node with no samples.")
            return {'prediction': None}
        print('\t' * depth, "Reached leaf node. Prediction:", np.argmax(np.bincount(y)))
        return {'prediction': np.argmax(np.bincount(y))}
    
    print('\t' * depth, "Best split feature index:", best_feature_index, ", Threshold:", best_threshold)
    
    X_left, y_left, X_right, y_right = split_dataset(X, y, best_feature_index, best_threshold)
    
    entropy_left = calculate_entropy(y_left)
    entropy_right = calculate_entropy(y_right)
    print('\t' * depth, "Entropy (left):", entropy_left)
    print('\t' * depth, "Entropy (right):", entropy_right)
    
    gini_left = calculate_gini_impurity(y_left)
    gini_right = calculate_gini_impurity(y_right)
    print('\t' * depth, "Gini impurity (left):", gini_left)
    print('\t' * depth, "Gini impurity (right):", gini_right)
    
    # Calculate information gain or Gini gain
    if criterion == 'entropy':
        total_samples = len(y)
        total_entropy = (len(y_left) / total_samples) * entropy_left + (len(y_right) / total_samples) * entropy_right
        entropy_gain = impurity - total_entropy
        print('\t' * depth, "Entropy Gain:", entropy_gain)
    else:
        total_samples = len(y)
        total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
        gini_gain = impurity - total_gini
        print('\t' * depth, "Gini Gain:", gini_gain)
    
    # Recursively build left and right subtrees
    print('\t' * depth, "Splitting left subtree:")
    left_subtree = build_tree_verbose(X_left, y_left, max_depth, criterion, depth + 1)
    
    print('\t' * depth, "Splitting right subtree:")
    right_subtree = build_tree_verbose(X_right, y_right, max_depth, criterion, depth + 1)
    
    return {
        'feature_index': best_feature_index,
        'threshold': best_threshold,
        'left': left_subtree,
        'right': right_subtree
    }

# Load the data 
df = pd.read_csv('diabetes.csv')

# Split features and labels
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build decision tree with entropy criterion and print details
print("Building Decision Tree with Entropy Criterion:")
entropy_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='entropy')

# Build decision tree with Gini impurity criterion and print details
print("\nBuilding Decision Tree with Gini Impurity Criterion:")
gini_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='gini')


Building Decision Tree with Entropy Criterion:
 Depth: 0
 Impurity ( entropy ): 0.9312740341054782
 Best split feature index: 1 , Threshold: 127.0
 Entropy (left): 0.7115523492017045
 Entropy (right): 0.9637545671271062
 Gini impurity (left): 0.31379355687047994
 Gini impurity (right): 0.4750876913265305
 Entropy Gain: 0.12771305816877376
 Splitting left subtree:
	 Depth: 1
	 Impurity ( entropy ): 0.7115523492017045
	 Best split feature index: 7 , Threshold: 28.0
	 Entropy (left): 0.39124356362925566
	 Entropy (right): 0.9332728777019407
	 Gini impurity (left): 0.14201183431952646
	 Gini impurity (right): 0.4544658800462169
	 Entropy Gain: 0.08542941614095201
	 Splitting left subtree:
		 Depth: 2
		 Impurity ( entropy ): 0.39124356362925566
		 Best split feature index: 5 , Threshold: 30.9
		 Entropy (left): 0.06766982525198403
		 Entropy (right): 0.6460174789535751
		 Gini impurity (left): 0.015998959417273584
		 Gini impurity (right): 0.27548092252099043
		 Entropy Gain: 0.06972884059

					 Reached leaf node. Prediction: 1
			 Splitting right subtree:
				 Depth: 4
				 Impurity ( entropy ): -0.0
				 Best split feature index: 0 , Threshold: 0.0
				 Entropy (left): -0.0
				 Entropy (right): -0.0
				 Gini impurity (left): 0.0
				 Gini impurity (right): 0.0
				 Entropy Gain: 0.0
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( entropy ): -0.0
					 Reached leaf node. Prediction: 0
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( entropy ): -0.0
					 Reached leaf node. Prediction: 0
	 Splitting right subtree:
		 Depth: 2
		 Impurity ( entropy ): 0.8500455213505094
		 Best split feature index: 1 , Threshold: 165.0
		 Entropy (left): 0.9509560484549725
		 Entropy (right): 0.4394969869215134
		 Gini impurity (left): 0.46639231824417005
		 Gini impurity (right): 0.16528925619834722
		 Entropy Gain: 0.07166768384240962
		 Splitting left subtree:
			 Depth: 3
			 Impurity ( entropy ): 0.9509560484549725
			 Best split feature index: 2 , Thresho

			 Best split feature index: 1 , Threshold: 94.0
			 Entropy (left): 0.5547781633412736
			 Entropy (right): 0.9989326546260581
			 Gini impurity (left): 0.22476586888657657
			 Gini impurity (right): 0.49926035502958577
			 Gini Gain: 0.053867734106682774
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( gini ): 0.22476586888657657
				 Best split feature index: 1 , Threshold: 0.0
				 Entropy (left): -0.0
				 Entropy (right): 0.362051251733998
				 Gini impurity (left): 0.0
				 Gini impurity (right): 0.1284185493460166
				 Gini Gain: 0.10463238724030298
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( gini ): 0.0
					 Reached leaf node. Prediction: 1
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( gini ): 0.1284185493460166
					 Reached leaf node. Prediction: 0
			 Splitting right subtree:
				 Depth: 4
				 Impurity ( gini ): 0.49926035502958577
				 Best split feature index: 6 , Threshold: 0.512
				 Entropy (left): 0.9835585673909616
				 En

In [29]:
# Function to make predictions using the decision tree
def predict(tree, X):
    predictions = []
    for x in X:
        current_node = tree
        while 'prediction' not in current_node:
            feature_index = current_node['feature_index']
            threshold = current_node['threshold']
            if x[feature_index] <= threshold:
                current_node = current_node['left']
            else:
                current_node = current_node['right']
        predictions.append(current_node['prediction'])
    return np.array(predictions)
# Make predictions using entropy tree
entropy_pred = predict(entropy_tree, X_test)

# Make predictions using Gini tree
gini_pred = predict(gini_tree, X_test)

# Print actual and predicted labels
print("Actual Labels   | Entropy Predictions | Gini Predictions")
print("-" * 60)
for actual, entropy, gini in zip(y_test, entropy_pred, gini_pred):
    print(f"{actual:^15} | {entropy:^19} | {gini:^15}")


Actual Labels   | Entropy Predictions | Gini Predictions
------------------------------------------------------------
       0        |          1          |        0       
       0        |          0          |        0       
       0        |          0          |        0       
       0        |          0          |        0       
       0        |          0          |        0       
       0        |          1          |        0       
       0        |          0          |        0       
       0        |          1          |        1       
       0        |          0          |        0       
       0        |          0          |        0       
       1        |          1          |        1       
       0        |          1          |        1       
       1        |          0          |        0       
       0        |          1          |        1       
       0        |          0          |        0       
       1        |          1          |   

In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  # Import train_test_split function
from sklearn.metrics import accuracy_score

# Load the data 
df = pd.read_csv('diabetes.csv')

# Function to calculate entropy
def calculate_entropy(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy

# Function to calculate Gini impurity
def calculate_gini_impurity(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    gini = 1 - np.sum(probabilities ** 2)
    return gini

# Function to split dataset based on a given feature and threshold
def split_dataset(X, y, feature_index, threshold):
    left_mask = X[:, feature_index] <= threshold
    right_mask = ~left_mask
    X_left, y_left = X[left_mask], y[left_mask]
    X_right, y_right = X[right_mask], y[right_mask]
    return X_left, y_left, X_right, y_right

# Function to find the best split based on Gini impurity
def find_best_split_gini(X, y):
    best_gini = float('inf')
    best_feature_index = None
    best_threshold = None
    best_information_gain = float('-inf')  # Initialize to negative infinity
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            gini_left = calculate_gini_impurity(y_left)
            gini_right = calculate_gini_impurity(y_right)
            total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
            information_gain = calculate_gini_impurity(y) - total_gini
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_gini = total_gini
                best_feature_index = feature_index
                best_threshold = threshold
                
                # Print information gain for this split
                print("Information Gain (Gini) for feature index", feature_index, "and threshold", threshold, ":", information_gain)
    
    return best_feature_index, best_threshold, best_information_gain

# Function to find the best split based on entropy
def find_best_split_entropy(X, y):
    best_entropy = float('inf')
    best_feature_index = None
    best_threshold = None
    best_information_gain = float('-inf')  # Initialize to negative infinity
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            entropy_left = calculate_entropy(y_left)
            entropy_right = calculate_entropy(y_right)
            total_entropy = (len(y_left) / total_samples) * entropy_left + (len(y_right) / total_samples) * entropy_right
            information_gain = calculate_entropy(y) - total_entropy
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_entropy = total_entropy
                best_feature_index = feature_index
                best_threshold = threshold
                
                # Print information gain for this split
                print("Information Gain (Entropy) for feature index", feature_index, "and threshold", threshold, ":", information_gain)
    
    return best_feature_index, best_threshold, best_information_gain

# Modify build_tree_verbose function to accommodate the changes
def build_tree_verbose(X, y, max_depth, criterion='entropy', depth=0):
    if criterion == 'entropy':
        find_best_split = find_best_split_entropy
        impurity_func = calculate_entropy
    elif criterion == 'gini':
        find_best_split = find_best_split_gini
        impurity_func = calculate_gini_impurity
    else:
        raise ValueError("Invalid criterion. Choose either 'entropy' or 'gini'.")
    
    print('\t' * depth, "Depth:", depth)
    impurity = impurity_func(y)
    print('\t' * depth, "Impurity (", criterion, "):", impurity)
    
    best_feature_index, best_threshold, best_information_gain = find_best_split(X, y)
    if best_feature_index is None or depth >= max_depth:
        if len(y) == 0:
            print('\t' * depth, "Reached leaf node with no samples.")
            return {'prediction': None}
        print('\t' * depth, "Reached leaf node. Prediction:", np.argmax(np.bincount(y)))
        return {'prediction': np.argmax(np.bincount(y))}
    
    print('\t' * depth, "Best split feature index:", best_feature_index, ", Threshold:", best_threshold)
    print('\t' * depth, "Information Gain (", criterion, "):", best_information_gain)
    
    
    X_left, y_left, X_right, y_right = split_dataset(X, y, best_feature_index, best_threshold)
    
    entropy_left = calculate_entropy(y_left)
    entropy_right = calculate_entropy(y_right)
    print('\t' * depth, "Entropy (left):", entropy_left)
    print('\t' * depth, "Entropy (right):", entropy_right)
    
    gini_left = calculate_gini_impurity(y_left)
    gini_right = calculate_gini_impurity(y_right)
    print('\t' * depth, "Gini impurity (left):", gini_left)
    print('\t' * depth, "Gini impurity (right):", gini_right)
    
    # Recursively build left and right subtrees
    print('\t' * depth, "Splitting left subtree:")
    left_subtree = build_tree_verbose(X_left, y_left, max_depth, criterion, depth + 1)
    
    print('\t' * depth, "Splitting right subtree:")
    right_subtree = build_tree_verbose(X_right, y_right, max_depth, criterion, depth + 1)
    
    return {
        'feature_index': best_feature_index,
        'threshold': best_threshold,
        'left': left_subtree,
        'right': right_subtree
    }

# Split features and labels
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build decision tree with entropy criterion and print details
print("Building Decision Tree with Entropy Criterion:")
entropy_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='entropy')

# Build decision tree with Gini impurity criterion and print details
print("\nBuilding Decision Tree with Gini Impurity Criterion:")
gini_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='gini')


Building Decision Tree with Entropy Criterion:
 Depth: 0
 Impurity ( entropy ): 0.9312740341054782
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.00016566217902580505
Information Gain (Entropy) for feature index 0 and threshold 1.0 : 0.00965274944114114
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.026759041120135274
Information Gain (Entropy) for feature index 0 and threshold 3.0 : 0.026933035723649246
Information Gain (Entropy) for feature index 0 and threshold 4.0 : 0.030822562048189672
Information Gain (Entropy) for feature index 0 and threshold 6.0 : 0.03327793641199861
Information Gain (Entropy) for feature index 1 and threshold 86.0 : 0.03504416732809612
Information Gain (Entropy) for feature index 1 and threshold 87.0 : 0.04087305042563838
Information Gain (Entropy) for feature index 1 and threshold 88.0 : 0.042548316041415735
Information Gain (Entropy) for feature index 1 and threshold 89.0 : 0.04750304916550363
Information Gain (En

Information Gain (Entropy) for feature index 6 and threshold 0.572 : 0.017907487455651322
Information Gain (Entropy) for feature index 6 and threshold 0.58 : 0.018355083902788248
Information Gain (Entropy) for feature index 6 and threshold 0.583 : 0.018820593582142338
Information Gain (Entropy) for feature index 6 and threshold 0.586 : 0.019305510510077173
Information Gain (Entropy) for feature index 6 and threshold 0.597 : 0.019811523806090837
Information Gain (Entropy) for feature index 6 and threshold 0.614 : 0.020340553222414896
Information Gain (Entropy) for feature index 6 and threshold 0.624 : 0.020894793150316934
Information Gain (Entropy) for feature index 6 and threshold 0.626 : 0.02147676765263625
Information Gain (Entropy) for feature index 6 and threshold 0.637 : 0.022089400014350953
Information Gain (Entropy) for feature index 6 and threshold 0.647 : 0.022736101665646505
Information Gain (Entropy) for feature index 6 and threshold 0.658 : 0.023420887338631062
Information 

		 Best split feature index: 5 , Threshold: 26.2
		 Information Gain ( entropy ): 0.10739491962715875
		 Entropy (left): 0.19143325481419343
		 Entropy (right): 0.9856640314885599
		 Gini impurity (left): 0.05709342560553643
		 Gini impurity (right): 0.49009602194787383
		 Splitting left subtree:
			 Depth: 3
			 Impurity ( entropy ): 0.19143325481419343
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.0026122436963443574
Information Gain (Entropy) for feature index 0 and threshold 1.0 : 0.005395931003723903
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.0068594006645853745
Information Gain (Entropy) for feature index 0 and threshold 3.0 : 0.009947081674627073
Information Gain (Entropy) for feature index 0 and threshold 4.0 : 0.015045753990768407
Information Gain (Entropy) for feature index 0 and threshold 5.0 : 0.027557321734609508
Information Gain (Entropy) for feature index 0 and threshold 6.0 : 0.03553943054003694
Information Gain (Entropy)

Information Gain (Entropy) for feature index 0 and threshold 7.0 : 0.015731130220181355
Information Gain (Entropy) for feature index 0 and threshold 13.0 : 0.017112670419961562
Information Gain (Entropy) for feature index 1 and threshold 95.0 : 0.034609733563223166
Information Gain (Entropy) for feature index 2 and threshold 66.0 : 0.056487520137732505
Information Gain (Entropy) for feature index 2 and threshold 82.0 : 0.059873473003484334
Information Gain (Entropy) for feature index 3 and threshold 25.0 : 0.06214836677931301
Information Gain (Entropy) for feature index 3 and threshold 26.0 : 0.0781754152814359
Information Gain (Entropy) for feature index 3 and threshold 27.0 : 0.08510528114458571
					 Reached leaf node. Prediction: 0
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( entropy ): 0.8238116333123173
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.02918190183706737
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.034737

		 Best split feature index: 1 , Threshold: 165.0
		 Information Gain ( entropy ): 0.07166768384240962
		 Entropy (left): 0.9509560484549725
		 Entropy (right): 0.4394969869215134
		 Gini impurity (left): 0.46639231824417005
		 Gini impurity (right): 0.16528925619834722
		 Splitting left subtree:
			 Depth: 3
			 Impurity ( entropy ): 0.9509560484549725
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 8.336847809420345e-05
Information Gain (Entropy) for feature index 0 and threshold 1.0 : 0.000752651238218438
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.0036900908429696733
Information Gain (Entropy) for feature index 0 and threshold 4.0 : 0.00782861526431522
Information Gain (Entropy) for feature index 0 and threshold 6.0 : 0.01973112268816324
Information Gain (Entropy) for feature index 0 and threshold 7.0 : 0.026170037126564738
Information Gain (Entropy) for feature index 1 and threshold 164.0 : 0.04078445936563313
Information Gain (Entropy) 

 Best split feature index: 1 , Threshold: 127.0
 Information Gain ( gini ): 0.08048715147510632
 Entropy (left): 0.7115523492017045
 Entropy (right): 0.9637545671271062
 Gini impurity (left): 0.31379355687047994
 Gini impurity (right): 0.4750876913265305
 Splitting left subtree:
	 Depth: 1
	 Impurity ( gini ): 0.31379355687047994
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.001017623041914606
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.005312563382671387
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.012431833136731796
Information Gain (Gini) for feature index 0 and threshold 3.0 : 0.01957133464825772
Information Gain (Gini) for feature index 5 and threshold 25.6 : 0.019780823572032424
Information Gain (Gini) for feature index 5 and threshold 25.8 : 0.02010461794034607
Information Gain (Gini) for feature index 5 and threshold 25.9 : 0.021089754925298743
Information Gain (Gini) for feature index 5 and threshold 26.0 : 0.02209

					 Reached leaf node. Prediction: 0
		 Splitting right subtree:
			 Depth: 3
			 Impurity ( gini ): 0.27548092252099043
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.00047936050911912
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.005016564427766379
Information Gain (Gini) for feature index 1 and threshold 87.0 : 0.006145808367645167
Information Gain (Gini) for feature index 1 and threshold 105.0 : 0.0089751098427715
Information Gain (Gini) for feature index 1 and threshold 106.0 : 0.01201078052526977
Information Gain (Gini) for feature index 2 and threshold 0.0 : 0.014527314273567737
Information Gain (Gini) for feature index 2 and threshold 30.0 : 0.029360466742368613
			 Best split feature index: 2 , Threshold: 30.0
			 Information Gain ( gini ): 0.029360466742368613
			 Entropy (left): -0.0
			 Entropy (right): 0.6032154068313578
			 Gini impurity (left): 0.0
			 Gini impurity (right): 0.2513019390581718
			 Splitting left subtree:
				 Dept

			 Best split feature index: 1 , Threshold: 94.0
			 Information Gain ( gini ): 0.053867734106682774
			 Entropy (left): 0.5547781633412736
			 Entropy (right): 0.9989326546260581
			 Gini impurity (left): 0.22476586888657657
			 Gini impurity (right): 0.49926035502958577
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( gini ): 0.22476586888657657
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.0022964584305141533
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.006403586008164652
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.007991675338189369
Information Gain (Gini) for feature index 0 and threshold 3.0 : 0.011582138171289003
Information Gain (Gini) for feature index 0 and threshold 4.0 : 0.01585649868688377
Information Gain (Gini) for feature index 1 and threshold 0.0 : 0.10463238724030298
				 Best split feature index: 1 , Threshold: 0.0
				 Information Gain ( gini ): 0.10463238724030298
				 Entropy (left): -0.0
			

			 Best split feature index: 2 , Threshold: 54.0
			 Information Gain ( gini ): 0.050645342312008734
			 Entropy (left): 0.9182958340544896
			 Entropy (right): 0.4394969869215134
			 Gini impurity (left): 0.4444444444444444
			 Gini impurity (right): 0.16528925619834722
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( gini ): 0.4444444444444444
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.4444444444444444
				 Best split feature index: 0 , Threshold: 1.0
				 Information Gain ( gini ): 0.4444444444444444
				 Entropy (left): -0.0
				 Entropy (right): -0.0
				 Gini impurity (left): 0.0
				 Gini impurity (right): 0.0
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( gini ): 0.0
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.0
					 Reached leaf node. Prediction: 0
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( gini ): 0.0
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.0
					 Reached leaf n

				 Best split feature index: 2 , Threshold: 60.0
				 Information Gain ( gini ): 0.10850694444444442
				 Entropy (left): -0.0
				 Entropy (right): 0.954434002924965
				 Gini impurity (left): 0.0
				 Gini impurity (right): 0.46875
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( gini ): 0.0
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.0
					 Reached leaf node. Prediction: 1
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( gini ): 0.46875
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.016826923076923017
Information Gain (Gini) for feature index 2 and threshold 64.0 : 0.031250000000000056
Information Gain (Gini) for feature index 2 and threshold 90.0 : 0.04111842105263169
Information Gain (Gini) for feature index 4 and threshold 180.0 : 0.046875
Information Gain (Gini) for feature index 4 and threshold 190.0 : 0.07008333333333339
Information Gain (Gini) for feature index 4 and threshold 210.0 : 0.07291666666666663
Inf

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  # Import train_test_split function
from sklearn.metrics import accuracy_score

# Load the data 
df = pd.read_csv('diabetes.csv')

# Function to calculate entropy
def calculate_entropy(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy

# Function to calculate Gini impurity
def calculate_gini_impurity(y):
    unique_labels, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    gini = 1 - np.sum(probabilities ** 2)
    return gini

# Function to split dataset based on a given feature and threshold
def split_dataset(X, y, feature_index, threshold):
    left_mask = X[:, feature_index] <= threshold
    right_mask = ~left_mask
    X_left, y_left = X[left_mask], y[left_mask]
    X_right, y_right = X[right_mask], y[right_mask]
    return X_left, y_left, X_right, y_right

# Function to find the best split based on Gini impurity
def find_best_split_gini(X, y):
    best_gini = float('inf')
    best_feature_index = None
    best_threshold = None
    best_information_gain = float('-inf')  # Initialize to negative infinity
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            gini_left = calculate_gini_impurity(y_left)
            gini_right = calculate_gini_impurity(y_right)
            total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
            information_gain = calculate_gini_impurity(y) - total_gini
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_gini = total_gini
                best_feature_index = feature_index
                best_threshold = threshold
                
                # Print information gain and impurity for this split
                print("Information Gain (Gini) for feature index", feature_index, "and threshold", threshold, ":", information_gain)
                print("Total Gini impurity for this split:", total_gini)
    
    return best_feature_index, best_threshold, best_information_gain# Function to find the best split based on Gini impurity
def find_best_split_gini(X, y):
    best_gini = float('inf')
    best_feature_index = None
    best_threshold = None
    best_information_gain = float('-inf')  # Initialize to negative infinity
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            gini_left = calculate_gini_impurity(y_left)
            gini_right = calculate_gini_impurity(y_right)
            total_gini = (len(y_left) / total_samples) * gini_left + (len(y_right) / total_samples) * gini_right
            information_gain = calculate_gini_impurity(y) - total_gini
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_gini = total_gini
                best_feature_index = feature_index
                best_threshold = threshold
                
                # Print information gain and impurity for this split
                print("Information Gain (Gini) for feature index", feature_index, "and threshold", threshold, ":", information_gain)
                print("Total Gini impurity for this split:", total_gini)
    
    return best_feature_index, best_threshold, best_information_gain


# Function to find the best split based on entropy
def find_best_split_entropy(X, y):
    best_entropy = float('inf')
    best_feature_index = None
    best_threshold = None
    best_information_gain = float('-inf')  # Initialize to negative infinity
    
    for feature_index in range(X.shape[1]):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            X_left, y_left, X_right, y_right = split_dataset(X, y, feature_index, threshold)
            total_samples = len(y)
            entropy_left = calculate_entropy(y_left)
            entropy_right = calculate_entropy(y_right)
            total_entropy = (len(y_left) / total_samples) * entropy_left + (len(y_right) / total_samples) * entropy_right
            information_gain = calculate_entropy(y) - total_entropy
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_entropy = total_entropy
                best_feature_index = feature_index
                best_threshold = threshold
                
                # Print information gain and impurity for this split
                print("Information Gain (Entropy) for feature index", feature_index, "and threshold", threshold, ":", information_gain)
                print("Total Entropy for this split:", total_entropy)
    
    return best_feature_index, best_threshold, best_information_gain

# Modify build_tree_verbose function to accommodate the changes
def build_tree_verbose(X, y, max_depth, criterion='entropy', depth=0):
    if criterion == 'entropy':
        find_best_split = find_best_split_entropy
        impurity_func = calculate_entropy
    elif criterion == 'gini':
        find_best_split = find_best_split_gini
        impurity_func = calculate_gini_impurity
    else:
        raise ValueError("Invalid criterion. Choose either 'entropy' or 'gini'.")
    
    print('\t' * depth, "Depth:", depth)
    impurity = impurity_func(y)
    print('\t' * depth, "Impurity (", criterion, "):", impurity)
    
    best_feature_index, best_threshold, best_information_gain = find_best_split(X, y)
    if best_feature_index is None or depth >= max_depth:
        if len(y) == 0:
            print('\t' * depth, "Reached leaf node with no samples.")
            return {'prediction': None}
        print('\t' * depth, "Reached leaf node. Prediction:", np.argmax(np.bincount(y)))
        return {'prediction': np.argmax(np.bincount(y))}
    
    print('\t' * depth, "Best split feature index:", best_feature_index, ", Threshold:", best_threshold)
    print('\t' * depth, "Information Gain (", criterion, "):", best_information_gain)
    
    
    X_left, y_left, X_right, y_right = split_dataset(X, y, best_feature_index, best_threshold)
    
    entropy_left = calculate_entropy(y_left)
    entropy_right = calculate_entropy(y_right)
    print('\t' * depth, "Entropy (left):", entropy_left)
    print('\t' * depth, "Entropy (right):", entropy_right)
    
    gini_left = calculate_gini_impurity(y_left)
    gini_right = calculate_gini_impurity(y_right)
    print('\t' * depth, "Gini impurity (left):", gini_left)
    print('\t' * depth, "Gini impurity (right):", gini_right)
    
    # Recursively build left and right subtrees
    print('\t' * depth, "Splitting left subtree:")
    left_subtree = build_tree_verbose(X_left, y_left, max_depth, criterion, depth + 1)
    
    print('\t' * depth, "Splitting right subtree:")
    right_subtree = build_tree_verbose(X_right, y_right, max_depth, criterion, depth + 1)
    
    return {
        'feature_index': best_feature_index,
        'threshold': best_threshold,
        'left': left_subtree,
        'right': right_subtree
    }

# Split features and labels
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build decision tree with entropy criterion and print details
print("Building Decision Tree with Entropy Criterion:")
entropy_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='entropy')

# Build decision tree with Gini impurity criterion and print details
print("\nBuilding Decision Tree with Gini Impurity Criterion:")
gini_tree_verbose = build_tree_verbose(X_train, y_train, max_depth=5, criterion='gini')


Building Decision Tree with Entropy Criterion:
 Depth: 0
 Impurity ( entropy ): 0.9312740341054782
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.00016566217902580505
Total Entropy for this split: 0.9311083719264523
Information Gain (Entropy) for feature index 0 and threshold 1.0 : 0.00965274944114114
Total Entropy for this split: 0.921621284664337
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.026759041120135274
Total Entropy for this split: 0.9045149929853429
Information Gain (Entropy) for feature index 0 and threshold 3.0 : 0.026933035723649246
Total Entropy for this split: 0.9043409983818289
Information Gain (Entropy) for feature index 0 and threshold 4.0 : 0.030822562048189672
Total Entropy for this split: 0.9004514720572885
Information Gain (Entropy) for feature index 0 and threshold 6.0 : 0.03327793641199861
Total Entropy for this split: 0.8979960976934795
Information Gain (Entropy) for feature index 1 and threshold 86.0 : 0.0350441673

Information Gain (Entropy) for feature index 6 and threshold 0.572 : 0.017907487455651322
Total Entropy for this split: 0.04976233779633271
Information Gain (Entropy) for feature index 6 and threshold 0.58 : 0.018355083902788248
Total Entropy for this split: 0.049314741349195786
Information Gain (Entropy) for feature index 6 and threshold 0.583 : 0.018820593582142338
Total Entropy for this split: 0.048849231669841696
Information Gain (Entropy) for feature index 6 and threshold 0.586 : 0.019305510510077173
Total Entropy for this split: 0.04836431474190686
Information Gain (Entropy) for feature index 6 and threshold 0.597 : 0.019811523806090837
Total Entropy for this split: 0.047858301445893196
Information Gain (Entropy) for feature index 6 and threshold 0.614 : 0.020340553222414896
Total Entropy for this split: 0.04732927202956914
Information Gain (Entropy) for feature index 6 and threshold 0.624 : 0.020894793150316934
Total Entropy for this split: 0.0467750321016671
Information Gain (E

Information Gain (Entropy) for feature index 5 and threshold 0.0 : 0.19143325481419343
Total Entropy for this split: -0.0
			 Best split feature index: 5 , Threshold: 0.0
			 Information Gain ( entropy ): 0.19143325481419343
			 Entropy (left): -0.0
			 Entropy (right): -0.0
			 Gini impurity (left): 0.0
			 Gini impurity (right): 0.0
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( entropy ): -0.0
Information Gain (Entropy) for feature index 0 and threshold 8.0 : 0.0
Total Entropy for this split: -0.0
				 Best split feature index: 0 , Threshold: 8.0
				 Information Gain ( entropy ): 0.0
				 Entropy (left): -0.0
				 Entropy (right): -0.0
				 Gini impurity (left): 0.0
				 Gini impurity (right): 1.0
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( entropy ): -0.0
Information Gain (Entropy) for feature index 0 and threshold 8.0 : 0.0
Total Entropy for this split: -0.0
					 Reached leaf node. Prediction: 1
				 Splitting right subtree:
					 Depth: 5
					 Impur

					 Reached leaf node. Prediction: 0
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( entropy ): 0.8238116333123173
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.02918190183706737
Total Entropy for this split: 0.79462973147525
Information Gain (Entropy) for feature index 0 and threshold 2.0 : 0.034737326162482485
Total Entropy for this split: 0.7890743071498348
Information Gain (Entropy) for feature index 0 and threshold 3.0 : 0.07764587780991317
Total Entropy for this split: 0.7461657555024042
Information Gain (Entropy) for feature index 0 and threshold 4.0 : 0.11403399127794134
Total Entropy for this split: 0.709777642034376
Information Gain (Entropy) for feature index 0 and threshold 6.0 : 0.22197695749238644
Total Entropy for this split: 0.6018346758199309
					 Reached leaf node. Prediction: 1
 Splitting right subtree:
	 Depth: 1
	 Impurity ( entropy ): 0.9637545671271062
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.000

Information Gain (Entropy) for feature index 1 and threshold 194.0 : 0.06774509098753345
Total Entropy for this split: 0.9285714285714286
Information Gain (Entropy) for feature index 4 and threshold 175.0 : 0.08366202196859251
Total Entropy for this split: 0.9126544975903695
Information Gain (Entropy) for feature index 7 and threshold 59.0 : 0.12939634593872218
Total Entropy for this split: 0.8669201736202399
			 Best split feature index: 7 , Threshold: 59.0
			 Information Gain ( entropy ): 0.12939634593872218
			 Entropy (left): 0.9709505944546686
			 Entropy (right): -0.0
			 Gini impurity (left): 0.48
			 Gini impurity (right): 0.0
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( entropy ): 0.9709505944546686
Information Gain (Entropy) for feature index 0 and threshold 1.0 : 0.02950603126334206
Total Entropy for this split: 0.9414445631913265
Information Gain (Entropy) for feature index 0 and threshold 5.0 : 0.03899618234079649
Total Entropy for this split: 0.9319544121138

Information Gain (Entropy) for feature index 4 and threshold 250.0 : 0.04623064324612358
Total Entropy for this split: 0.9475182676829902
Information Gain (Entropy) for feature index 7 and threshold 24.0 : 0.06755992683865375
Total Entropy for this split: 0.92618898409046
Information Gain (Entropy) for feature index 7 and threshold 28.0 : 0.07170749981773938
Total Entropy for this split: 0.9220414111113744
Information Gain (Entropy) for feature index 7 and threshold 30.0 : 0.09816031844130646
Total Entropy for this split: 0.8955885924878073
					 Reached leaf node. Prediction: 1
			 Splitting right subtree:
				 Depth: 4
				 Impurity ( entropy ): -0.0
Information Gain (Entropy) for feature index 0 and threshold 0.0 : 0.0
Total Entropy for this split: -0.0
				 Best split feature index: 0 , Threshold: 0.0
				 Information Gain ( entropy ): 0.0
				 Entropy (left): -0.0
				 Entropy (right): -0.0
				 Gini impurity (left): 0.0
				 Gini impurity (right): 0.0
				 Splitting left subtree:

 Best split feature index: 1 , Threshold: 127.0
 Information Gain ( gini ): 0.08048715147510632
 Entropy (left): 0.7115523492017045
 Entropy (right): 0.9637545671271062
 Gini impurity (left): 0.31379355687047994
 Gini impurity (right): 0.4750876913265305
 Splitting left subtree:
	 Depth: 1
	 Impurity ( gini ): 0.31379355687047994
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.001017623041914606
Total Gini impurity for this split: 0.31277593382856533
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.005312563382671387
Total Gini impurity for this split: 0.30848099348780855
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.012431833136731796
Total Gini impurity for this split: 0.30136172373374814
Information Gain (Gini) for feature index 0 and threshold 3.0 : 0.01957133464825772
Total Gini impurity for this split: 0.2942222222222222
Information Gain (Gini) for feature index 5 and threshold 25.6 : 0.019780823572032424
Total Gini impurity 

Information Gain (Gini) for feature index 6 and threshold 0.572 : 0.000467298724322527
Total Gini impurity for this split: 0.015531660692951057
Information Gain (Gini) for feature index 6 and threshold 0.58 : 0.0004902745537500013
Total Gini impurity for this split: 0.015508684863523583
Information Gain (Gini) for feature index 6 and threshold 0.583 : 0.0005150884495316536
Total Gini impurity for this split: 0.01548387096774193
Information Gain (Gini) for feature index 6 and threshold 0.586 : 0.0005419701699617904
Total Gini impurity for this split: 0.015456989247311793
Information Gain (Gini) for feature index 6 and threshold 0.597 : 0.0005711894312988471
Total Gini impurity for this split: 0.015427769985974737
Information Gain (Gini) for feature index 6 and threshold 0.614 : 0.0006030649891211222
Total Gini impurity for this split: 0.015395894428152462
Information Gain (Gini) for feature index 6 and threshold 0.624 : 0.0006379763143549936
Total Gini impurity for this split: 0.0153609

					 Reached leaf node. Prediction: 0
				 Splitting right subtree:
					 Depth: 5
					 Impurity ( gini ): 0.38927335640138394
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.012010912962469922
Total Gini impurity for this split: 0.377262443438914
Information Gain (Gini) for feature index 1 and threshold 84.0 : 0.018685121107266278
Total Gini impurity for this split: 0.37058823529411766
Information Gain (Gini) for feature index 1 and threshold 85.0 : 0.024161794535258185
Total Gini impurity for this split: 0.36511156186612576
Information Gain (Gini) for feature index 1 and threshold 87.0 : 0.0300296589223924
Total Gini impurity for this split: 0.35924369747899154
Information Gain (Gini) for feature index 2 and threshold 64.0 : 0.03447214738022597
Total Gini impurity for this split: 0.35480120902115797
Information Gain (Gini) for feature index 2 and threshold 66.0 : 0.04796760333092498
Total Gini impurity for this split: 0.34130575307045896
Information Gain (Gini) fo

			 Best split feature index: 1 , Threshold: 94.0
			 Information Gain ( gini ): 0.053867734106682774
			 Entropy (left): 0.5547781633412736
			 Entropy (right): 0.9989326546260581
			 Gini impurity (left): 0.22476586888657657
			 Gini impurity (right): 0.49926035502958577
			 Splitting left subtree:
				 Depth: 4
				 Impurity ( gini ): 0.22476586888657657
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.0022964584305141533
Total Gini impurity for this split: 0.22246941045606242
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.006403586008164652
Total Gini impurity for this split: 0.21836228287841192
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.007991675338189369
Total Gini impurity for this split: 0.2167741935483872
Information Gain (Gini) for feature index 0 and threshold 3.0 : 0.011582138171289003
Total Gini impurity for this split: 0.21318373071528757
Information Gain (Gini) for feature index 0 and threshold 4.0 : 0.0158564986

	 Best split feature index: 5 , Threshold: 29.9
	 Information Gain ( gini ): 0.067421134811538
	 Entropy (left): 0.8948692308065574
	 Entropy (right): 0.8500455213505094
	 Gini impurity (left): 0.4289169578070411
	 Gini impurity (right): 0.39971395235048357
	 Splitting left subtree:
		 Depth: 2
		 Impurity ( gini ): 0.4289169578070411
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.01361641135895364
Total Gini impurity for this split: 0.41530054644808745
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.02472752247006471
Total Gini impurity for this split: 0.4041894353369764
Information Gain (Gini) for feature index 1 and threshold 137.0 : 0.034595432337974685
Total Gini impurity for this split: 0.3943215254690664
Information Gain (Gini) for feature index 1 and threshold 139.0 : 0.03691551662527204
Total Gini impurity for this split: 0.39200144118176905
Information Gain (Gini) for feature index 1 and threshold 140.0 : 0.042367438181340145
Total Gini im

		 Best split feature index: 1 , Threshold: 165.0
		 Information Gain ( gini ): 0.03492082682116171
		 Entropy (left): 0.9509560484549725
		 Entropy (right): 0.4394969869215134
		 Gini impurity (left): 0.46639231824417005
		 Gini impurity (right): 0.16528925619834722
		 Splitting left subtree:
			 Depth: 3
			 Impurity ( gini ): 0.46639231824417005
Information Gain (Gini) for feature index 0 and threshold 0.0 : 5.405880246234229e-05
Total Gini impurity for this split: 0.4663382594417077
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.0004888390073574667
Total Gini impurity for this split: 0.4659034792368126
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.0023979270564770117
Total Gini impurity for this split: 0.46399439118769303
Information Gain (Gini) for feature index 0 and threshold 4.0 : 0.005016656868508607
Total Gini impurity for this split: 0.46137566137566144
Information Gain (Gini) for feature index 0 and threshold 6.0 : 0.012215749707060242

Information Gain (Gini) for feature index 2 and threshold 70.0 : 0.01174736842105259
Total Gini impurity for this split: 0.10105263157894742
Information Gain (Gini) for feature index 6 and threshold 0.299 : 0.018514285714285705
Total Gini impurity for this split: 0.0942857142857143
				 Best split feature index: 6 , Threshold: 0.299
				 Information Gain ( gini ): 0.018514285714285705
				 Entropy (left): 0.74959525725948
				 Entropy (right): -0.0
				 Gini impurity (left): 0.33673469387755106
				 Gini impurity (right): 0.0
				 Splitting left subtree:
					 Depth: 5
					 Impurity ( gini ): 0.33673469387755106
Information Gain (Gini) for feature index 0 and threshold 0.0 : 0.007064364207221452
Total Gini impurity for this split: 0.3296703296703296
Information Gain (Gini) for feature index 0 and threshold 1.0 : 0.02504638218923938
Total Gini impurity for this split: 0.3116883116883117
Information Gain (Gini) for feature index 0 and threshold 2.0 : 0.03673469387755102
Total Gini impur