# **Importing Necessary Libraries and Dataset**

In [None]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import graphviz

# Load the dataset
# The path is relative, so 'ObesityDataSet.csv' is looked up in the current
# working directory. The resulting DataFrame `data` is what the later cells
# split into features and the 'NObeyesdad' target.
data = pd.read_csv('ObesityDataSet.csv')

# **Building Model**

In [None]:
# Define the Node class
class Node:
    """One node of a binary decision tree.

    An internal node stores its split (``feature`` column index and
    ``threshold``) plus its ``left``/``right`` children. A leaf stores only
    ``value``. The tree code treats ``value is not None`` as "this is a leaf".
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature      # Column index tested at this node
        self.threshold = threshold  # Rows with x[feature] <= threshold go left
        self.left = left
        self.right = right
        self.value = value  # For leaf nodes, stores the class label


# Define the DecisionTreeCART class
class DecisionTreeCART:
    """CART-style classifier that greedily minimises weighted Gini impurity.

    Every split has the form ``x[feature] <= threshold``; candidate thresholds
    are the distinct values found in each column of the training data.

    Args:
        max_depth: Maximum depth of the tree. ``None`` means grow until every
            leaf is pure or no further valid split exists.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree_ = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features). NumPy arrays
                are used as-is; other inputs (e.g. a DataFrame) are converted
                with ``np.asarray``.
            y: 1-D array-like with one class label per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` differ in
                length, or if there are no samples.
        """
        # Converting up front means positional indexing is always used, even
        # when a pandas Series (label-indexed) is passed for y.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")

        self.n_classes_ = len(np.unique(y))
        self.n_features_ = X.shape[1]
        self.tree_ = self._grow_tree(X, y)

    def _make_leaf(self, y):
        """Return a leaf node predicting the most frequent label in ``y``."""
        return Node(value=Counter(y).most_common(1)[0][0])

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X``/``y``.

        Returns a leaf when the depth limit is reached, when all labels are
        identical, or when no split separates the samples into two non-empty
        groups (for example, identical rows with conflicting labels).
        """
        n_samples, n_features = X.shape

        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or len(np.unique(y)) == 1:
            return self._make_leaf(y)

        best_gini = np.inf
        best_feature = None
        best_threshold = None

        for feature in range(n_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_indices = np.flatnonzero(column <= threshold)
                right_indices = np.flatnonzero(column > threshold)

                # A split that leaves one side empty separates nothing and
                # would hand an empty sample set to the recursive call.
                if left_indices.size == 0 or right_indices.size == 0:
                    continue

                gini = (
                    (left_indices.size / n_samples) * self._gini(y[left_indices])
                    + (right_indices.size / n_samples) * self._gini(y[right_indices])
                )

                # Strict "<" keeps the first candidate among equally good splits.
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        if best_feature is None:
            return self._make_leaf(y)

        best_column = X[:, best_feature]
        indices_left = np.flatnonzero(best_column <= best_threshold)
        indices_right = np.flatnonzero(best_column > best_threshold)

        left_child = self._grow_tree(X[indices_left], y[indices_left], depth + 1)
        right_child = self._grow_tree(X[indices_right], y[indices_right], depth + 1)

        return Node(feature=best_feature, threshold=best_threshold, left=left_child, right=right_child)

    def _gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``.

        An empty label set is reported as impurity 0.
        """
        if len(y) == 0:
            return 0.0
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return 1 - np.sum(probabilities ** 2)

    def predict(self, X):
        """Return a list with the predicted label for every row of ``X``.

        ``X`` may be any iterable of rows. Objects exposing ``to_numpy``
        (such as a DataFrame) are converted first, because iterating a
        DataFrame directly yields column names rather than rows.
        """
        rows = X.to_numpy() if hasattr(X, "to_numpy") else X
        return [self._predict(x, self.tree_) for x in rows]

    def _predict(self, x, tree):
        """Walk from ``tree`` down to a leaf for sample ``x``; return its label."""
        node = tree
        while node.value is None:
            node = node.left if x[node.feature] <= node.threshold else node.right
        return node.value

    def _node_label(self, node, feature_names):
        """Return the display text for ``node``: its label or its own split."""
        if node.value is not None:
            return str(node.value)
        return str(feature_names[node.feature]) + " <= " + str(node.threshold)

    def _export_to_graphviz(self, node, dot, feature_names):
        """Add the descendants of ``node`` (not ``node`` itself) to ``dot``.

        Each child is labelled with its own split or class label. The edge to
        the left child is marked "True" (the parent's ``<=`` test holds) and
        the edge to the right child "False".
        """
        if node is None:
            return
        for child, outcome in ((node.left, "True"), (node.right, "False")):
            if child is None:
                continue
            dot.node(str(id(child)), label=self._node_label(child, feature_names))
            dot.edge(str(id(node)), str(id(child)), label=outcome)
            self._export_to_graphviz(child, dot, feature_names)

    def export_graphviz(self, feature_names):
        """Return a ``graphviz.Digraph`` of the fitted tree.

        Args:
            feature_names: Sequence mapping a column index to a display name.
        """
        dot = graphviz.Digraph()
        dot.node(str(id(self.tree_)), label=self._node_label(self.tree_, feature_names))
        self._export_to_graphviz(self.tree_, dot, feature_names)
        return dot

# Separate the predictor columns from the 'NObeyesdad' class label
X = data.drop(columns=['NObeyesdad']).to_numpy()
y = data['NObeyesdad'].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a CART classifier limited to five levels
tree = DecisionTreeCART(max_depth=5)
tree.fit(X_train, y_train)


# **Testing Model**

In [None]:
# Feature columns are selected by dropping the target by name, the same way X
# was built, rather than assuming the target is the last column.
feature_columns = data.drop(['NObeyesdad'], axis=1).columns

# Now, let's classify a few hand-written samples (one row per person)
test_data = pd.DataFrame([
    ['Male', 25, 1.80, 80, 'no', 'yes', 3, 3, 'Always', 'no', 2, 'no', 1, 0, 'Sometimes', 'Public_Transportation'],
    ['Female', 30, 1.65, 70, 'yes', 'no', 2, 2, 'Sometimes', 'no', 1, 'yes', 0, 0, 'Sometimes', 'Public_Transportation'],
    ['Male', 35, 1.70, 90, 'yes', 'yes', 3, 1, 'Always', 'no', 3, 'no', 2, 1, 'Frequently', 'Automobile'],
    ['Female', 40, 1.60, 60, 'no', 'no', 2, 2, 'Sometimes', 'no', 2, 'no', 0, 0, 'Sometimes', 'Public_Transportation']
], columns=feature_columns)

# Predict the labels of the new samples
predicted_class = tree.predict(test_data.values)  # Pass values instead of DataFrame
print("Predicted class:", predicted_class)

# Predictions on the held-out test split
y_pred = tree.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
accuracy_percentage = accuracy * 100
print("Accuracy:", accuracy_percentage, "%")

# Export the decision tree as a graph, naming each split by its feature column
dot = tree.export_graphviz(feature_names=feature_columns)

# Save and display the decision tree
# (view=True asks graphviz to open the rendered PDF in the default viewer)
dot.render("decision_tree_CART", format="pdf", cleanup=True, view=True)