# Demo of a decision tree Leo transpilation - Iris dataset

## The three classes of the Iris dataset
![The three classes of the iris dataset](https://editor.analyticsvidhya.com/uploads/51518iris%20img1.png "The three classes of the iris dataset")

## Load the Iris dataset and explore the data

In [None]:
from sklearn.datasets import load_iris

# Load the Iris dataset and separate the feature matrix from the labels
iris = load_iris()
X, y = iris.data, iris.target

# Print a short overview: dimensions, feature/label names and the first sample
print(
    f"Data shape: {X.shape}",
    f"Feature names: {iris.feature_names}",
    f"Label names: {iris.target_names}",
    f"First row: {X[0]}",
    f"First label: {y[0]}",
    sep="\n",
)

## Split the dataset into a training and a test set, and train the classifier

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the samples as a test set; random_state=0 seeds the split
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Build a decision tree classifier (seeded with random_state=0) and fit it
# on the training split
clf = DecisionTreeClassifier(
    random_state=0,
)
clf.fit(X_train, y_train)

## Visualize the decision tree

In [None]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Draw the fitted tree on a 15 x 7.5 figure, labelling the nodes with the
# Iris feature and class names and filling them with colour
plt.figure(figsize=(15, 7.5))
plot_tree(
    clf,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
    filled=True,
)
plt.show()

## Transpile the model into Leo code

In [None]:
import logging
import os

from leotranspiler import LeoTranspiler

# Set the root logger to INFO level
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Name and target directory of the Leo project to generate
leo_project_name = "tree_iris_1"
leo_project_path = os.path.join(os.getcwd(), "tmp/iris")

# Transpile the decision tree into Leo code
lt = LeoTranspiler(model=clf, validation_data=X_train)
lt.to_leo(path=leo_project_path, project_name=leo_project_name)

## Let's take a look at the transpiled Leo code
(the thresholds in the conditions are going to be different compared to the visual representation because of the fixed-point conversion)

In [None]:
# Read the generated Leo program (<project path>/<project name>/src/main.leo)
# and print it for inspection
leo_code_path = os.path.join(leo_project_path, leo_project_name, "src", "main.leo")
# Explicit encoding so the read does not depend on the platform's locale
# default (assumes the generated source is UTF-8/ASCII text - TODO confirm)
with open(leo_code_path, "r", encoding="utf-8") as f:
    leo_code = f.read()
print(leo_code)

## Let's input one test data instance into the Leo and Python model and compare the predictions and labels

In [None]:
# Execute the Leo program on the first test sample and classify the same
# sample with the Python model
zkp = lt.execute(input_sample=X_test[0])
python_prediction = clf.predict(X_test[:1])

# Report the execution statistics, then both predictions next to the label
print(f"Circuit constraints: {zkp.circuit_constraints}")
print(f"Runtime: {zkp.runtime} seconds")
print(f"Active circuit inputs: {zkp.active_input_count}")
print(f"Leo prediction in fixed-point notation: {zkp.output[0]}")
print(f"Leo prediction in decimal notation: {zkp.output_decimal[0]}")
print(f"Python prediction: {python_prediction[0]}")
print(f"Label: {y_test[0]}")
print(f"Proof: {zkp.proof}")

## Let's compare the classification accuracy for the Leo and Python model over the entire test data set 

In [None]:
import numpy as np

# Run the Leo program once per test sample and collect its predictions
num_test_samples = len(X_test)
leo_predictions = np.zeros(num_test_samples)
for idx, sample in enumerate(X_test):
    leo_predictions[idx] = lt.run(input_sample=sample).output_decimal[0]

# The Python model classifies the whole test set in a single call
python_predictions = clf.predict(X_test)

# Accuracy = number of predictions equal to the label / number of samples
leo_accuracy = (leo_predictions == y_test).sum() / num_test_samples
python_accuracy = (python_predictions == y_test).sum() / num_test_samples

print(f"Leo accuracy: {100*leo_accuracy} %")
print(f"Python accuracy: {100*python_accuracy} %")

## We can also set the model parameters as inputs instead of hardcoding these

In [None]:
# Generate a second Leo project, this time with model_as_input=True
leo_project_name = "tree_iris_2"
lt.to_leo(
    path=leo_project_path,
    project_name=leo_project_name,
    model_as_input=True,
)

## Let's take a look at the transpiled Leo code again to check that the model parameters are now inputs

In [None]:
# Read the newly generated Leo program
# (<project path>/<project name>/src/main.leo) and print it for inspection
leo_code_path = os.path.join(leo_project_path, leo_project_name, "src", "main.leo")
# Explicit encoding so the read does not depend on the platform's locale
# default (assumes the generated source is UTF-8/ASCII text - TODO confirm)
with open(leo_code_path, "r", encoding="utf-8") as f:
    leo_code = f.read()
print(leo_code)

## Let's run the proving again
(we expect the circuit to have more inputs and slightly more constraints now, and the proving to run slightly longer)

In [None]:
# Execute the Leo program on the first test sample again and classify the
# same sample with the Python model
zkp = lt.execute(input_sample=X_test[0])
python_prediction = clf.predict(X_test[:1])

# Report the execution statistics, then both predictions next to the label
print(f"Circuit constraints: {zkp.circuit_constraints}")
print(f"Runtime: {zkp.runtime} seconds")
print(f"Active circuit inputs: {zkp.active_input_count}")
print(f"Leo prediction in fixed-point notation: {zkp.output[0]}")
print(f"Leo prediction in decimal notation: {zkp.output_decimal[0]}")
print(f"Python prediction: {python_prediction[0]}")
print(f"Label: {y_test[0]}")
print(f"Proof: {zkp.proof}")