# EazyML Explainable AI Template

## Define Imports

In [None]:
!pip install --upgrade eazyml-xai
!pip install --upgrade eazyml-automl
!pip install gdown python-dotenv

In [None]:
import os
from eazyml_xai import (
    ez_init,
    ez_explain,
    ez_get_data_type,
    create_onehot_encoded_features,
)

from eazyml import ez_display_df
import gdown
import pandas as pd

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

from dotenv import load_dotenv
# Read a local .env file (if one exists) so that variables such as
# EAZYML_ACCESS_KEY, fetched later with os.getenv, can be supplied without
# hardcoding them in the notebook.
load_dotenv()

## 1. Initialize EazyML

The `ez_init` function uses the `EAZYML_ACCESS_KEY` environment variable for authentication. If the variable is not set, it defaults to a trial license.

In [None]:
# Authenticate with EazyML using the key stored in the EAZYML_ACCESS_KEY
# environment variable; when the variable is unset, None is passed instead.
ez_init(access_key=os.environ.get('EAZYML_ACCESS_KEY'))

## 2. Define Dataset Files and Outcome Variable

In [None]:
# Google Drive folder to fetch; presumably it holds the IRIS CSV files that
# are read from the 'data' directory further down - confirm the folder layout.
drive_folder_id = '1DJtU6gI929GdEEZ3F_7w5LMnT90VvYI7'
gdown.download_folder(id=drive_folder_id)

In [None]:
# Train/test CSV locations consumed by the EazyML APIs, both under 'data'
train_file_path, test_file_path = (
    os.path.join('data', csv_name)
    for csv_name in ('IRIS_Train.csv', 'IRIS_Test.csv')
)

# Name of the target column to predict and explain
outcome = 'species'

## 3. Dataset Information

The dataset used in this notebook is the **Iris Dataset**, which is a well-known dataset in machine learning and statistics. It contains data about 150 iris flowers, with four features (sepal length, sepal width, petal length, and petal width) and the species of the flower (setosa, versicolor, or virginica).

You can find more details and download the dataset from Kaggle using the following link:

[Kaggle Iris Dataset](https://www.kaggle.com/datasets/uciml/iris)

### Columns in the Dataset:
- **sepal_length**: Sepal length of the flower (cm)
- **sepal_width**: Sepal width of the flower (cm)
- **petal_length**: Petal length of the flower (cm)
- **petal_width**: Petal width of the flower (cm)
- **species**: Species of the iris flower (setosa, versicolor, virginica)

### 3.1 Display the Dataset

Below is a preview of the dataset:

In [None]:
# Read the training CSV into a DataFrame
train = pd.read_csv(train_file_path)

# Preview the first five rows as the cell output
train.head(n=5)

## 4. EazyML Preprocessing Steps Before Model Training

### 4.1 Reading the Datasets and Dropping Unnecessary Columns

In [None]:
# Columns to exclude before training; empty here, so nothing is removed
discard_columns = []

# Re-read the training data from disk and drop the discarded columns in one step
train = pd.read_csv(train_file_path).drop(columns=discard_columns)

### 4.2 Getting the Data Type of Features and Creating Dummy Features

In [None]:
# Getting Data Type of Features
# ez_get_data_type receives the training frame and the outcome column name.
# Its result is indexed later by the 'Data Type' and 'Variable Name' columns,
# so it is presumably a DataFrame with one row per variable - confirm against
# the EazyML documentation.
type_df = ez_get_data_type(train, outcome)

In [None]:
# Names of the variables typed as 'categorical', leaving out the outcome column
cat_list = type_df.loc[
    (type_df['Data Type'] == 'categorical') & (type_df['Variable Name'] != outcome),
    'Variable Name',
].tolist()

# Replace the training frame with the output of the one-hot encoding helper
# for those categorical columns
train = create_onehot_encoded_features(train, cat_list)

## 5. Training Bagging Classifier Model

In [None]:
# Split the training frame into the feature matrix and the target column
X = train.drop(columns=outcome)
y = train[outcome]

# Base learner: a decision tree with every hyperparameter spelled out
base_tree = DecisionTreeClassifier(
    criterion='gini',
    splitter='best',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    max_features=None,
    max_leaf_nodes=None,
    class_weight=None,
    random_state=None,
)

# Bagging ensemble of 5 trees; random_state=42 fixes the ensemble's seed
model_name = BaggingClassifier(
    estimator=base_tree,
    n_estimators=5,
    max_samples=1.0,
    max_features=1.0,
    bootstrap=True,
    bootstrap_features=False,
    oob_score=False,
    warm_start=False,
    n_jobs=None,
    random_state=42,
    verbose=0,
)

# Keep the value returned by fit() as the trained model
model = model_name.fit(X, y)

## 6. Get Explanations

### 6.1 Get Explanations for the First 2 Records

In [None]:
# Ask for explanations of record numbers 1 and 2 (presumably rows of the
# test file - confirm the indexing convention against the EazyML docs)
options = {
    'record_number': [1, 2],
}

# Explain the trained model's predictions, passing the training frame,
# the outcome column name, the test file path and the model
response = ez_explain(
    train,
    outcome,
    test_file_path,
    model,
    options=options,
)

### 6.2 Display Explanation DataFrame

In [None]:
# Build the explanations table straight from the list of per-record mappings.
# pd.DataFrame aligns values to columns by key, and an empty list gives an
# empty frame instead of failing on response['explanations'][0].
ex_df = pd.DataFrame(response['explanations'])
ez_display_df(ex_df)