In [1]:
import tensorflow_decision_forests as tfdf
from sklearn.model_selection import train_test_split
import pandas as pd

##### ❇️ Setup

In [2]:
# Load the penguins data; the task is to predict each penguin's species.
label = "species"
dataset_df = pd.read_csv('penguins.csv')

# Class names in order of first appearance; a name's position in this list
# becomes its integer class id.
classes = pd.unique(dataset_df[label]).tolist()
print(f"Label classes: {classes}")

# Replace each label string with its integer class id.
dataset_df[label] = dataset_df[label].map(lambda name: classes.index(name))
dataset_df.head(2)

Label classes: ['Adelie', 'Gentoo', 'Chinstrap']


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,0,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,0,Torgersen,39.5,17.4,186.0,3800.0,female,2007


In [3]:
# Split data into train/test (70% / 30%).
# A fixed random_state makes the shuffle deterministic, so the split -- and
# therefore the metrics reported further down -- are reproducible after a
# kernel restart instead of changing on every run.
train_ds_pd, test_ds_pd = train_test_split(
    dataset_df, test_size=0.3, random_state=42
)

In [6]:
# Wrap both pandas splits as TensorFlow datasets, using the same label column
# for each (train first, then test).
train_ds, test_ds = (
    tfdf.keras.pd_dataframe_to_tf_dataset(split_df, label=label)
    for split_df in (train_ds_pd, test_ds_pd)
)

##### ❇️ Training

In [7]:
# Create a Random Forest model with the library defaults and fit it on the
# training dataset.
model = tfdf.keras.RandomForestModel()
model.fit(train_ds)

##### ❇️ Evaluation

In [8]:
# Attach the accuracy metric, then score the model on the held-out test split.
model.compile(metrics=["accuracy"])
evaluation = model.evaluate(test_ds, return_dict=True)

# Report every returned metric with four decimal places.
for name in evaluation:
    value = evaluation[name]
    print(f"{name}: {value:.4f}")

loss: 0.0000
accuracy: 0.9615


##### ❇️ Save the model so it is ready to be served with TensorFlow Serving

In [13]:
# Export the trained model to disk; the path below is a placeholder to replace
# with a real output directory.
model.save(filepath="/path_to_save_model_directory")

##### ❇️ Hope you enjoyed reading!! 📖 
##### ❇️ follow → @akshay_pachaar  

In [5]:
import warnings
warnings.filterwarnings('ignore')