# Example: SAS SWAT - Train a Decision Tree Classifier
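This example connects to a CAS server with SWAT, uploads the Iris dataset from a local CSV file, trains a decision tree classifier with the `decisiontree` action set, and then scores the same data with the trained model. Because the model is scored on its own training data, the reported misclassification error is a training error, not an estimate of performance on unseen data.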

In [1]:
# Connect to SAS Viya Platform
# Connection settings are read from environment variables so that no password is
# stored in the notebook. Host, port and user fall back to the local defaults;
# the password is prompted for interactively when CAS_PASSWORD is not set.
import getpass
import os

import swat

conn = swat.CAS(
    hostname=os.environ.get('CAS_HOST', 'localhost'),
    port=int(os.environ.get('CAS_PORT', '5570')),
    username=os.environ.get('CAS_USER', 'sas'),
    password=os.environ.get('CAS_PASSWORD') or getpass.getpass('CAS password: '),
)

In [2]:
# Load Dataset from local CSV file
# The CSV is uploaded to the CAS server as table 'iris' in caslib 'casuser'.
# replace=True keeps this cell re-runnable within one session: without it the
# upload is rejected when a table with that name already exists.
iris = conn.read_csv(
    'data/iris.csv',
    casout=dict(name='iris', caslib='casuser', replace=True),
)
# Preview the first rows of the uploaded table.
iris.head()

NOTE: Cloud Analytic Services made the uploaded file available as table IRIS in caslib CASUSER(sas).
NOTE: The table IRIS has been created in caslib CASUSER(sas) from binary data uploaded to Cloud Analytic Services.


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Make the decision tree actions available in this CAS session.
conn.loadactionset('decisiontree')

# Configure a training action bound to the iris table: the four measurement
# columns are the inputs, the species label is the target, and the fitted tree
# is written to the CAS table 'iris_dtree'.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
tree_model = iris.Dtreetrain()
tree_model.inputs = feature_columns
tree_model.target = 'species'
tree_model.casout = 'iris_dtree'

# Run the configured action; its result is the cell output.
tree_model()

NOTE: Added action set 'decisiontree'.


Unnamed: 0,Descr,Value
0,Number of Tree Nodes,11.0
1,Max Number of Branches,2.0
2,Number of Levels,5.0
3,Number of Leaves,6.0
4,Number of Bins,50.0
5,Minimum Size of Leaves,5.0
6,Maximum Size of Leaves,50.0
7,Number of Variables,4.0
8,Confidence Level for Pruning,0.25
9,Number of Observations Used,150.0

Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,CASUSER(sas),iris_dtree,11,25,"CASTable('iris_dtree', caslib='CASUSER(sas)')"


In [11]:
# Score the iris table with the tree stored by the training action.
# All original columns are copied into the output so predictions can be compared
# with the true labels; the scored table is overwritten on re-run.
iris.dtreescore(
    model=tree_model.casout,
    copyVars=list(iris.columns),
    casout={'name': 'iris_dtree_scored', 'replace': True},
    assess=False,
)

Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,CASUSER(sas),iris_dtree_scored,150,16,"CASTable('iris_dtree_scored', caslib='CASUSER(..."

Unnamed: 0,Descr,Value
0,Number of Observations Read,150.0
1,Number of Observations Used,150.0
2,Misclassification Error (%),2.6666666667


In [12]:
# Show the last ten scored rows: the original columns plus the predicted class.
scored_table = conn.CASTable('iris_dtree_scored')
preview_columns = list(iris.columns) + ['_DT_PredName_']
scored_table[preview_columns].tail(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,_DT_PredName_
140,6.1,2.6,5.6,1.4,Iris-virginica,Iris-virginica
141,7.7,3.0,6.1,2.3,Iris-virginica,Iris-virginica
142,6.3,3.4,5.6,2.4,Iris-virginica,Iris-virginica
143,6.4,3.1,5.5,1.8,Iris-virginica,Iris-virginica
144,6.0,3.0,4.8,1.8,Iris-virginica,Iris-virginica
145,6.9,3.1,5.4,2.1,Iris-virginica,Iris-virginica
146,6.7,3.1,5.6,2.4,Iris-virginica,Iris-virginica
147,6.9,3.1,5.1,2.3,Iris-virginica,Iris-virginica
148,5.8,2.7,5.1,1.9,Iris-virginica,Iris-virginica
149,6.8,3.2,5.9,2.3,Iris-virginica,Iris-virginica
