# **Decision Trees**

In [28]:
# loading Libraries
from sklearn.datasets import load_iris
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier #loading decision tree classifier
from sklearn.tree import DecisionTreeRegressor #loading decision tree regressor
from sklearn.model_selection import train_test_split

In [29]:
# Load the two datasets used in this notebook:
#   iris    -> fed to the decision tree classifier below
#   housing -> fed to the decision tree regressor below
iris = load_iris()
housing = fetch_california_housing()

In [30]:
# Separate each dataset into its feature matrix (x) and target vector (y).
# Suffix 1 = iris (classification), suffix 2 = California housing (regression).
x1 = iris.data
y1 = iris.target

x2 = housing.data
y2 = housing.target

## **Decision Tree Classifier**




In [31]:
# Hold out 30% of the iris samples for evaluation; the fixed random_state
# makes the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x1,
    y1,
    test_size=0.3,
    random_state=42,
)

In [32]:
# Build a decision tree classifier that chooses splits by Gini impurity.
# max_depth counts levels of splits below the root (the root itself is depth 0),
# so max_depth=3 allows at most three successive splits on any root-to-leaf path.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    random_state=42,
)

In [33]:
# Train the classifier on the iris training split
clf.fit(x_train, y_train)

In [34]:
# Predict class labels for the held-out iris samples
y_pred = clf.predict(x_test)

In [35]:
# Report the fraction of held-out samples whose predicted label matches the true one
print(
    "Classification Accuracy: ",
    accuracy_score(y_test, y_pred),
)

Classification Accuracy:  1.0


**Testing**

In [36]:
# One hand-written sample with four feature values. predict() expects a
# 2D array-like (one row per sample), hence the nested list.
new_data = [[5.1, 3.5, 1.4, 0.2]]

# Predict the numeric class, then translate it into the species name
prediction = clf.predict(new_data)
print("Predicted class (numeric):", prediction)
print("Predicted species:", iris.target_names[prediction[0]])


Predicted class (numeric): [0]
Predicted species: setosa


## **Decision Tree Regressor**


In [37]:
# Hold out 20% of the California housing samples for evaluation.
# NOTE: this rebinds x_train/x_test/y_train/y_test, replacing the iris split above.
x_train, x_test, y_train, y_test = train_test_split(
    x2,
    y2,
    test_size=0.2,
    random_state=42,
)

In [38]:
# Build a decision tree regressor that chooses splits by squared error
# (i.e. mean squared error), limited to a maximum depth of 3.
rgs = DecisionTreeRegressor(
    criterion='squared_error',
    max_depth=3,
    random_state=42,
)

In [39]:
# Train the regressor on the housing training split
rgs.fit(x_train, y_train)

In [40]:
# Predict target values for the held-out housing samples
# (rebinds y_pred, replacing the classifier predictions above)
y_pred = rgs.predict(x_test)

In [41]:
# Report the mean squared error between true and predicted housing targets
print(
    "Regression MSE: ",
    mean_squared_error(y_test, y_pred),
)

Regression MSE:  0.6424109810265641


**Testing**

In [47]:
# Reuse the first row of x_test as a "new" sample so it has the right number of
# features (8 for California housing). Wrapping it in a list gives predict()
# the 2D, one-row-per-sample input it expects.
new_data = [x_test[0]]

# Predict the continuous target and show it next to the true value for that row
prediction = rgs.predict(new_data)
print("Predicted value:", prediction[0])
print("Actual value:", y_test[0])


Predicted value: 1.6258228849085399
Actual value: 0.477
