## Classifier

In [28]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.metrics import classification_report,confusion_matrix,f1_score,recall_score,precision_score,accuracy_score

In [23]:
# Load the iris data set and separate the feature matrix from the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for the final evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=22
)

# Seed the tree as well: scikit-learn permutes the candidate features at every
# split, so an unseeded tree can differ from run to run. The same estimator
# object is reused by cross_val_score and GridSearchCV in later cells, so fixing
# the seed here makes those results repeatable too.
tree_clf = DecisionTreeClassifier(random_state=22)
tree_clf.fit(X_train, y_train)

# Baseline predictions of the untuned tree on the held-out samples.
y_pred = tree_clf.predict(X_test)

In [24]:

# Per-class precision / recall / F1 of the baseline tree on the held-out set.
baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       0.71      1.00      0.83        10
           2       1.00      0.71      0.83        14

    accuracy                           0.87        30
   macro avg       0.90      0.90      0.89        30
weighted avg       0.90      0.87      0.87        30



In [25]:
# Confusion matrix of the baseline tree: true labels against predicted labels.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[ 6,  0,  0],
       [ 0, 10,  0],
       [ 0,  4, 10]])

In [26]:
# 5-fold cross-validation of the baseline tree on the complete iris data.
scores = cross_val_score(estimator=tree_clf, X=X, y=y, cv=5)

In [27]:
# Average of the per-fold scores returned by cross_val_score (cv=5).
scores.mean()

np.float64(0.9666666666666668)

In [29]:
# Hyper-parameter grid searched for the decision-tree classifier.
params = dict(
    criterion=["gini", "entropy"],
    max_depth=[*range(1, 8), None],  # depths 1..7, plus None
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4, 8],
)

In [30]:
# Exhaustive search over `params`, ranked by 5-fold cross-validated accuracy.
grid_search = GridSearchCV(
    tree_clf,
    params,
    scoring="accuracy",
    cv=5,
)

In [31]:
# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X=X_train, y=y_train)

0,1,2
,estimator,DecisionTreeClassifier()
,param_grid,"{'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, ...], 'min_samples_leaf': [1, 2, ...], 'min_samples_split': [2, 5, ...]}"
,scoring,'accuracy'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,2
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [32]:
# Report the winning parameter combination and its cross-validated score.
clf_best_params = grid_search.best_params_
clf_best_score = grid_search.best_score_
print(f"Best params {clf_best_params}")
print(f"Best score {clf_best_score}")

Best params {'criterion': 'gini', 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 2}
Best score 0.9583333333333334


In [33]:
# Keep the best configuration found by the search. The search was created with
# the default refit=True, so this estimator is used for prediction directly below.
model=grid_search.best_estimator_

In [34]:
# Re-evaluate on the held-out set, this time with the tuned tree.
y_pred = model.predict(X_test)
tuned_report = classification_report(y_test, y_pred)
print(tuned_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       0.91      1.00      0.95        10
           2       1.00      0.93      0.96        14

    accuracy                           0.97        30
   macro avg       0.97      0.98      0.97        30
weighted avg       0.97      0.97      0.97        30



In [35]:
# Confusion matrix of the tuned tree: true labels against predicted labels.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[ 6,  0,  0],
       [ 0, 10,  0],
       [ 0,  1, 13]])

## Regressor

In [47]:
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score,root_mean_squared_error

In [39]:
# Fetch the California housing data set. The returned object exposes the
# feature matrix as `.data` and the target as `.target` (both read in the next cell).
# NOTE(review): despite the name `df`, this is presumably scikit-learn's Bunch
# container rather than a pandas DataFrame — confirm.
df=fetch_california_housing()

In [41]:
# Features and regression target; this rebinds the X / y used in the classifier section.
X, y = df.data, df.target

In [46]:
# 80/20 train/test split of the housing data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
)

In [51]:
# Baseline: a seeded regression tree with otherwise default settings.
tree_reg = DecisionTreeRegressor(random_state=11).fit(X_train, y_train)
y_pred_reg = tree_reg.predict(X_test)

# Score the baseline on the held-out split.
baseline_r2 = r2_score(y_test, y_pred_reg)
baseline_rmse = root_mean_squared_error(y_test, y_pred_reg)
print(f"R2 score {baseline_r2}")
print(f"Root Mean Square {baseline_rmse}")

R2 score 0.6492000952539674
Root Mean Square 0.6935476397615543


In [52]:
# Hyper-parameter grid searched for the decision-tree regressor.
params = dict(
    criterion=["squared_error", "friedman_mse", "poisson"],
    splitter=["best", "random"],
    max_depth=[1, 2, 4, 5, None],
    min_samples_split=[2, 5, 10, 20, 30],
    min_samples_leaf=[2, 4, 8],
)

In [55]:
# Exhaustive search over `params`, ranked by 5-fold cross-validated R^2.
# The grid holds 3 * 2 * 5 * 5 * 3 = 450 candidates, i.e. 2250 tree fits with
# cv=5. n_jobs=-1 spreads those fits across all available CPU cores instead of
# running them one after another; it only affects how the work is scheduled.
grid = GridSearchCV(
    estimator=tree_reg,
    param_grid=params,
    cv=5,
    scoring="r2",
    n_jobs=-1,
)

In [56]:
# Run the search on the training split only; the test split stays untouched.
grid.fit(X=X_train, y=y_train)

0,1,2
,estimator,DecisionTreeR...ndom_state=11)
,param_grid,"{'criterion': ['squared_error', 'friedman_mse', ...], 'max_depth': [1, 2, ...], 'min_samples_leaf': [2, 4, ...], 'min_samples_split': [2, 5, ...], ...}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'friedman_mse'
,splitter,'best'
,max_depth,
,min_samples_split,30
,min_samples_leaf,8
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,11
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [57]:
# Report the winning parameter combination and its cross-validated score.
reg_best_params = grid.best_params_
reg_best_score = grid.best_score_
print(f"Best params {reg_best_params}")
print(f"Best score {reg_best_score}")

Best params {'criterion': 'friedman_mse', 'max_depth': None, 'min_samples_leaf': 8, 'min_samples_split': 30, 'splitter': 'best'}
Best score 0.7188398656554889


In [58]:
# Keep the best regressor found by the search. The search was created with the
# default refit=True, so this estimator is used for prediction directly below.
model_reg=grid.best_estimator_

In [62]:
# Score the tuned regressor on the held-out split.
y_pred_reg = model_reg.predict(X_test)
tuned_r2 = r2_score(y_test, y_pred_reg)
print(f"R2 score {tuned_r2}")

R2 score 0.7556701313203441
