## Decision Tree for classification

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np

In [5]:
df = sns.load_dataset('iris')

In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['species'] = le.fit_transform(df['species'])

In [7]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [8]:
x = df.drop('species', axis = 1)
y = df['species']

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state=42)

In [None]:
from sklearn.tree import DecisionTreeClassifier
DT_c = DecisionTreeClassifier(min_samples_split = 50,
                              max_depth= 5,
                              random_state=42 )


In [11]:
DT_c.fit(x_train, y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,5
,min_samples_split,50
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [12]:
y_pred_DT_c = DT_c.predict(x_test)

In [14]:
from sklearn.metrics import accuracy_score, classification_report
score = accuracy_score(y_pred_DT_c, y_test)
print(score)
print( classification_report(y_pred_DT_c, y_test) )

0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.89      1.00      0.94         8
           2       1.00      0.92      0.96        12

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



## Decison tree for regression

In [None]:
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

In [16]:
df = pd.read_csv('Dataset/housing.csv', header=None, delimiter=r"\s+", names = column_names)

In [17]:
x = df.drop('MEDV', axis = 1)
y = df['MEDV']

In [18]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state=42)

In [22]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
x_test = scale.transform(x_test)

In [23]:
from sklearn.tree import DecisionTreeRegressor
DT_r = DecisionTreeRegressor()
DT_r.fit(x_train, y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [24]:
y_pred_DT_r = DT_r.predict(x_test)

In [25]:
from sklearn.metrics import r2_score
score = r2_score(y_pred_DT_r, y_test)
print(score)

0.8316402180773004


#### regression using hyperparameter tuning

In [29]:
parameter={
 'criterion':['squared_error','friedman_mse','absolute_error','poisson'],
  'splitter':['best','random'],
  'max_depth':[1,2,3,4,5,6,7,8,10,11,12],
  'max_features':['auto', 'sqrt', 'log2']
    
}

In [31]:
from sklearn.model_selection import GridSearchCV
DTR_cv = GridSearchCV(DT_r, param_grid = parameter, scoring = 'neg_mean_squared_error', cv= 5)
DTR_cv.fit(x_train, y_train)

440 fits failed out of a total of 1320.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
440 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rachin/Desktop/rachin/ai/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/rachin/Desktop/rachin/ai/lib/python3.13/site-packages/sklearn/base.py", line 1358, in wrapper
    estimator._validate_params()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/rachin/Desktop/rachin/ai/lib/python3.13/site-packages/sklearn/base.py", line 471, in _validate_params
    validate_parameter_constraints(
    ~~~~~~~~~~~~

0,1,2
,estimator,DecisionTreeRegressor()
,param_grid,"{'criterion': ['squared_error', 'friedman_mse', ...], 'max_depth': [1, 2, ...], 'max_features': ['auto', 'sqrt', ...], 'splitter': ['best', 'random']}"
,scoring,'neg_mean_squared_error'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'absolute_error'
,splitter,'best'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'log2'
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [33]:
DTR_cv.best_params_

{'criterion': 'absolute_error',
 'max_depth': 8,
 'max_features': 'log2',
 'splitter': 'best'}

In [35]:
y_pred = DTR_cv.predict(x_test)

In [36]:
score = r2_score(y_pred, y_test)
print(score)

0.8144730685571824
