In [189]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

In [190]:
dataset = load_diabetes()
dataset

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [191]:
#independent feature

X = pd.DataFrame(dataset.data , columns=dataset.feature_names)
X.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [192]:
#dependent feature

y = dataset.target
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [193]:
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size=0.33 , random_state=42)

In [194]:
regressor = DecisionTreeRegressor()
regressor.fit(X_train , y_train)

In [195]:
y_predict = regressor.predict(X_test)
y_predict

array([196., 225., 131., 310.,  51., 230., 200., 248., 245., 196., 113.,
       126.,  85., 270.,  25.,  72., 178., 279., 109., 131.,  85.,  83.,
        50., 150., 141., 178., 288., 155.,  59., 177., 200.,  50., 196.,
       163., 164., 180.,  94.,  85., 259.,  96.,  39.,  91., 252., 277.,
       134.,  75.,  31., 146.,  39., 185., 144.,  75., 145.,  59., 292.,
       142., 104., 288.,  51.,  63., 128., 147., 141., 177.,  83., 296.,
       152., 190., 178., 109., 163., 244., 152., 104.,  96., 248., 248.,
       178., 131., 248.,  49.,  59., 160.,  81.,  79.,  59., 143.,  42.,
       147., 270.,  85., 225.,  42.,  77.,  72., 143., 252.,  85., 111.,
        75., 270., 114., 275., 142., 196., 150., 200., 155., 128., 225.,
       129., 221., 217., 174., 122., 245., 183.,  77.,  85., 246., 276.,
       104., 178., 235.,  47., 190., 141., 199.,  93., 152., 253.,  92.,
       221., 246., 275.,  85., 310.,  85.,  53., 128.,  70., 235., 137.,
        77., 259., 229.])

In [196]:
score = r2_score(y_predict , y_test)
score

-0.15611556159715811

In [197]:
#cross validation

parameter = {
    'criterion' : ['squared_error' , 'friedman_mse' , 'absolute_error' , 'poisson'],
    'splitter' : ['best' , 'random'],
    'max_depth' : [1,2,3,4,5,6,7,8,9,10,11,12],
    'max_features' : ['auto' , 'sqrt' , 'log2']
}

regressor = DecisionTreeRegressor()

In [198]:
regressor = GridSearchCV(regressor , param_grid=parameter , cv=5, scoring="neg_mean_squared_error")
regressor.fit(X_train , y_train)

480 fits failed out of a total of 1440.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
480 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\utils\_param_validatio

In [199]:
regressor.best_params_

{'criterion': 'squared_error',
 'max_depth': 3,
 'max_features': 'sqrt',
 'splitter': 'best'}

In [200]:
y_predict = regressor.predict(X_test)

In [201]:
r2_score(y_predict , y_test)

-0.2688125975934983