# Importing Required Libraries

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [19]:
#loading datasets
df=pd.read_csv('Slump.csv',sep='\t')

In [20]:
#perform basic EDA
df.head()

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0,34.99
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0,41.14
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0,1.0,20.0,41.81
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0,3.0,21.5,42.08
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0,20.0,64.0,26.82


In [21]:
#checking the data sets
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 10 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Cement                              103 non-null    float64
 1   Slag                                103 non-null    float64
 2   Fly ash                             103 non-null    float64
 3   Water                               103 non-null    float64
 4   SP                                  103 non-null    float64
 5   Coarse Aggr.                        103 non-null    float64
 6   Fine Aggr.                          103 non-null    float64
 7   SLUMP(cm)                           103 non-null    float64
 8   FLOW(cm)                            103 non-null    float64
 9   Compressive Strength (28-day)(Mpa)  103 non-null    float64
dtypes: float64(10)
memory usage: 8.2 KB


In [22]:
#checking for the null values
df.isna().sum()

Cement                                0
Slag                                  0
Fly ash                               0
Water                                 0
SP                                    0
Coarse Aggr.                          0
Fine Aggr.                            0
SLUMP(cm)                             0
FLOW(cm)                              0
Compressive Strength (28-day)(Mpa)    0
dtype: int64

In [23]:
#splitting data sets into x & y
x=df.iloc[:,:-1].values
y=df.iloc[:,-1].values

In [28]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=20)

In [29]:
#model creating without hyperparameter tuning
from sklearn.tree import DecisionTreeRegressor

In [30]:
model=DecisionTreeRegressor()
model.fit(x_train,y_train)
y_pred=model.predict(x_test)

In [31]:
from sklearn.metrics import r2_score, mean_squared_error
r2_score(y_test,y_pred)

0.7543277093155928

In [32]:
#hyperparameter tuning

In [45]:
from sklearn.model_selection import GridSearchCV
param={
    'max_depth':[1,2,3,4,5,6,7,8,9],
    'min_samples_leaf':[1,2,3,4],
    'max_features':[2,3,4,5,6]
}
estimator=DecisionTreeRegressor()

In [46]:
best_model=GridSearchCV(estimator,param_grid=param,cv=5)

In [47]:
best_model.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=DecisionTreeRegressor(),
             param_grid={'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                         'max_features': [2, 3, 4, 5, 6],
                         'min_samples_leaf': [1, 2, 3, 4]})

In [48]:
best_model.best_estimator_

DecisionTreeRegressor(max_depth=6, max_features=6, min_samples_leaf=2)

In [49]:
best_model.best_params_

{'max_depth': 6, 'max_features': 6, 'min_samples_leaf': 2}

In [50]:
best_model.best_score_

0.662128181216783

In [57]:
#model building with hyperparameter tuning 
model = DecisionTreeRegressor(criterion='mse', max_depth=5,max_features=7)

In [58]:
model.fit(x_train,y_train)



DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=7)

In [60]:
#Evalualvating
r2_score(y_test,y_pred)

0.7543277093155928