In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor 
from sklearn.metrics import r2_score,mean_squared_error,make_scorer
import math
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import train_test_split
import seaborn as sns

In [2]:
turbine_data=pd.read_excel('cleaned_turbine_data.xlsx')
turbine_data.set_index('TimeStamp',inplace=True)
turbine_data=turbine_data.drop(columns='Comp Inlet Temp')
X=turbine_data.loc[:,'Exhaust temp':'Comp discharge temp']
y=turbine_data.loc[:,'Generated watts']

In [3]:
r2=make_scorer(r2_score)
grid_parameters={"splitter":["best","random"],"max_depth" : [1,3,5,7,9],"min_samples_leaf":[1,2,3,4,5],"min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5],"max_features":["auto","log2"],"max_leaf_nodes":[10,20,30,40,50] }
clf=GridSearchCV(DecisionTreeRegressor(),param_grid=grid_parameters,cv=10,return_train_score=False,scoring=r2)
clf.fit(X,y)

GridSearchCV(cv=10, estimator=DecisionTreeRegressor(),
             param_grid={'max_depth': [1, 3, 5, 7, 9],
                         'max_features': ['auto', 'log2'],
                         'max_leaf_nodes': [10, 20, 30, 40, 50],
                         'min_samples_leaf': [1, 2, 3, 4, 5],
                         'min_weight_fraction_leaf': [0.1, 0.2, 0.3, 0.4, 0.5],
                         'splitter': ['best', 'random']},
             scoring=make_scorer(r2_score))

In [4]:
clf.best_params_

{'max_depth': 9,
 'max_features': 'log2',
 'max_leaf_nodes': 40,
 'min_samples_leaf': 2,
 'min_weight_fraction_leaf': 0.1,
 'splitter': 'best'}

In [5]:
clf.best_score_

0.9548492801837138

In [9]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
regressor=DecisionTreeRegressor(max_depth=9,max_features='log2',max_leaf_nodes=40,min_samples_leaf=2,min_weight_fraction_leaf=0.1,splitter='best')
regressor.fit(X_train,y_train)
y_pred=regressor.predict(X_test)
print("R_squared: " + str(round(r2_score(y_test,y_pred),4)))
print("RMSE: " + str(round(math.sqrt(mean_squared_error(y_test,y_pred)),4)))

R_squared: 0.9871
RMSE: 4.208


In [21]:
def getPredictions(input_value):
    print('Generated Watts: ' ,regressor.predict(input_value))


In [22]:
input_Value=[[1100.11,57.32,113.05,644.57]]
getPredictions(input_Value)


Generated Watts:  [55.91674586]
