In [1]:
#import pandas library

import pandas as pd

In [2]:
# load datset

dataset=pd.read_csv("50_Startups.csv")
dataset

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6
8,120542.52,148718.95,311613.29,New York,152211.77
9,123334.88,108679.17,304981.62,California,149759.96


In [3]:
# Categorical Input data

dataset=pd.get_dummies(dataset, drop_first=True)
dataset

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit,State_Florida,State_New York
0,165349.2,136897.8,471784.1,192261.83,0,1
1,162597.7,151377.59,443898.53,191792.06,0,0
2,153441.51,101145.55,407934.54,191050.39,1,0
3,144372.41,118671.85,383199.62,182901.99,0,1
4,142107.34,91391.77,366168.42,166187.94,1,0
5,131876.9,99814.71,362861.36,156991.12,0,1
6,134615.46,147198.87,127716.82,156122.51,0,0
7,130298.13,145530.06,323876.68,155752.6,1,0
8,120542.52,148718.95,311613.29,152211.77,0,1
9,123334.88,108679.17,304981.62,149759.96,0,0


In [4]:
# view dataset column names

dataset.columns

Index(['R&D Spend', 'Administration', 'Marketing Spend', 'Profit',
       'State_Florida', 'State_New York'],
      dtype='object')

In [5]:
# split input data

independent=dataset[['R&D Spend', 'Administration', 'Marketing Spend', 
       'State_Florida', 'State_New York']]
independent

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State_Florida,State_New York
0,165349.2,136897.8,471784.1,0,1
1,162597.7,151377.59,443898.53,0,0
2,153441.51,101145.55,407934.54,1,0
3,144372.41,118671.85,383199.62,0,1
4,142107.34,91391.77,366168.42,1,0
5,131876.9,99814.71,362861.36,0,1
6,134615.46,147198.87,127716.82,0,0
7,130298.13,145530.06,323876.68,1,0
8,120542.52,148718.95,311613.29,0,1
9,123334.88,108679.17,304981.62,0,0


In [8]:
# split output data

dependent=dataset[['Profit']]
dependent

Unnamed: 0,Profit
0,192261.83
1,191792.06
2,191050.39
3,182901.99
4,166187.94
5,156991.12
6,156122.51
7,155752.6
8,152211.77
9,149759.96


In [9]:
# GridSearchCV Model Creation - Decision Tree

# criterion{“squared_error”, “friedman_mse”, “absolute_error”, “poisson”}, default=”squared_error”
# splitter{“best”, “random”}, default=”best”
# max_featuresint, float or {“auto”, “sqrt”, “log2”}, default=None

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

param_grid= { 'criterion':['squared_error', 'friedman_mse', 'absolute_error','poisson'],
               'splitter':['best','random'],
            'max_features':['auto', 'sqrt','log2']}

grid=GridSearchCV(DecisionTreeRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1)

grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits




In [10]:
#Print hyper tuning parameter and r2_score

result=grid.cv_results_

print("The r2_score with hyper tuning parameter {}".format(grid.best_params_))

The r2_score with hyper tuning parameter {'criterion': 'absolute_error', 'max_features': 'auto', 'splitter': 'random'}


In [11]:
# view in table format

table=pd.DataFrame.from_dict(result)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.237491,0.1486857,0.003126,0.006251,squared_error,auto,best,"{'criterion': 'squared_error', 'max_features':...",-6.501088,-4.55302,-14.123293,-2.171994,-1.667017,-5.803282,4.507052,8
1,0.069763,0.006352183,0.012974,0.006551,squared_error,auto,random,"{'criterion': 'squared_error', 'max_features':...",-5.019855,-4.906341,-32.5016,-4.357303,-3.884703,-10.133961,11.191214,11
2,0.014443,0.00167399,0.024711,0.022173,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",-9.346677,-3.196038,-37.7688,-12.882667,-6.23664,-13.886164,12.367605,16
3,0.015372,0.0007041292,0.007922,0.006633,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",-13.990602,-6.65413,-38.441357,-14.096135,-7.635925,-16.16363,11.562405,19
4,0.018243,0.01199963,0.015357,0.005146,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",-10.005193,-8.114506,-8.807597,-22.880879,-3.139554,-10.589546,6.576046,13
5,0.012501,0.006250525,0.018749,0.022963,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",-5.951787,-8.465897,-36.73445,-22.713262,-2.171779,-15.207435,12.812443,17
6,0.162495,0.1034527,0.003125,0.00625,friedman_mse,auto,best,"{'criterion': 'friedman_mse', 'max_features': ...",-6.462478,-2.754022,-13.886081,-2.295691,-1.661809,-5.412016,4.555298,5
7,0.146871,0.1769373,0.00625,0.007654,friedman_mse,auto,random,"{'criterion': 'friedman_mse', 'max_features': ...",-5.435516,-9.24899,-6.922287,-1.873478,-2.492512,-5.194556,2.749987,2
8,0.015624,0.00988159,0.01875,0.006248,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",-12.19779,-4.071307,-23.686684,-8.931842,-3.548523,-10.487229,7.33322,12
9,0.021874,0.01249692,0.009374,0.007654,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",-10.662413,-3.804894,-73.795433,-2.930017,-2.184476,-18.675446,27.725171,21


In [12]:
#Output from EndUser

rd_spend=float(input("R & D Spend :"))
administration=float(input("Administration :"))
marketing_spend=float(input("Marketing Spend :"))
state_florida=int(input("State_Florida 1 0r 0:"))
state_newyork=int(input("State_NewYork 1 0r 0 :"))


#Output Prediction

Prediction=grid.predict([[rd_spend, administration, marketing_spend, state_florida, state_newyork]])
print("Future Prediction {} ".format(Prediction))

R & D Spend :10000
Administration :10000
Marketing Spend :10000
State_Florida 1 0r 0:1
State_NewYork 1 0r 0 :0
Future Prediction [49490.75] 


