# Importing The Libraries

In [2]:
import pandas as pd
import numpy as np
import json

#  Parsing the json file 

In [3]:
with open('algoparams_from_ui.json') as f:
    data = json.load(f)


In [4]:
target = data['design_state_data']['target']
models = data['design_state_data']['algorithms']

In [5]:
target

{'prediction_type': 'Regression',
 'target': 'petal_width',
 'type': 'regression',
 'partitioning': True}

In [6]:
models

{'RandomForestClassifier': {'model_name': 'Random Forest Classifier',
  'is_selected': False,
  'min_trees': 10,
  'max_trees': 30,
  'feature_sampling_statergy': 'Default',
  'min_depth': 20,
  'max_depth': 30,
  'min_samples_per_leaf_min_value': 5,
  'min_samples_per_leaf_max_value': 50,
  'parallelism': 0},
 'RandomForestRegressor': {'model_name': 'Random Forest Regressor',
  'is_selected': True,
  'min_trees': 10,
  'max_trees': 20,
  'feature_sampling_statergy': 'Default',
  'min_depth': 20,
  'max_depth': 25,
  'min_samples_per_leaf_min_value': 5,
  'min_samples_per_leaf_max_value': 10,
  'parallelism': 0},
 'GBTClassifier': {'model_name': 'Gradient Boosted Trees',
  'is_selected': False,
  'num_of_BoostingStages': [67, 89],
  'feature_sampling_statergy': 'Fixed number',
  'learningRate': [],
  'use_deviance': True,
  'use_exponential': False,
  'fixed_number': 22,
  'min_subsample': 1,
  'max_subsample': 2,
  'min_stepsize': 0.1,
  'max_stepsize': 0.5,
  'min_iter': 20,
  'max_i

# Loading The Dataset and Checking for null values

In [8]:
iris = pd.read_csv('iris.csv')

In [9]:
iris.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [10]:
iris['species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: species, dtype: int64

In [11]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [12]:
(iris['petal_width'] == 0).sum()

0

In [13]:
(iris['sepal_length'] == 0).sum()

0

In [14]:
(iris['sepal_width'] == 0).sum()

0

In [16]:
(iris['petal_length'] == 0).sum()

0

In [None]:
# As there is no missing values there is no need to impute values

# Training the Model

In [17]:
#one hot encoding on dataset

iris = pd.get_dummies(iris,columns=['species'],drop_first = True)
 

In [18]:
X = iris.drop(columns = ['petal_width'],axis = 1)
Y = iris.petal_width

In [19]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=2)


In [20]:
def models(X_train,Y_train):
    
      
      #Using Random Forest Regression
    from sklearn.ensemble import RandomForestRegressor
    rfr = RandomForestRegressor(n_estimators = 20 ,max_depth = 20 , min_samples_leaf = 10)
    rfr.fit(X_train,Y_train)
    
    #Using GBT Regressor
    from sklearn.ensemble import GradientBoostingRegressor
    gbtr = GradientBoostingRegressor(n_estimators= 89,max_depth=7)
    gbtr.fit(X_train,Y_train)
    
    #Using Linear Regression
    from sklearn.linear_model import LinearRegression
    lr = LinearRegression()
    lr.fit(X_train,Y_train)
    
    #Using Ridge Regression
    from sklearn.linear_model import Ridge
    rdr = Ridge(max_iter = 50)
    rdr.fit(X_train,Y_train)
    
    #Using Lasso Regression
    from sklearn.linear_model import Lasso
    lsr = Lasso(max_iter = 50)
    lsr.fit(X_train,Y_train)
    
    #Using Elastic Net Regression
    from sklearn.linear_model import ElasticNet
    enr = ElasticNet(max_iter = 50)
    enr.fit(X_train,Y_train)
    
    #Using Xgboost
    import xgboost as xg
    xgb_r = xg.XGBRegressor()
    xgb_r.fit(X_train,Y_train)
    
    #Using Decision Tree regression
    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(min_samples_leaf = 12)
    dtr.fit(X_train,Y_train)
    
    #Using SVR 
    from sklearn.svm import SVR
    svr = SVR(kernel = 'rbf',gamma = 7,max_iter = 7)
    svr.fit(X_train,Y_train)
    
    
    

  
      

    #print model accuracy on the training data.
    print('[0]Random Forest Regression Training Accuracy:', rfr.score(X_train, Y_train))
    print('[1]GBT Regressor Training Accuracy:', gbtr.score(X_train, Y_train))
    print('[2]Linear Regression Training Accuracy:', lr.score(X_train, Y_train))
    print('[3]Ridge Regression Training Accuracy:', rdr.score(X_train, Y_train))
    print('[4]Lasso Regression Training Accuracy:', lsr.score(X_train, Y_train))
    print('[5]Elastic Net Regression Training Accuracy:', enr.score(X_train, Y_train))
    print('[6]XGBoost Regression Training Accuracy:', xgb_r.score(X_train, Y_train))
    print('[7]Decision Tree Regression Training Accuracy:', dtr.score(X_train, Y_train))
    print('[8]SVM Regression Training Accuracy:', svr.score(X_train, Y_train))



    return rfr, gbtr, lr,rdr, lsr, enr, xgb_r,dtr,svr

In [21]:
#Get and train all of the models
model = models(X_train,Y_train)

[0]Random Forest Regression Training Accuracy: 0.9557606373562959
[1]GBT Regressor Training Accuracy: 0.9999243417061058
[2]Linear Regression Training Accuracy: 0.9537415449043007
[3]Ridge Regression Training Accuracy: 0.9485347906986835
[4]Lasso Regression Training Accuracy: 0.33165888295129764
[5]Elastic Net Regression Training Accuracy: 0.7051834513099366
[6]XGBoost Regression Training Accuracy: 0.9999094687064979
[7]Decision Tree Regression Training Accuracy: 0.9489157167973766
[8]SVM Regression Training Accuracy: 0.8530640483214396




# Evaluating The Model 

In [22]:
model = models(X_train,Y_train)

[0]Random Forest Regression Training Accuracy: 0.95605040052256
[1]GBT Regressor Training Accuracy: 0.9999244317158402
[2]Linear Regression Training Accuracy: 0.9537415449043007
[3]Ridge Regression Training Accuracy: 0.9485347906986835
[4]Lasso Regression Training Accuracy: 0.33165888295129764
[5]Elastic Net Regression Training Accuracy: 0.7051834513099366
[6]XGBoost Regression Training Accuracy: 0.9999094687064979
[7]Decision Tree Regression Training Accuracy: 0.9489157167973766
[8]SVM Regression Training Accuracy: 0.8530640483214396




In [23]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
for i in range(len(model)):
    y = list(Y_test)
    

    pred = model[i].predict(X_test)
    mse = mean_squared_error(y,pred)
    rmse = mean_squared_error(y,pred,squared=False)
    mae = mean_absolute_error(y,pred)
    print(f"Mean Squared Error for {model[i]}  is  {mse}")
    print(f"Root Mean Squared Error for {model[i]} is {rmse}")
    print(f"Mean Absolute Error for {model[i]} is {mae}")
    print()
    print()


Mean Squared Error for RandomForestRegressor(max_depth=20, min_samples_leaf=10, n_estimators=20)  is  0.028631223856810106
Root Mean Squared Error for RandomForestRegressor(max_depth=20, min_samples_leaf=10, n_estimators=20) is 0.1692076353383916
Mean Absolute Error for RandomForestRegressor(max_depth=20, min_samples_leaf=10, n_estimators=20) is 0.12464361275068919


Mean Squared Error for GradientBoostingRegressor(max_depth=7, n_estimators=89)  is  0.04879288750322151
Root Mean Squared Error for GradientBoostingRegressor(max_depth=7, n_estimators=89) is 0.22089112137707462
Mean Absolute Error for GradientBoostingRegressor(max_depth=7, n_estimators=89) is 0.14751458084899077


Mean Squared Error for LinearRegression()  is  0.02940100195363562
Root Mean Squared Error for LinearRegression() is 0.17146720372606425
Mean Absolute Error for LinearRegression() is 0.1248833346299645


Mean Squared Error for Ridge(max_iter=50)  is  0.029776941892131693
Root Mean Squared Error for Ridge(max_iter