In [None]:
import numpy as np
import pandas as pd
import seaborn as sn

from sklearn.ensemble import RandomForestRegressor as rfr

from sklearn.model_selection import train_test_split, KFold, cross_validate
from sklearn.model_selection import RandomizedSearchCV as rscv
from sklearn.model_selection import GridSearchCV as gscv
from sklearn.model_selection import cross_val_score as cvs

from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE

from scipy.stats import randint

from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import max_error, r2_score

In [None]:
pip install -U pymoo;

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pymoo
  Downloading pymoo-0.6.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
Collecting cma==3.2.2 (from pymoo)
  Downloading cma-3.2.2-py2.py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alive-progress (from pymoo)
  Downloading alive_progress-3.1.2-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill (from pymoo)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m12.8 MB/s[0m eta [36m

In [None]:
rs = 3851

In [None]:
def score(model, X:list, y:list):
  print(model,'\n')
  res = model.predict(X[1])
  #Scores
  print('Score on test Values:')
  print('R Squared Value:', r2_score(res, y[1]))
  print('Mean absolute percentage error:', mape(res, y[1]))
  print('Maximum error:', max_error(res, y[1]))
  print('')
  kf = KFold(n_splits = 5, shuffle=False)
  cvr = cross_validate(model, X[0], y[0], cv=kf, scoring = ['r2', 'neg_mean_absolute_percentage_error', 'max_error'], n_jobs=-1)
  print('Cross Validate:')
  print('R Squared Mean and Standard deviation:',cvr['test_r2'], cvr['test_r2'].mean(), cvr['test_r2'].std())
  print('Mean absolute percentage error Mean and Standard deviation:',cvr['test_neg_mean_absolute_percentage_error'], cvr['test_neg_mean_absolute_percentage_error'].mean(), cvr['test_neg_mean_absolute_percentage_error'].std())
  print('Maximum error Mean and Standard deviation:',cvr['test_max_error'], cvr['test_max_error'].mean(), cvr['test_max_error'].std())

In [None]:
def tune_rf_model(model, X_train, y_train):

  kf = KFold(n_splits = 5, shuffle=False)

  max_depth = [i for i in range(4,9)]
  n_estimators = [i for i in range(7,15)]
  min_samples_split = [i for i in range(4, 9)]
  min_samples_leaf = [i for i in range(3, 7)]

  rf_params = {
      'max_depth':max_depth,
      'n_estimators': n_estimators,
      'min_samples_split':min_samples_split,
      'min_samples_leaf':min_samples_leaf,
  }

  rscvr = rscv(model, rf_params, cv = kf,n_iter = 200, scoring='max_error', n_jobs = -1)
  search = rscvr.fit(X_train, y_train)
  searchd = search.best_params_
  print(searchd)

  rfn = rfr(
    max_depth=searchd['max_depth'], 
    n_estimators=searchd['n_estimators'], 
    min_samples_split=searchd['min_samples_split'],
    min_samples_leaf=searchd['min_samples_leaf'],
    n_jobs=-1, random_state = rs)
  
  return rfn

In [None]:
def pred_val(model1, model2):
  val1 = model1.predict(X)
  print("val1", val1)
  min_dev_1 = min(abs(val1-target_value_1))
  idx = list(abs(val1-target_value_1)).index(min_dev_1)
  print(idx)
  print(val1[idx])
  print("")

  val2 = model2.predict(X)
  print("val2", val2)

  min_dev_2 = min(abs(val2-target_value_2))
  idx_2 = list(abs(val2-target_value_2)).index(min_dev_2)
  print(idx_2)
  print(val2[idx_2])
  print("")

  r = np.sqrt((1 - val1/target_value_1)**2 + (1 - val2/target_value_2)**2)

  idx_min = list(r).index(min(r))
  print(idx_min)
  print(min(r))
  print(X[idx_min])

  return X[idx_min]

In [None]:
dat_m = pd.read_csv('/content/Test data 2.csv')
dat_m.tail(5)

Unnamed: 0,CaO,SiO,Na2O,K2O,MgO,Al2O3,Total Binder,H20,FA,CA,Alk,DRCM,Strength
75,169.070323,156.04807,0.584315,5.93639,7.045674,43.900539,423.42,211.71,702.19,1110.16,27.440827,1.61,30.5
76,195.796755,180.713589,0.676683,6.874707,8.159424,50.839488,490.35,196.14,652.4,1110.16,36.192852,1.51,44.9
77,174.041608,160.636393,0.601496,6.110939,7.252842,45.191355,435.87,196.1415,692.93,1110.16,47.602404,1.42,40.9
78,241.529867,222.921612,0.834739,8.480376,10.065238,62.713605,604.88,211.708,702.19,1110.16,22.961619,1.24,44.56
79,223.76772,206.529816,0.773352,7.856808,9.325056,58.102272,560.4,196.14,725.35,1110.16,24.783996,1.1,45.7


In [None]:
out = ['DRCM']
inp = ['CaO',	'SiO',	'H20', 'FA', 'CA', 'Alk', 'Total Binder']

n_e = len(inp)

Xm = dat_m[inp]
ym = dat_m[out]

X_trn_m, X_tst_m, y_trn_m, y_tst_m = train_test_split(Xm, ym, random_state = rs)

Xml = [X_trn_m, X_tst_m]
yml = [y_trn_m, y_tst_m]

rfm = rfr(random_state=rs)
rfm.fit(Xml[0], yml[0])
score(rfm, Xml, yml)

RandomForestRegressor(random_state=3851) 

Score on test Values:
R Squared Value: 0.528179916700878
Mean absolute percentage error: 0.23164147817783434
Maximum error: 8.629086926799994



  rfm.fit(Xml[0], yml[0])


Cross Validate:
R Squared Mean and Standard deviation: [0.32521843 0.70073714 0.82649466 0.73360944 0.73617419] 0.6644467724198924 0.17468868138858326
Mean absolute percentage error Mean and Standard deviation: [-0.21940846 -0.17707209 -0.38871725 -0.20154181 -0.25402961] -0.24815384498992304 0.07462464651532345
Maximum error Mean and Standard deviation: [-7.33712289 -9.26909905 -4.25724623 -5.34591817 -6.66303488] -6.5744842442140055 1.71635511878841


In [None]:
tune_rf_model(rfm, Xml[0], yml[0])

{'n_estimators': 11, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_depth': 6}


  self.best_estimator_.fit(X, y, **fit_params)


In [None]:
rfmn = rfr(
    max_depth=9,#6,
    n_estimators=8,#8,
    min_samples_split=7,#6,
    min_samples_leaf=2,#4,
    n_jobs=-1, random_state = rs)
rfmn.fit(Xml[0], yml[0])
score(rfmn, Xml, yml)

RandomForestRegressor(max_depth=9, min_samples_leaf=2, min_samples_split=7,
                      n_estimators=8, n_jobs=-1, random_state=3851) 

Score on test Values:
R Squared Value: 0.5991299074862042
Mean absolute percentage error: 0.24591716484710485
Maximum error: 8.168741846733278

Cross Validate:
R Squared Mean and Standard deviation: [0.57023594 0.79504854 0.80277154 0.67534583 0.73362321] 0.7154050106465938 0.0860077926318096
Mean absolute percentage error Mean and Standard deviation: [-0.28271956 -0.17351994 -0.38929736 -0.40583452 -0.23596761] -0.29746780055591776 0.08892566603332556
Maximum error Mean and Standard deviation: [-3.32641865 -7.46902503 -4.13542815 -5.21392729 -6.93668769] -5.41629736449436 1.5858404867576987


  rfmn.fit(Xml[0], yml[0])


In [None]:
x = [149.124553,128.238074,	205.0,	743.146756,	975.0,	359.649123]
rfmn.predict([x])



array([19.66548329])

In [None]:
dat_s = pd.read_csv('/content/Data strnth 28 day.csv')
dat_s.tail(1)

Unnamed: 0,Cement Type,Cao,SiO2,H20,H20/B,FA,CA,Total Binder,Time/Strength,Strength
70,OPC(60)+Fly-Ash(40),149.124553,128.238074,205.0,0.57,743.146756,975.0,359.649123,28,20.738482


In [None]:
out_s = ['Strength']
#inp_s = ['Cao',	'SiO2',	'H20', 'FA', 'CA', 'Total Binder','Time/Strength']
inp_s = ['Cao',	'SiO2',	'H20/B', 'FA', 'CA', 'Total Binder']

Xs = dat_s[inp_s]
ys = dat_s[out_s]

X_trn_s, X_tst_s, y_trn_s, y_tst_s = train_test_split(Xs, ys, random_state = rs)

Xsl = [X_trn_s, X_tst_s]
ysl = [y_trn_s, y_tst_s]

rfs = rfr(random_state=rs)
rfs.fit(Xsl[0], ysl[0])
score(rfs, Xsl, ysl)

RandomForestRegressor(random_state=3851) 

Score on test Values:
R Squared Value: 0.49933150217083133
Mean absolute percentage error: 0.08897459814949926
Maximum error: 14.227556522099988



  rfs.fit(Xsl[0], ysl[0])


Cross Validate:
R Squared Mean and Standard deviation: [0.64199509 0.61890949 0.45614768 0.73715582 0.5439721 ] 0.5996360363432508 0.09462011420685776
Mean absolute percentage error Mean and Standard deviation: [-0.07877334 -0.19934252 -0.14304954 -0.18484661 -0.097568  ] -0.1407160000641076 0.047092820318646804
Maximum error Mean and Standard deviation: [-10.358359   -10.62243956  -9.3264083  -11.51409437 -10.2575791 ] -10.41577606426001 0.7020054168214994


In [None]:
tune_rf_model(rfs, Xsl[0], ysl[0])

{'n_estimators': 9, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_depth': 6}


  self.best_estimator_.fit(X, y, **fit_params)


In [None]:
rfsn = rfr(
    max_depth=6,#6,
    n_estimators=28,#8,
    min_samples_split=2,#4,
    min_samples_leaf=1,#5,
    n_jobs=-1, random_state = rs)
rfsn.fit(Xsl[0], ysl[0])
score(rfsn, Xsl, ysl)

  rfsn.fit(Xsl[0], ysl[0])


RandomForestRegressor(max_depth=6, n_estimators=28, n_jobs=-1,
                      random_state=3851) 

Score on test Values:
R Squared Value: 0.5245189750244661
Mean absolute percentage error: 0.09181664906185591
Maximum error: 13.839591907211314

Cross Validate:
R Squared Mean and Standard deviation: [0.62842148 0.61305053 0.47600251 0.70984173 0.58425521] 0.6023142921006921 0.07568328828443405
Mean absolute percentage error Mean and Standard deviation: [-0.07824695 -0.20208306 -0.13614384 -0.195691   -0.09545215] -0.1415233996163527 0.05051140344491442
Maximum error Mean and Standard deviation: [-11.01318742 -10.88551379  -8.3204216  -11.70964968  -9.02568524] -10.19089154544762 1.2900899486241681


In [None]:
x = [149.124553,128.238074,	205.0,	743.146756,	975.0,	359.649123]
rfsn.predict([x])



array([21.54946062])

In [None]:
x =[229.967578,	123.0095,	196.1415,	728.96,	1110.16,	435.87]
rfsn.predict([x])



array([26.87239277])

In [None]:
228.0119	127.8334	205	0.46	672.1601	975	445.6521739

In [None]:
def function_1(model1, target_value_1, X):
  pred1 = abs(model1.predict([X]) - (1-0.02)*target_value_1)
  return pred1

def function_2(model2, target_value_2, X):
  pred2 = abs(model2.predict([X]) - (1+0.02)*target_value_2)
  return pred2

In [None]:
target_value_1 = 2.95

target_value_2 = 42.9

In [None]:
dat_s = pd.read_csv('/content/Data strnth 28 day.csv')
dat_s.head(1)

Unnamed: 0,Cement Type,Cao,SiO2,H20,H20/B,FA,CA,Total Binder,Time/Strength,Strength
0,Cem,367.592596,109.790196,196.154,0.35,692.98,1110.23,560.44,28,54.4


In [None]:
from pymoo.core.problem import ElementwiseProblem

class functions(ElementwiseProblem):

    def __init__(self):
        super().__init__(n_var=n_e,
                         n_obj=2,
                         n_ieq_constr=0,
                         xl=np.array([100,0,0.35,400, 900, 400]),
                         xu=np.array([400, 200, 0.57, 800, 1200, 600]))

    def _evaluate(self, x, out, *args, **kwargs):

        Xt = []

        for i in range(n_e):
          Xt.append(x[i])
     
        f1 = function_1(rfmn, target_value_1, Xt)
        f2 = function_2(rfsn, target_value_1, Xt)

        out["F"] = [f1, f2]

problem = functions()

from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.operators.sampling.rnd import FloatRandomSampling

algorithm = NSGA2(
    pop_size=100,
    n_offsprings=40,
    sampling=FloatRandomSampling(),
    crossover=SBX(prob=0.7, eta=10),
    mutation=PM(eta=10),
    eliminate_duplicates=True
)

from pymoo.termination import get_termination

termination = get_termination("n_gen", 100)

In [None]:
from pymoo.optimize import minimize

res = minimize(problem,
               algorithm,
               termination,
               seed=None,
               save_history=True,
               verbose=True)

X = res.X
F = res.F



n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      6 |             - |             -




     2 |      140 |      8 |  0.000000E+00 |             f




     3 |      180 |      9 |  0.000000E+00 |             f




     4 |      220 |      9 |  0.0287941352 |             f




     5 |      260 |      7 |  0.0897058566 |         ideal




     6 |      300 |      6 |  0.0278691177 |             f




     7 |      340 |      8 |  0.6578705136 |         nadir




     8 |      380 |     10 |  0.0062214671 |             f




     9 |      420 |     12 |  0.0028925925 |             f




    10 |      460 |     17 |  0.000000E+00 |             f




    11 |      500 |     21 |  0.0018320200 |             f




    12 |      540 |     14 |  0.0158896237 |         ideal




    13 |      580 |     12 |  0.0236952732 |         ideal




    14 |      620 |     11 |  0.0081004634 |             f




    15 |      660 |     13 |  0.0276916019 |         ideal




    16 |      700 |     12 |  0.0052484719 |             f




    17 |      740 |     13 |  0.0214101122 |             f




    18 |      780 |     14 |  0.0011825654 |             f




    19 |      820 |     16 |  0.0025679827 |             f




    20 |      860 |     19 |  0.0008337278 |             f




    21 |      900 |     14 |  0.0037828284 |             f




    22 |      940 |     14 |  0.0048273753 |             f




    23 |      980 |     12 |  0.1400102964 |         nadir




    24 |     1020 |     16 |  0.0096311697 |             f




    25 |     1060 |     16 |  0.0019060068 |             f




    26 |     1100 |     17 |  0.0025511291 |             f




    27 |     1140 |     18 |  0.0000905780 |             f




    28 |     1180 |     20 |  0.0001865155 |             f




    29 |     1220 |     25 |  0.0002923669 |             f




    30 |     1260 |     24 |  0.0008494069 |             f




    31 |     1300 |     22 |  0.0015415833 |             f




    32 |     1340 |     27 |  0.0017397356 |             f




    33 |     1380 |     23 |  0.0031707748 |             f




    34 |     1420 |     28 |  0.0081843587 |         ideal




    35 |     1460 |     36 |  0.0007882617 |             f




    36 |     1500 |     42 |  0.0006756529 |             f




    37 |     1540 |     42 |  0.0018457184 |             f




    38 |     1580 |     45 |  2.0551717083 |         nadir




    39 |     1620 |     48 |  7.950816E-19 |             f




    40 |     1660 |     52 |  7.339215E-19 |             f




    41 |     1700 |     57 |  1.278217E-18 |             f




    42 |     1740 |     67 |  1.087439E-18 |             f




    43 |     1780 |     79 |  7.268234E-18 |             f




    44 |     1820 |     93 |  6.174091E-18 |             f




    45 |     1860 |    100 |  5.741905E-18 |             f




    46 |     1900 |     52 |  3.719647E-18 |             f




    47 |     1940 |     61 |  3.170847E-18 |             f




    48 |     1980 |     68 |  2.844436E-18 |             f




    49 |     2020 |     77 |  2.511970E-18 |             f




    50 |     2060 |     91 |  2.583022E-18 |             f




    51 |     2100 |    100 |  0.0001272566 |             f




    52 |     2140 |    100 |  0.0001272566 |             f




    53 |     2180 |    100 |  0.0001272566 |             f




    54 |     2220 |    100 |  0.0002545131 |             f




    55 |     2260 |    100 |  0.0002545131 |             f




    56 |     2300 |    100 |  0.0028889814 |         ideal




    57 |     2340 |    100 |  0.000000E+00 |             f




    58 |     2380 |    100 |  0.0006553214 |             f




    59 |     2420 |    100 |  0.0006553214 |             f




    60 |     2460 |    100 |  0.0006553214 |             f




    61 |     2500 |    100 |  0.0006553214 |             f




    62 |     2540 |    100 |  0.0006553214 |             f




    63 |     2580 |    100 |  0.0013106429 |             f




    64 |     2620 |     98 |  0.0013373907 |             f




    65 |     2660 |    100 |  0.0015976120 |             f




    66 |     2700 |     80 |  0.0019970151 |             f




    67 |     2740 |     87 |  0.0018363357 |             f




    68 |     2780 |    100 |  0.0015976120 |             f




    69 |     2820 |    100 |  0.0015976120 |             f




    70 |     2860 |    100 |  0.0015976120 |             f




    71 |     2900 |    100 |  0.0022529335 |             f




    72 |     2940 |    100 |  0.0029082549 |             f




    73 |     2980 |    100 |  0.000000E+00 |             f




    74 |     3020 |    100 |  0.000000E+00 |             f




    75 |     3060 |    100 |  0.000000E+00 |             f




    76 |     3100 |     32 |  1.387194E-17 |             f




    77 |     3140 |     35 |  1.268292E-17 |             f




    78 |     3180 |     39 |  1.138211E-17 |             f




    79 |     3220 |     41 |  1.082688E-17 |             f




    80 |     3260 |     43 |  1.032331E-17 |             f




    81 |     3300 |     46 |  9.650048E-18 |             f




    82 |     3340 |     53 |  8.375513E-18 |             f




    83 |     3380 |     57 |  7.787758E-18 |             f




    84 |     3420 |     66 |  6.725791E-18 |             f




    85 |     3460 |     81 |  5.480274E-18 |             f




    86 |     3500 |     89 |  0.0002828229 |             f




    87 |     3540 |    100 |  0.0145810569 |         nadir




    88 |     3580 |    100 |  0.000000E+00 |             f




    89 |     3620 |     90 |  4.047688E-19 |             f




    90 |     3660 |    100 |  3.642919E-19 |             f




    91 |     3700 |    100 |  3.642919E-19 |             f




    92 |     3740 |    100 |  3.642919E-19 |             f




    93 |     3780 |    100 |  3.642919E-19 |             f




    94 |     3820 |    100 |  3.642919E-19 |             f




    95 |     3860 |    100 |  3.642919E-19 |             f




    96 |     3900 |    100 |  3.642919E-19 |             f




    97 |     3940 |    100 |  3.642919E-19 |             f




    98 |     3980 |    100 |  3.642919E-19 |             f




    99 |     4020 |    100 |  3.642919E-19 |             f




   100 |     4060 |    100 |  3.642919E-19 |             f




In [None]:
X

array([[2.61338742e+02, 1.45554611e+02, 5.55740448e-01, 7.45685785e+02,
        1.05129095e+03, 5.38877197e+02],
       [1.63108523e+02, 1.64688271e+02, 5.60231775e-01, 7.93990240e+02,
        1.03969122e+03, 5.19420449e+02],
       [1.60670056e+02, 1.64586053e+02, 5.60154302e-01, 6.84744163e+02,
        1.08668936e+03, 5.38248716e+02],
       [1.64556012e+02, 1.44902106e+02, 5.69848156e-01, 7.65534775e+02,
        1.09863470e+03, 5.95137699e+02],
       [2.45069313e+02, 1.60582470e+02, 5.52431132e-01, 7.73217400e+02,
        1.10667641e+03, 5.54028545e+02],
       [2.45635273e+02, 1.44636720e+02, 5.49524667e-01, 7.69854311e+02,
        1.04444921e+03, 5.59750152e+02],
       [1.61641916e+02, 1.68954577e+02, 5.65526271e-01, 7.49648302e+02,
        1.07485442e+03, 5.36461254e+02],
       [1.64557913e+02, 1.60574511e+02, 5.52399363e-01, 6.86754558e+02,
        1.10341581e+03, 5.53201145e+02],
       [1.61581894e+02, 1.24609281e+02, 5.55553643e-01, 6.79582712e+02,
        1.08657366e+03, 

In [None]:
pred_val(rfmn, rfsn)

val1 [ 2.89744338 14.76745474  1.75641358  2.74101746  2.76633227  2.91694956
  2.47160388  1.87520988  3.6645071   3.32387438  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  2.89744338  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358  1.75641358
  1.75641358  1.75641358  1.75641358  1.756413



array([2.61338742e+02, 1.45554611e+02, 5.55740448e-01, 7.45685785e+02,
       1.05129095e+03, 5.38877197e+02])