In [1]:
import itertools
import os
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor as Regression
from tqdm import tqdm

In [2]:
# Load the spreadsheet and shorten every header to its first
# whitespace-separated token.
df = pd.read_excel('database.xlsx')
df = df.rename(columns=lambda header: header.split()[0])

# Keep only the rows whose X9 lies strictly below the column's 95th percentile.
x9_cutoff = df['X9'].quantile(0.95)
df = df.loc[df['X9'] < x9_cutoff]

In [3]:
# Columns fed to every model: X1..X6 plus the two healing columns.
input_features = [f'X{i}' for i in range(1, 7)]
healing_features = ['U1', 'U2']
features = [*input_features, *healing_features]

# Columns the models predict: X7..X9 plus X10, the optimization target.
output_labels = [f'X{i}' for i in range(7, 10)]
optimization_label = 'X10'
labels = [*output_labels, optimization_label]

In [4]:
# Fit one regressor per label on the shared feature matrix.
# A fixed seed makes the fitted forests -- and therefore every downstream
# prediction and plan ranking -- reproducible across kernel restarts.
RANDOM_STATE = 42

X = df[features]  # identical for every label, so it is selected only once
models = dict()
for label in tqdm(labels):
    y = df[label].values
    model = Regression(random_state=RANDOM_STATE)
    model.fit(X, y)
    models[label] = model

100%|██████████| 4/4 [00:00<00:00,  8.70it/s]


In [5]:
# (min, max) bounds per column; contraints_score treats both ends as inclusive.
constraints = dict(
    # healing features
    U1=(0, 20),
    U2=(14, 32),
    # predicted labels
    X7=(42, 80),
    X8=(3, 14),
    X9=(0.4, 1.4),
    X10=(56, 78),
)

In [44]:
def optimization_score(value):
    """Return how far ``value`` is from the middle of the target range.

    The range is the ``(min, max)`` pair stored in ``constraints`` under
    ``optimization_label``; the result is the absolute distance to the mean
    of those two bounds, so smaller means closer to the centre.
    """
    lower, upper = constraints[optimization_label]
    target = np.mean([lower, upper])
    deviation = value - target
    return np.abs(deviation)


def healing_field(step=3):
    """Enumerate the grid of candidate values for the healing features.

    For each name in ``healing_features`` an axis is built with
    ``np.arange(min, max, step)`` from its bounds in ``constraints``; the
    result is the Cartesian product of those axes.

    Parameters
    ----------
    step : int or float, default 3
        Spacing between neighbouring candidates on every axis.

    Returns
    -------
    list of tuple
        One tuple per grid point, with values ordered like
        ``healing_features``.

    Notes
    -----
    ``np.arange`` excludes its stop value, so the upper bound of a feature
    is never proposed even when it falls exactly on the grid.
    NOTE(review): confirm whether the upper bound should be searched too.
    """
    axes = [np.arange(*constraints[feature], step)
            for feature in healing_features]
    # itertools.product already yields tuples, so no per-item conversion is needed.
    return list(itertools.product(*axes))


def contraints_score(row_df):
    """Tell whether the first row of ``row_df`` satisfies every constraint.

    Each column named in ``constraints`` is read from the first row of
    ``row_df`` and must lie within its ``(min, max)`` pair, both ends
    inclusive. Returns ``True`` only if all columns pass.
    """
    return all(
        lower <= row_df[name].values[0] <= upper
        for name, (lower, upper) in constraints.items()
    )


def optimize_row(row, k_best):
    """Rank candidate healing plans for a single input row.

    Every combination from ``healing_field()`` is merged into ``row``, run
    through ``predict`` and kept only when ``contraints_score`` accepts it.
    The survivors are ranked by ``optimization_score`` (lower is better).

    Parameters
    ----------
    row : mapping or pandas.Series
        Input feature values for one sample; anything ``dict(row)`` accepts.
    k_best : int
        Maximum number of plans to return.

    Returns
    -------
    list of dict
        Up to ``k_best`` entries of the form
        ``{'score': ..., 'healing_plan': {column: value, ...}}`` in
        ascending score order. Empty when no candidate meets the constraints.
    """
    print(row)
    # Collected as (score, frame) pairs rather than a dict keyed by score:
    # two different plans can reach exactly the same score, and a dict would
    # silently keep only the last of them.
    candidates = []

    for field in tqdm(healing_field()):
        row_dict = dict(row)
        row_dict.update(zip(healing_features, field))
        row_df = pd.DataFrame(pd.Series(row_dict)).transpose()
        row_df = predict(row_df)
        if contraints_score(row_df):
            score = optimization_score(row_df[optimization_label].values[0])
            candidates.append((score, row_df))

    # Sort on the score alone (stable, so tied plans keep grid order);
    # comparing whole tuples would fall through to comparing DataFrames.
    candidates.sort(key=lambda pair: pair[0])
    return [{'score': score, 'healing_plan': plan_df.to_dict(orient='records')[0]}
            for score, plan_df in candidates[:k_best]]
    
    
def optimize(input_df, k_best=5):
    """Run ``optimize_row`` on every row of ``input_df``.

    The frame is copied first, then each row is passed to ``optimize_row``
    together with ``k_best``. The ``.values`` of the row-wise ``apply``
    result is returned, one entry per input row.
    """
    def _plans_for(sample):
        return optimize_row(sample, k_best)

    frame = input_df.copy()
    per_row = frame.apply(_plans_for, axis=1)
    return per_row.values
    

def predict(input_df):
    """Return a copy of ``input_df`` with one predicted column per label.

    Raises ``ValueError`` when any column listed in ``features`` is absent.
    Otherwise, for each name in ``labels``, the matching entry of ``models``
    predicts from the ``features`` columns and the result is stored under
    that name. The caller's frame is not modified.
    """
    absent = set(features) - set(input_df.columns)
    if absent:
        raise ValueError(f'Missed input columns: {", ".join(absent)}')

    enriched = input_df.copy()
    for target in labels:
        enriched[target] = models[target].predict(enriched[features])

    return enriched

In [45]:
%%time
# Rank healing plans for the first three rows, passing only the input feature columns.
res = optimize(df[input_features].head(3))

  7%|▋         | 3/42 [00:00<00:01, 22.35it/s]

X1    189.0
X2     84.0
X3     64.0
X4    100.0
X5      0.0
X6     72.0
Name: 0, dtype: float64


100%|██████████| 42/42 [00:01<00:00, 22.92it/s]
  7%|▋         | 3/42 [00:00<00:01, 24.50it/s]

X1     4.9
X2    10.0
X3    82.0
X4    73.0
X5     1.5
X6    80.0
Name: 1, dtype: float64


100%|██████████| 42/42 [00:01<00:00, 22.95it/s]
  7%|▋         | 3/42 [00:00<00:01, 23.17it/s]

X1    168.0
X2     43.0
X3     73.0
X4     60.0
X5      2.0
X6     74.0
Name: 2, dtype: float64


100%|██████████| 42/42 [00:01<00:00, 23.48it/s]

CPU times: user 5.36 s, sys: 142 ms, total: 5.5 s
Wall time: 5.47 s





In [46]:
# Show the third-ranked plan of the third input row as a one-row table.
pd.Series(res[2][2]['healing_plan']).to_frame().T

Unnamed: 0,X1,X2,X3,X4,X5,X6,U1,U2,X7,X8,X9,X10
0,168.0,43.0,73.0,60.0,2.0,74.0,18.0,29.0,59.5,7.81,0.505,64.0


In [50]:
# Ranked plans returned for the first input row (lowest score first).
res[0]

[{'score': 1.75,
  'healing_plan': {'X1': 189.0,
   'X2': 84.0,
   'X3': 64.0,
   'X4': 100.0,
   'X5': 0.0,
   'X6': 72.0,
   'U1': 18.0,
   'U2': 20.0,
   'X7': 56.48,
   'X8': 5.61,
   'X9': 0.475,
   'X10': 65.25}},
 {'score': 1.8499999999999943,
  'healing_plan': {'X1': 189.0,
   'X2': 84.0,
   'X3': 64.0,
   'X4': 100.0,
   'X5': 0.0,
   'X6': 72.0,
   'U1': 18.0,
   'U2': 23.0,
   'X7': 56.07,
   'X8': 5.61,
   'X9': 0.465,
   'X10': 65.15}},
 {'score': 2.319999999999993,
  'healing_plan': {'X1': 189.0,
   'X2': 84.0,
   'X3': 64.0,
   'X4': 100.0,
   'X5': 0.0,
   'X6': 72.0,
   'U1': 18.0,
   'U2': 29.0,
   'X7': 56.05,
   'X8': 7.7,
   'X9': 0.505,
   'X10': 64.68}},
 {'score': 2.8400000000000034,
  'healing_plan': {'X1': 189.0,
   'X2': 84.0,
   'X3': 64.0,
   'X4': 100.0,
   'X5': 0.0,
   'X6': 72.0,
   'U1': 18.0,
   'U2': 26.0,
   'X7': 56.06,
   'X8': 7.78,
   'X9': 0.475,
   'X10': 64.16}},
 {'score': 2.930000000000007,
  'healing_plan': {'X1': 189.0,
   'X2': 84.0,
   