<a href="https://colab.research.google.com/github/PARKHYUNSOO/UICustomizing/blob/master/manufacturing_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing

In [None]:
# Basic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sklearn Metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error as mse

import warnings
warnings.filterwarnings('ignore')

## Data Loading

In [None]:
# Read the train / validation / test splits from CSV files in the working directory
train_df, val_df, test_df = [
    pd.read_csv(csv_path, encoding = 'utf-8')
    for csv_path in ('train.csv', 'val.csv', 'test.csv')
]

In [None]:
# Show the column labels of the training frame
train_df.columns

In [None]:
# Preview the first five rows of the training frame (five is the default for head())
train_df.head()

## Prediction Model

In [None]:
# Split every dataset into features (all columns except 'y') and the target.
# The target is selected with a list so it stays a single-column DataFrame.
train_x = train_df.drop(columns = ['y'])
train_y = train_df[['y']]

val_x = val_df.drop(columns = ['y'])
val_y = val_df[['y']]

test_x = test_df.drop(columns = ['y'])
test_y = test_df[['y']]

### Linear Regression

In [None]:
# Train the linear regression model
from sklearn.linear_model import LinearRegression as LR

# The template left `lr` undefined (it only appeared inside a string), so every
# later cell that touches `lr` failed with NameError on a fresh kernel.
# train_y is passed as a single-column DataFrame on purpose: with a 2-D target
# scikit-learn stores coef_ as a 2-D array, which the optimisation section
# relies on when it indexes lr.coef_[0] and builds a DataFrame from lr.coef_.
lr = LR()
lr.fit(train_x, train_y)

In [None]:
# Results: test-set MSE and a scatter of actual vs. predicted values
lr_predict = lr.predict(test_x)
lr_df = test_y.copy()
# predict() may return a column vector when the model was fit on a 2-D target;
# flatten it so it is stored as an ordinary 1-D column.
lr_df['predict'] = np.ravel(lr_predict)
# Sort by the actual value so the plot shows how predictions track the target
lr_df = lr_df.sort_values(['y']).reset_index()
print('MSE : {}'.format(mse(test_y, lr_predict)))

plt.figure(figsize = (10, 5))
plt.plot(lr_df['y'], marker = 'o', ls = '', markersize = 3, label = 'actual y')
plt.plot(lr_df['predict'], marker = 'o', ls = '', markersize = 3, label = 'predicted y')
plt.xlabel('test samples (sorted by actual y)')
plt.ylabel('y')
plt.title('Linear Regression: actual vs. predicted')
plt.legend()
plt.show()

### Random Forest Regressor

In [None]:
# Train the random forest regressor
from sklearn.ensemble import RandomForestRegressor as RFR

# The template left `rfr` undefined (it only appeared inside a string), so the
# result cell failed with NameError on a fresh kernel.
# random_state is fixed so reruns give the same forest; 2018 matches the seed
# used for the LightGBM parameters in this notebook.
rfr = RFR(random_state = 2018)
# The forest expects a 1-D target, so the single-column frame is flattened
rfr.fit(train_x, train_y.values.ravel())

In [None]:
# Results: test-set MSE and a scatter of actual vs. predicted values
rfr_predict = rfr.predict(test_x)
rfr_df = test_y.copy()
rfr_df['predict'] = rfr_predict
# Sort by the actual value so the plot shows how predictions track the target
rfr_df = rfr_df.sort_values(['y']).reset_index()
print('MSE : {}'.format(mse(test_y, rfr_predict)))

plt.figure(figsize = (10, 5))
plt.plot(rfr_df['y'], marker = 'o', ls = '', markersize = 3, label = 'actual y')
plt.plot(rfr_df['predict'], marker = 'o', ls = '', markersize = 3, label = 'predicted y')
plt.xlabel('test samples (sorted by actual y)')
plt.ylabel('y')
plt.title('Random Forest: actual vs. predicted')
plt.legend()
plt.show()

### LightGBM

In [None]:
import lightgbm

# Wrap each split in a LightGBM Dataset
train_data = lightgbm.Dataset(train_x, label = train_y)
val_data = lightgbm.Dataset(val_x, label = val_y)
# test_data is not used by the cells visible here; predictions are made on test_x directly
test_data = lightgbm.Dataset(test_x, label = test_y)

# Train LightGBM.
# The template left `params` and `model` inside a string, so the result cell
# and the genetic-algorithm section failed with NameError on a fresh kernel.
# learning_rate, max_depth and num_leaves were blank in the template; they are
# set to LightGBM's documented defaults as a neutral starting point for tuning.
params = {'learning_rate': 0.1,
          'max_depth': -1,
          'boosting': 'gbdt',
          'objective': 'regression',
          'metric': 'mse',
          'is_training_metric': True,
          'num_leaves': 31,
          'feature_fraction': 0.9,
          'bagging_fraction': 0.7,
          'bagging_freq': 5,
          'seed':2018}
# The validation split is passed so the metric is evaluated on held-out data during training
model = lightgbm.train(params, train_data, valid_sets = [val_data])

In [None]:
# Results: test-set MSE and a scatter of actual vs. predicted values
lightgbm_predict = model.predict(test_x)
lightgbm_df = test_y.copy()
lightgbm_df['predict'] = lightgbm_predict
print('MSE : {}'.format(mse(test_y, lightgbm_predict)))
# Sort by the actual value so the plot shows how predictions track the target
lightgbm_df = lightgbm_df.sort_values(['y']).reset_index()

plt.figure(figsize = (10, 5))
plt.plot(lightgbm_df['y'], marker = 'o', ls = '', markersize = 3, label = 'actual y')
plt.plot(lightgbm_df['predict'], marker = 'o', ls = '', markersize = 3, label = 'predicted y')
plt.xlabel('test samples (sorted by actual y)')
plt.ylabel('y')
plt.title('LightGBM: actual vs. predicted')
plt.legend()
plt.show()

## Optimization

### linear regression 을 활용한 프로세스 변수 최적화
- Stage4_2, Stage4_6 가 최적화 대상인 운전변수

In [None]:
from scipy.optimize import minimize

In [None]:
# Fitted linear-regression parameters that the optimisation below reuses
print('weight : ', lr.coef_[0])
print('bias : ', lr.intercept_)

# Label the coefficients with the training feature names
coef_df = pd.DataFrame(lr.coef_, columns = train_x.columns.tolist())

# Coefficients of the two inputs being optimised ...
control_weight = np.array(coef_df[['x_1', 'x_2']])
# ... and of every remaining input
not_weight = np.array(coef_df.drop(columns = ['x_1', 'x_2']))

In [None]:
# Class used to optimise the controllable inputs of a linear model
class optimize :
    """Find control-variable values that bring a linear prediction to a target.

    The prediction is modelled as
    ``control_weight . x + not_weight . not_data + intercept`` where ``x`` are
    the control variables being searched and ``not_data`` stays fixed.

    Parameters
    ----------
    control_data : array-like
        Current values of the control variables; used as the starting point.
    not_data : array-like
        Values of the inputs that are held fixed.
    control_weight : array-like
        Linear coefficients of the control variables (flattened before use).
    not_weight : array-like
        Linear coefficients of the fixed inputs (flattened before use).
    intercept : float or array-like
        Model bias; when array-like, its first element is used.
    target : float
        Prediction value the optimisation should reach.
    """

    def __init__(self, control_data, not_data, control_weight, not_weight, intercept, target) :
        self.control_data, self.not_data = control_data, not_data
        self.control_weight, self.not_weight = control_weight, not_weight
        self.intercept, self.target = intercept, target

    def objective(self, x) :
        """Return the squared gap between the target and the prediction at ``x``.

        Parameters
        ----------
        x : array-like
            Candidate values for the control variables.

        Returns
        -------
        float
            ``(target - prediction) ** 2``.
        """
        # Everything is flattened so that 2-D coefficient rows, pandas Series and
        # plain lists are all handled the same way.
        x = np.asarray(x, dtype = float).ravel()
        control_term = np.dot(np.ravel(self.control_weight).astype(float), x)
        fixed_term = np.dot(
            np.ravel(self.not_weight).astype(float),
            np.asarray(self.not_data, dtype = float).ravel(),
        )
        bias = float(np.ravel(self.intercept)[0])
        prediction = control_term + fixed_term + bias
        return float((self.target - prediction) ** 2)

    def solve(self) :
        """Minimise ``objective`` starting from the current control values.

        No bounds are placed on the control variables.

        Returns
        -------
        scipy.optimize.OptimizeResult
            Result object; the optimised control values are in ``.x``.
        """
        x0 = np.asarray(self.control_data, dtype = float).ravel()
        sol = minimize(self.objective, x0)
        return sol

In [None]:
# Sample to optimise and the value its prediction should reach
target_data = test_df.iloc[-100]   # the 100th row counting from the end of the test set

# Current values of the two inputs being optimised
control_data = target_data[['x_1', 'x_2']]
# Every other input, with the target column removed
not_data = target_data.drop(['x_1', 'x_2', 'y'])

print('Target Value : ', train_df['y'].mean())

In [None]:
# Run the optimisation and write the solution back into a copy of the sample
opt = optimize(
    control_data, not_data,
    control_weight, not_weight,
    lr.intercept_, train_df['y'].mean(),
)
sol = opt.solve()

# drop() already returns a new Series, so target_data itself is left untouched
result_scipy = target_data.drop(['y'])
result_scipy[['x_1', 'x_2']] = sol.x

In [None]:
# Print the original vs. optimised control values and the matching predictions
for col in ('x_1', 'x_2') :
    print('{} | original: {:.5f} | optimal: {:.5f}'.format(col, target_data[col].item(), result_scipy[col].item()))

# Single-row input matrices for the linear model
original_inputs = np.array(target_data.drop(['y'])).reshape(1, -1)
optimized_inputs = np.array(result_scipy).reshape(1, -1)

print('\nOriginal Out Flow: ', lr.predict(original_inputs).item())
print('Target y: ', train_df['y'].mean())
print('Optimized Out Flow: {:.5f}'.format(lr.predict(optimized_inputs).item()))

### Genetic Algorithm

In [None]:
import GeneticAlgorithm  as GA

In [None]:
def custom_obj(features, additional, return_prediction=False) :
    """Squared error between the target and the model prediction for a candidate.

    A copy of ``additional['data']`` is taken, the entries named in ``features``
    are overwritten with the candidate values, and the result is passed to
    ``additional['model'].predict`` as a single row.

    Parameters
    ----------
    features : dict
        Maps a feature name to the candidate value to try.
    additional : dict
        Must provide the keys 'data', 'model' and 'target'.
    return_prediction : bool
        When True, the prediction is returned alongside the squared error.

    Returns
    -------
    The squared error, or the pair (squared error, prediction).
    """
    candidate = additional['data'].copy()
    for name, value in features.items() :
        candidate[name] = value
    # The model is given one sample, laid out as a single row
    row = np.array(candidate).reshape(1, len(candidate))
    model, target = additional['model'], additional['target']
    prediction = model.predict(row)
    v = (target - prediction)**2

    ##### Do not change below #####
    if not return_prediction :
        return v
    else :
        return v, prediction

In [None]:
# Fixed context handed to the objective function: the model, the sample to
# optimise (without its target column) and the value the prediction should reach
additional_info = dict(
    model = model,
    data = target_data.drop('y'),
    target = train_df['y'].mean(),
)

"""
The possible range of the features
"""
# Each searched feature gets its own [lower, upper] list
features_range = {name : [0, 1] for name in ('x_1', 'x_2')}

In [None]:
"""
Genetic Algorithm

- Parameters
n_population : Int / the number of units
max_iteration : Int / the terminal point
n_elite : Int / the number of elites
mutation : Int / the number of mutation, there will be no mutation if set as 0
optimize_to : Str / 'minimize' or 'maximize'
early_stop : Int / Stop before max_iteration when the optimal value do not change, there will be no early stop if set as 0
printing : Int /  Print the value of the iteration during running, there will be no printing if set as 0

- Attributes
iteration : Class "iteration" / initial and final iteration of GA at before and after running respectively
best_unit : Class "unit" / The best unit after running
best_features : List / The features of solution after running
best_result : The optimal value of objective function after running
"""
n_population =  5
n_elite = 2
mutation = 1

# `import GeneticAlgorithm as GA` binds GA to the module object, and a module
# cannot be called, so `GA(...)` raises TypeError. Resolve the solver class
# from the module instead; if GA is already callable it is used unchanged.
# NOTE(review): the class is assumed to be named after the module
# (`GeneticAlgorithm`) — confirm against GeneticAlgorithm.py.
ga_class = GA if callable(GA) else getattr(GA, 'GeneticAlgorithm', None)
if ga_class is None :
    raise ImportError(
        "Could not find a 'GeneticAlgorithm' class in the GeneticAlgorithm module; "
        "import the solver class explicitly."
    )

ga = ga_class(
    n_population=n_population,
    n_elite=n_elite,
    mutation=mutation
)
ga.run(custom_obj, features_range, additional_info=additional_info)

In [None]:
"""
Printing Result
"""
# Use a dedicated name for the sample instead of rebinding `target_data`:
# overwriting it with the y-less series made earlier cells that call
# target_data.drop('y') fail with KeyError when re-run after this one.
ga_input = additional_info['data']
# A named variable is used for the objective value rather than `_`, which
# IPython reserves for the previous cell output.
ga_objective, ga_optimized = custom_obj(ga.best_features, additional_info, return_prediction=True)
for k in ga.best_features.keys():
    print('{} | original: {:.5f} | optimal: {:.5f}'.format(k, ga_input[k].item(), ga.best_features[k]))
print('\nOriginal y: ', model.predict(np.array(ga_input).reshape(1, -1)).item())
print('Target y: ', additional_info['target'])
print('Optimized y: {:.5f}'.format(ga_optimized[0]))