# In [5]:
### 모듈 및 데이터 로딩
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import joblib
# Location of the test-set CSV, given relative to the current working directory.
file = '../0. Data/2. output/0. Test_data.csv'
# Read the whole CSV into a DataFrame; the code below selects the feature
# columns and the `scale_pv` column from it.
data = pd.read_csv(file)

# In [6]:
# Load the trained model and the scaler bundle saved next to it.
lr_model = joblib.load('../2. Modeling/model/lr_model.pkl')
scaler_data = joblib.load('../2. Modeling/model/lr_scaler.pkl')

# The bundle is indexed by key; pull out the feature scaler, the feature-name
# list and the target scaler in a single unpacking step.
scaler, feature_names, target_scaler = (
    scaler_data[key] for key in ('scaler', 'feature_names', 'target_scaler')
)

# In [7]:
# Prediction for the first row
def predict_first_row(data, model, scaler, feature_names, target_scaler=None):
    """Predict the target for the first row of ``data``, in unscaled units.

    Args:
        data: DataFrame that contains every column listed in ``feature_names``.
        model: Fitted estimator exposing ``predict(X)``.
        scaler: Fitted feature scaler exposing ``transform(X)``.
        feature_names: Names of the feature columns to select. Presumably in
            the order the scaler was fitted with -- confirm against training.
        target_scaler: Scaler exposing ``inverse_transform`` for the target.
            When omitted, the module-level ``target_scaler`` is used, which is
            what this function always did before the parameter existed.

    Returns:
        The inverse-transformed prediction rounded to two decimal places.

    Raises:
        IndexError: If ``data`` has no rows.
        NameError: If ``target_scaler`` is omitted and no module-level
            ``target_scaler`` exists.
    """
    if target_scaler is None:
        # Legacy call sites pass only four arguments and rely on the global.
        try:
            target_scaler = globals()['target_scaler']
        except KeyError:
            raise NameError("name 'target_scaler' is not defined") from None

    # Keep the row as a one-row DataFrame: the column names travel with the
    # values and the numeric dtypes are preserved (a row Series taken from a
    # mixed-type frame would turn into an object array).
    first_row = data.iloc[[0]][list(feature_names)]
    features_scaled = scaler.transform(first_row)

    # Normalise the prediction to a (1, 1) column before inverting the scaling.
    predicted_scaled = np.asarray(model.predict(features_scaled), dtype=float).reshape(-1, 1)
    predicted_scale_pv = target_scaler.inverse_transform(predicted_scaled)[0][0]
    return round(predicted_scale_pv, 2)  # round to two decimal places


# Find the best k_rpm_pv
def find_optimal_k_rpm_pv(n_temp, s_temp, c_temp, current_k_rpm_pv, model, scaler, feature_names, target_scaler,
                          target_scale_pv=3, adjustments=(-1, 0, 1)):
    """Pick the k_rpm_pv candidate whose predicted scale_pv is closest to a target.

    Each candidate is ``current_k_rpm_pv + step`` for ``step`` in
    ``adjustments``. All candidates are scaled and predicted in one batch,
    the predictions are inverse-transformed, and the candidate with the
    smallest absolute distance to ``target_scale_pv`` wins. On a tie the
    earliest candidate wins.

    Args:
        n_temp, s_temp, c_temp: Temperature readings held fixed for every
            candidate.
        current_k_rpm_pv: The k_rpm_pv value the adjustments are applied to.
        model: Fitted estimator exposing ``predict(X)``.
        scaler: Fitted feature scaler exposing ``transform(X)``.
        feature_names: Column names for the feature frame. When they are
            exactly ``n_temp_pv``, ``s_temp_pv``, ``c_temp_pv`` and
            ``k_rpm_pv`` (in any order) the values are placed by name;
            otherwise they are assigned positionally as
            ``(n_temp, s_temp, c_temp, k_rpm_pv)``.
        target_scaler: Scaler exposing ``inverse_transform`` for the target.
        target_scale_pv: Desired predicted value (default 3).
        adjustments: Steps added to ``current_k_rpm_pv`` (default -1, 0, 1).

    Returns:
        The winning candidate, or ``current_k_rpm_pv`` when ``adjustments``
        is empty or no candidate yields a comparable (non-NaN) distance.
    """
    candidates = [current_k_rpm_pv + step for step in adjustments]
    if not candidates:
        return current_k_rpm_pv

    columns = list(feature_names)
    fixed = {'n_temp_pv': n_temp, 's_temp_pv': s_temp, 'c_temp_pv': c_temp}
    if sorted(columns) == sorted([*fixed, 'k_rpm_pv']):
        # Known column names: place each value under its own name so a
        # differently ordered feature list cannot mislabel the inputs.
        rows = [
            [{**fixed, 'k_rpm_pv': k_rpm_pv}[name] for name in columns]
            for k_rpm_pv in candidates
        ]
    else:
        # Unknown naming scheme: fall back to the positional convention.
        rows = [[n_temp, s_temp, c_temp, k_rpm_pv] for k_rpm_pv in candidates]

    # One transform/predict round trip for all candidates.
    features = pd.DataFrame(rows, columns=columns)
    features_scaled = scaler.transform(features)
    predicted_scaled = np.asarray(model.predict(features_scaled), dtype=float).reshape(-1, 1)
    predicted = np.asarray(target_scaler.inverse_transform(predicted_scaled), dtype=float).ravel()

    best_k_rpm_pv = current_k_rpm_pv
    best_scale_pv_diff = float('inf')
    for k_rpm_pv, predicted_scale_pv in zip(candidates, predicted):
        scale_pv_diff = abs(predicted_scale_pv - target_scale_pv)
        # Strict "<" keeps the first of equally good candidates and skips NaN.
        if scale_pv_diff < best_scale_pv_diff:
            best_scale_pv_diff = scale_pv_diff
            best_k_rpm_pv = k_rpm_pv
    return best_k_rpm_pv


# Predict over the whole data set and find the best k_rpm_pv for each next row
def predict_and_optimize(data, model, scaler, target_scaler, feature_names):
    """Predict scale_pv row by row, feeding each optimised k_rpm_pv forward.

    For every row the function predicts the target from the row's feature
    columns, then asks ``find_optimal_k_rpm_pv`` for the best k_rpm_pv given
    the row's temperatures and writes that value into the *next* row of
    ``data``, so the next prediction uses it.

    Side effect: ``data['k_rpm_pv']`` is overwritten in place for every row
    after the first. Pass a copy if the original values are still needed.

    Args:
        data: DataFrame with the columns ``n_temp_pv``, ``s_temp_pv``,
            ``c_temp_pv``, ``k_rpm_pv`` and every name in ``feature_names``.
        model: Fitted estimator exposing ``predict(X)``.
        scaler: Fitted feature scaler exposing ``transform(X)``.
        target_scaler: Scaler exposing ``inverse_transform`` for the target.
        feature_names: Names of the feature columns fed to the scaler.

    Returns:
        DataFrame with one row per input row and the columns ``n_temp_pv``,
        ``s_temp_pv``, ``c_temp_pv``, ``predicted_scale_pv`` (rounded to two
        decimals) and ``k_rpm_pv`` (the value actually used for that row).
    """
    result_columns = ['n_temp_pv', 's_temp_pv', 'c_temp_pv', 'predicted_scale_pv', 'k_rpm_pv']
    n_rows = len(data)
    if n_rows == 0:
        # Keep the schema so downstream column access still works.
        return pd.DataFrame(columns=result_columns)

    columns = list(feature_names)
    # Rows are read by position, so the write-back must be positional too;
    # a label lookup would hit the wrong row on a non-default index.
    k_rpm_col = data.columns.get_loc('k_rpm_pv')

    results = []
    # Index loop on purpose: each pass mutates the following row, so rows
    # must be read fresh from ``data`` rather than from a pre-built iterator.
    for i in range(n_rows):
        row = data.iloc[i]
        n_temp, s_temp, c_temp, k_rpm_pv = row['n_temp_pv'], row['s_temp_pv'], row['c_temp_pv'], row['k_rpm_pv']

        # Select the features by name so values and column labels cannot drift
        # apart; the first row needs no special handling.
        features = data.iloc[[i]][columns]
        features_scaled = scaler.transform(features)
        predicted_scaled = np.asarray(model.predict(features_scaled), dtype=float).reshape(-1, 1)
        predicted_scale_pv = round(target_scaler.inverse_transform(predicted_scaled)[0][0], 2)

        results.append({
            'n_temp_pv': n_temp,
            's_temp_pv': s_temp,
            'c_temp_pv': c_temp,
            'predicted_scale_pv': predicted_scale_pv,
            'k_rpm_pv': k_rpm_pv,
        })

        if i < n_rows - 1:
            optimal_k_rpm_pv = find_optimal_k_rpm_pv(
                n_temp, s_temp, c_temp, k_rpm_pv, model, scaler, feature_names, target_scaler
            )
            data.iloc[i + 1, k_rpm_col] = optimal_k_rpm_pv

    return pd.DataFrame(results, columns=result_columns)


# Scale the feature columns of the full data set (taken before the
# optimisation run below modifies `data`).
X_test_final = data[feature_names]
X_test_final_scaled = scaler.transform(X_test_final)


# Run the row-by-row prediction and k_rpm_pv optimisation.
results = predict_and_optimize(data, lr_model, scaler, target_scaler, feature_names)


# Attach the constant sv columns (and the E_scr pair) to every row.
results = results.assign(
    c_temp_sv=70,
    s_temp_sv=70,
    n_temp_sv=70,
    k_rpm_sv=180,
    E_scr_pv=8,
    E_scr_sv=8,
)


# Row-to-row differences; the first row has no predecessor, so it gets 0.
# `loss` is a copy of `scale_dif`.
results = results.assign(
    c_temp_pv_dif=results['c_temp_pv'].diff().fillna(0),
    s_temp_pv_dif=results['s_temp_pv'].diff().fillna(0),
    n_temp_pv_dif=results['n_temp_pv'].diff().fillna(0),
    scale_dif=results['predicted_scale_pv'].diff().fillna(0),
    rpm_dif=results['k_rpm_pv'].diff().fillna(0),
    loss=lambda frame: frame['scale_dif'],
)


# Write the simulation table to CSV without the index column.
results.to_csv('../6. simulation_csv/simulation.csv', index=False)



# In [8]:
# Put the actual scale_pv next to the predicted value in a new DataFrame.
final_results = pd.DataFrame(
    dict(
        actual_scale_pv=data['scale_pv'],
        predicted_scale_pv=results['predicted_scale_pv'],
    )
)


# Write the comparison table to CSV without the index column.
final_results.to_csv(
    '../6. simulation_csv/real&pred_scale.csv',
    index=False,
)