In [0]:
import pandas as pd
import numpy as np
import pickle

import warnings
warnings.filterwarnings("ignore")

In [0]:
def load_dcs(columns_of_interest = ['AirFlow','Fuel flow','MelterCrownTemp'],
             rename_columns = {'TE204TC22':'MelterCrownTemp',
                               'MGFT221AI18':'Fuel flow'},
             path = '/dbfs/mnt/datalake/master/dcs/master_dcs_255_tpd_live.csv'):
    """
    This function loads the dcs data

    The csv is read, 'DateTime' is parsed and used as the index, an 'AirFlow'
    column is derived as the row-wise maximum of the two CABFT21x tags, the
    tags are renamed, and only the requested columns (plus a calendar 'Date'
    column) are returned in ascending DateTime order.

    Parameters
    --------------------------------------------------
    columns_of_interest: list of required columns (names after renaming); never modified here
    rename_columns: dict for renaming tags; never modified here
    path: location of the DCS csv; defaults to the live master extract

    Returns
    --------------------------------------------------
    df: pd.DataFrame indexed by DateTime with columns_of_interest and 'Date'
    """
    dcs_df = pd.read_csv(path)
    dcs_df["DateTime"] = pd.to_datetime(dcs_df["DateTime"])
    # Vectorised row maximum. Unlike the builtin max() applied per row, the
    # result does not depend on column order when one reading is NaN: missing
    # values are skipped and the other reading is used.
    dcs_df["AirFlow"] = dcs_df[["CABFT211AI12", "CABFT212AI13"]].max(axis=1)
    dcs_df = dcs_df.rename(columns = rename_columns).set_index('DateTime')
    # Explicit copy so adding 'Date' writes to an independent frame rather
    # than to a slice of dcs_df (chained-assignment hazard).
    dcs_df_v2 = dcs_df[list(columns_of_interest)].copy()
    dcs_df_v2['Date'] = dcs_df_v2.index.date
    # DateTime is the index, so ordering by it is an index sort.
    return dcs_df_v2.sort_index()

In [0]:
def reversal_outlier_treatment(df, target_mct = 1572.5):
    """
    This function cleans the DCS data based on reversal cycle

    Rows without a fuel flow reading are discarded. For every calendar date the
    fuel flow range is split into 10 equal-width bins and only the rows lying
    inside the most populated bin are kept. The kept rows are averaged over
    10-minute windows and the most recent window becomes the single output row.

    Parameters
    ----------------------
    df: pd.DataFrame with a DatetimeIndex and the columns 'Fuel flow',
        'AirFlow', 'MelterCrownTemp' and 'Date'. It is not modified.
    target_mct: value from which the latest MelterCrownTemp is subtracted to
        fill 'target - MCT'

    Return
    ---------------------
    input_df - pd.DataFrame with one row (index 0) and the columns
        timestamp, fuel_flow, AirFlow, MelterCrownTemp, 'target - MCT', in that order

    Raises
    ---------------------
    ValueError when no row has a non-null 'Fuel flow'
    """
    # Work on a private copy: the caller's frame must not lose rows or gain
    # the helper 'min'/'max' columns.
    work = df.dropna(subset = ['Fuel flow']).copy()
    if work.empty:
        raise ValueError("No rows with a non-null 'Fuel flow' to process")

    work['min'] = np.nan
    work['max'] = np.nan
    for date in work['Date'].unique():
        on_date = work['Date'] == date
        # value_counts() orders bins by population, so index[0] is the modal
        # bin; compute it once and reuse both of its edges.
        modal_bin = pd.cut(work.loc[on_date, "Fuel flow"], 10).value_counts().index[0]
        work.loc[on_date, "min"] = modal_bin.left
        work.loc[on_date, "max"] = modal_bin.right

    inside_modal_bin = (work['Fuel flow'] >= work['min']) & (work['Fuel flow'] <= work['max'])
    df_v2 = work.loc[inside_modal_bin]
    # The printed figure is a fraction between 0 and 1 rounded to 2 decimals.
    print ("\nOutliers (%age data points) removed from reversal cycle is ", np.round(1 - df_v2.shape[0]/work.shape[0] , 2))

    # Average only the numeric signals: the object-typed 'Date' column cannot
    # be averaged, and '10min' is the non-deprecated spelling of '10T'.
    signals = ['Fuel flow', 'AirFlow', 'MelterCrownTemp']
    ten_minute_means = df_v2[signals].resample('10min').mean()
    latest = ten_minute_means.iloc[-1]

    # Column order matters to downstream consumers, so it is kept as before.
    input_df = pd.DataFrame({
        'timestamp': [ten_minute_means.index.values[-1]],
        'fuel_flow': [latest['Fuel flow']],
        'AirFlow': [latest['AirFlow']],
        'MelterCrownTemp': [latest['MelterCrownTemp']],
        'target - MCT': [target_mct - latest['MelterCrownTemp']],
    })
    return input_df

In [0]:
def load_uems_data(input_df):
    """
    This function loads the UEMS data having ncv values.

    The 15-minute tag file is ordered by its "From" column and the last
    'GAIL cv' value in that order is written to every row of input_df as 'NCV'.

    Parameters
    --------------------------------------------------
    input_df: pd.DataFrame; receives the new 'NCV' column in place

    Returns
    --------------------------------------------------
    input_df: pd.DataFrame (the same object that was passed in)
    """
    tag_names = {
        'STREAM2_INFERIOR_CV_NCV_METER_JBR': 'GAIL cv',
        'STREAM1_INFERIOR_CV_NCV_METER_JBR': 'GSP cv',
    }
    uems = (
        pd.read_csv("/dbfs/mnt/datalake/master/uem/jbr/15_min_tag_live.csv")
        .sort_values(by = "From")
        .rename(columns = tag_names)
    )
    # NOTE(review): "From" is ordered exactly as read_csv typed it; presumably
    # the timestamps sort chronologically in that form - confirm.
    latest_gail_cv = uems['GAIL cv'].values[-1]

    input_df['NCV'] = latest_gail_cv
    return input_df

In [0]:
def dtree_prediction(input_df, model_path = '/dbfs/mnt/datalake/exploratory/Furnace_Analytics/265TPD/dtree_regime_model.sav'):
    """
    This function predicts fuel flow using decision tree

    A pickled model is loaded from model_path and its predict() is called on
    the NCV, AirFlow and MelterCrownTemp columns (in that order); the result is
    stored in a new 'dtree_prediction' column.

    Parameters
    --------------------------------------------------
    input_df: pd.DataFrame with 'NCV', 'AirFlow' and 'MelterCrownTemp'; modified in place
    model_path: location of the pickled model; defaults to the 265TPD model file

    Returns
    --------------------------------------------------
    input_df: pd.DataFrame (the same object, with 'dtree_prediction' added)
    """
    # NOTE(security): unpickling runs code stored in the file, so model_path
    # must only ever point at a trusted artefact.
    # The context manager guarantees the handle is closed even if loading fails.
    with open(model_path, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    input_df['dtree_prediction'] = loaded_model.predict(input_df[["NCV","AirFlow","MelterCrownTemp"]])
    return input_df

In [0]:
def checks(input_df):
    """
    This function performs the final sanity check on the model prediction

    The row labelled 0 is inspected. Depending on the sign of 'target - MCT',
    an acceptance band around 'fuel_flow' is chosen. If 'model_prediction'
    lies inside the band (edges included) it becomes 'final prediction';
    otherwise 'final prediction' is 'fuel_flow' plus a capped multiple of the gap.

    Parameters
    --------------------------------------------------
    input_df: pd.DataFrame with 'target - MCT', 'fuel_flow' and
              'model_prediction'; modified in place

    Returns
    --------------------------------------------------
    input_df: pd.DataFrame (the same object, with 'final prediction' added)
    """
    cap = 8.842131  # largest correction ever added to / subtracted from fuel_flow

    gap = input_df.loc[0, 'target - MCT']
    fuel_column = input_df['fuel_flow']
    fuel = input_df.loc[0, 'fuel_flow']
    predicted = input_df.loc[0, 'model_prediction']

    # Each branch fixes the acceptance band and the value used on rejection.
    if gap == 0:
        lower, upper = fuel - 2, fuel + 2
        fallback = fuel_column
    elif gap > 0:
        lower, upper = fuel - 1.5, fuel + cap
        fallback = fuel_column + min(gap*2, cap)
    else:
        lower, upper = fuel - cap, fuel + 1
        fallback = fuel_column + max(2.2*gap, -1*cap)

    accepted = lower <= predicted <= upper
    input_df['final prediction'] = input_df['model_prediction'] if accepted else fallback
    return input_df

In [0]:
def prediction(input_df):
    """
    This function predicts the final fuel flow based on regime based model

    The 'Coefficient' column of the '265TPD' sheet is read: its first entry is
    used as the intercept and the remaining entries as weights. The weights are
    applied to the first row of input_df to fill 'model_prediction', after
    which checks() derives 'final prediction'.

    Parameters
    --------------------------------------------------
    input_df: pd.DataFrame with 'NCV', 'target - MCT', 'dtree_prediction'
              and 'fuel_flow'; modified in place

    Returns
    --------------------------------------------------
    input_df: pd.DataFrame with 'model_prediction' and 'final prediction' added
    """
    coeff = pd.read_excel('/dbfs/mnt/datalake/exploratory/Furnace_Analytics/Model_coefficients.xlsx', sheet_name = '265TPD')
    coefficients = coeff['Coefficient'].to_numpy(dtype = float)
    intercept, weights = coefficients[0], coefficients[1:]

    # Select the features by name rather than by position so that a change in
    # column order cannot silently feed the wrong signal to a weight. These are
    # the columns that positions [5, 4, 6, 1] referred to in the frame produced
    # by this notebook.
    # NOTE(review): the order is assumed to match the sheet's rows after the
    # intercept - confirm against the workbook.
    feature_columns = ['NCV', 'target - MCT', 'dtree_prediction', 'fuel_flow']
    features = input_df[feature_columns].iloc[0].to_numpy(dtype = float)

    input_df['model_prediction'] = np.dot(weights, features) + intercept
    input_df = checks(input_df)
    return input_df

In [0]:
# Load the raw DCS readings, then pass the single-row frame produced by the
# outlier treatment through each later stage; every stage returns the frame
# the next one consumes.
dcs_df = load_dcs()
input_df = (
    reversal_outlier_treatment(dcs_df)
    .pipe(load_uems_data)
    .pipe(dtree_prediction)
    .pipe(prediction)
)
input_df

Unnamed: 0,timestamp,fuel_flow,AirFlow,MelterCrownTemp,target - MCT,NCV,dtree_prediction,model_prediction,final prediction
0,2020-10-01 14:00:00,1219.089185,15731.896875,1572.462158,0.037842,8976.25121,1214.258422,1216.444946,1219.164868
