In [2]:
#import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import statsmodels.api as sm
import math
import networkx
import PIconnect as PI
from datetime import datetime

In [3]:
import PIconnect as PI
def pi_connect_and_pulldata(df_PITag, startDate, endDate, interval):
    """
    Pull interpolated PI values for every tag listed in ``df_PITag``.

    Example
    -------
    pi_connect_and_pulldata(df, '7-Jun-16 06:45:00', '7-Jun-16 07:00:00', '15s')

    Parameters
    ----------
    df_PITag : pandas.DataFrame
        One row per tag. Column ``Name`` is the search string sent to the PI
        server; column ``Combined_tags`` becomes the label of the pulled row.
    startDate, endDate : str
        Passed unchanged to ``interpolated_values``.
    interval : str
        Sampling interval passed unchanged to ``interpolated_values`` (e.g. '15s').

    Returns
    -------
    pandas.DataFrame
        One row per successfully pulled tag, labelled with ``Combined_tags``.
        Columns follow the index of the returned series (presumably timestamps).
        Empty when no tag could be pulled.
    """
    # Nothing requested: do not open a server connection at all.
    if len(df_PITag) == 0:
        return pd.DataFrame()

    pulled_series = []

    # One connection for the whole batch instead of reconnecting for every tag.
    with PI.PIServer() as server:
        for _, row in df_PITag.iterrows():
            print(row['Name'])
            try:
                point = server.search(row['Name'])[0]
                data = point.interpolated_values(startDate, endDate, interval)
                first_value = data.values[0]
                # Keep only series whose first value is a float (np.float64);
                # anything else is skipped (presumably non-numeric tags).
                if isinstance(first_value, np.floating) and isinstance(first_value, float):
                    data.name = row['Combined_tags']
                    pulled_series.append(data)
            except Exception as exc:
                # Best effort: a tag that cannot be pulled must not abort the batch,
                # but say which tag failed and why.
                print('Error somewhere in pi_connect_and_pulldata')
                print('  tag:', row['Name'], '->', repr(exc))

    if not pulled_series:
        return pd.DataFrame()

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    return pd.DataFrame(pulled_series)

In [4]:
from functools import reduce
import json

def export_tree_json(array_linkage, labels, current_time):
    """
    Write a hierarchical-clustering result as nested JSON (d3-style tree).

    Parameters
    ----------
    array_linkage : numpy.ndarray
        Linkage matrix as accepted by ``scipy.cluster.hierarchy.to_tree``.
    labels : sequence
        Leaf labels; ``labels[i]`` names the leaf whose cluster id is ``i``.
    current_time : str
        Output path without extension; ``".json"`` is appended.

    Returns
    -------
    None
        The tree is written to ``current_time + ".json"``. Every node has the
        keys ``children`` and ``name``; a node's name is the last of its leaf
        labels in sorted order. The top-level node is named ``"Root1"``.
    """
    # Imported locally so the function works even if the notebook cell that
    # imports `shc` at module level has not been run yet.
    import scipy.cluster.hierarchy as shc

    id2name = dict(enumerate(labels))
    tree_root = shc.to_tree(array_linkage, rd=False)

    def add_node(node, parent):
        """Append `node` (and, recursively, its subtree) to `parent["children"]`."""
        new_node = dict(node_id=node.id, children=[])
        parent["children"].append(new_node)

        if node.left is not None:
            add_node(node.left, new_node)
        if node.right is not None:
            add_node(node.right, new_node)

    def label_tree(n):
        """Name `n` and its descendants; return the leaf labels below `n`."""
        if len(n["children"]) == 0:
            # A leaf: its label comes straight from the id -> label map.
            leaf_names = [id2name[n["node_id"]]]
        else:
            # An inner node: gather the leaves of every child, in child order.
            leaf_names = []
            for child in n["children"]:
                leaf_names.extend(label_tree(child))

        # The numeric id is only needed for the lookup above; dropping it
        # keeps the JSON clean.
        del n["node_id"]

        # Labelling convention: the last leaf label in sorted order.
        n["name"] = sorted(leaf_names)[-1]
        return leaf_names

    # Wrap the SciPy tree in a synthetic root, then label every real node.
    d3Dendro = dict(children=[], name="Root1")
    add_node(tree_root, d3Dendro)
    label_tree(d3Dendro["children"][0])

    # Context manager guarantees the file is flushed and closed.
    with open(current_time + ".json", "w", encoding="utf-8") as json_file:
        json.dump(d3Dendro, json_file, sort_keys=True, indent=4)

In [5]:
from statsmodels.tsa.stattools import grangercausalitytests

def grangers_causation_topN_list(first_out_tag, data
                                  , test_score_method ='ssr_chi2test'
                                  , decimal_round_digit=9
                                  , maxlag=9
                                  , topN=20):
    """
    Rank the columns of ``data`` by their Granger-causality p-value against
    ``first_out_tag``.

    For every other column the test is run on ``data[[first_out_tag, column]]``
    for lags 1..``maxlag``; the smallest (rounded) p-value over the lags is kept.

    Parameters
    ----------
    first_out_tag : str
        Column of ``data`` used as the first series of every pairwise test.
    data : pandas.DataFrame
        One column per tag.
    test_score_method : str
        Key of the test statistic to read from each lag's result.
    decimal_round_digit : int
        Number of decimals each p-value is rounded to.
    maxlag : int
        Highest lag passed to ``grangercausalitytests``.
    topN : int
        Number of entries to return.

    Returns
    -------
    pandas.Series
        Named ``first_out_tag``, indexed by column name, sorted ascending and cut
        to ``topN`` entries. ``NaN`` means "no valid test result": the tag itself,
        or a pair for which the test failed. NaN sorts last, so a failed test can
        never be reported as the most significant one.
    """
    candidate_tags = data.columns

    # Start from NaN, not 0.0: a p-value of 0 would rank failed pairs first.
    min_p_values = pd.Series(np.nan, index=candidate_tags, name=first_out_tag, dtype=float)

    if first_out_tag not in candidate_tags:
        print('grangers_causation_topN_list: tag not found in data:', repr(first_out_tag))
        return min_p_values.head(topN)

    for tag in candidate_tags:
        # Testing a series against itself carries no information.
        if tag == first_out_tag:
            continue
        try:
            granger_result = grangercausalitytests(data[[first_out_tag, tag]], maxlag=maxlag, verbose=False)
            p_values = [
                round(granger_result[lag][0][test_score_method][1], decimal_round_digit)
                for lag in range(1, maxlag + 1)
            ]
            min_p_values.loc[tag] = np.min(p_values)
        except Exception as exc:
            # Best effort: one unusable pair must not abort the ranking,
            # but it is reported and stays NaN.
            print('Granger test failed for', repr(tag), '->', repr(exc))

    return min_p_values.sort_values().head(topN)

In [6]:
#df_PITag=pd.read_csv(r'C:\Users\szjt\Auto_AD\PI_Tag_Reduced.csv', encoding= 'unicode_escape')
def get_tag_data(area, level_YES, main_category, sub_category, tag_dir=r'C:\Users\szjt\Auto_AD'):
    """
    Load the tag list of one area and filter it by the UI selection.

    Parameters
    ----------
    area : str
        Area name; the file read is ``Final_<area>_Tags.csv`` inside ``tag_dir``.
    level_YES : bool
        When true, keep only rows whose ``Level`` column equals ``'Y'`` and
        ignore the two category arguments.
    main_category, sub_category : list
        Allowed values of the ``Main category`` / ``Sub category`` columns
        (used only when ``level_YES`` is false; both must match).
    tag_dir : str
        Directory holding the tag CSV files. Defaults to the original location.

    Returns
    -------
    pandas.DataFrame
        The filtered tag table. ``Name`` has the text ``PIGILCollective`` and all
        backslashes removed; a ``Sensor`` column, if present, is renamed to ``Name``.
    """
    import os

    csv_path = os.path.join(tag_dir, 'Final_' + area + '_Tags.csv')
    df_PITag = pd.read_csv(csv_path)

    # Strip the server prefix and every backslash from the tag path.
    # regex=False is explicit: the default of Series.str.replace changed in
    # pandas 2.0, and the old two-backslash pattern then only removed *pairs*
    # of backslashes, leaving a stray one in names with an odd count
    # (presumably of the form \\PIGILCollective\TAG -- verify against the CSV).
    df_PITag["Name"] = (
        df_PITag["Name"]
        .str.replace('PIGILCollective', '', regex=False)
        .str.replace('\\', '', regex=False)
    )

    if level_YES:
        selected = df_PITag[df_PITag['Level'] == 'Y']
    else:
        in_main = df_PITag['Main category'].isin(main_category)
        in_sub = df_PITag['Sub category'].isin(sub_category)
        selected = df_PITag[in_main & in_sub]

    # rename() returns a new frame, avoiding an in-place edit of a filtered slice.
    selected = selected.rename(columns={'Sensor': 'Name'})

    print(selected.head())
    return selected

In [7]:
def convert_inverse_data(input_df, df_PITag_Value):
    """
    Build ``100 - x`` copies of the rows that belong to valve tags.

    Parameters
    ----------
    input_df : pandas.DataFrame
        One row per tag; the index holds the tag labels.
    df_PITag_Value : pandas.DataFrame
        Tag table with the columns ``Valve`` and ``Combined_tags``. Rows with
        ``Valve == 'Yes'`` define the valve tags.

    Returns
    -------
    pandas.DataFrame
        One row per matching row of ``input_df``, holding ``100 - value`` and
        labelled ``<original label>_Converted``. Same columns as ``input_df``.
        ``input_df`` itself is not modified.
    """
    valve_rows = df_PITag_Value[df_PITag_Value["Valve"] == 'Yes']
    # Non-string entries (e.g. NaN from an empty CSV cell) cannot match a label.
    valve_tags = [tag for tag in valve_rows['Combined_tags'].tolist() if isinstance(tag, str)]

    # A row is selected when its label occurs inside any valve tag name.
    # NOTE(review): this is a substring match, as in the original code --
    # confirm that an exact match is not what is actually wanted.
    is_valve = np.asarray(
        [any(label in tag for tag in valve_tags) for label in input_df.index],
        dtype=bool,
    )

    # Vectorised conversion; replaces the per-row DataFrame.append loop
    # (DataFrame.append was removed in pandas 2.0).
    converted = 100 - input_df.loc[is_valve]
    converted.index = [f"{label}_Converted" for label in converted.index]
    return converted

In [8]:
import scipy.cluster.hierarchy as shc
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
#inputData = pd.read_csv(r"C:\Users\tsvd\PIdataJuly2020.csv")

def autoAD_clustering(start_date, end_date, frequency_method, selected_CPP, level_chkbox, tags_main_picker, tags_sub_picker, granger_picker, granger_txt):
    """
    Pull PI data for the selected tags and run either a Granger-causality
    ranking or a hierarchical clustering on it.

    Parameters
    ----------
    start_date, end_date : str
        Date range forwarded to ``pi_connect_and_pulldata``.
    frequency_method : str
        Sampling interval forwarded to ``pi_connect_and_pulldata``.
    selected_CPP : str
        Area name; selects the tag file and prefixes every output file name.
    level_chkbox : bool
        Forwarded to ``get_tag_data`` (filter on the ``Level`` column).
    tags_main_picker, tags_sub_picker : list
        Category filters forwarded to ``get_tag_data``.
    granger_picker : str
        ``"Granger Causality Test"`` selects the Granger branch; any other
        value runs the clustering branch.
    granger_txt : str
        First-out tag for the Granger branch.

    Returns
    -------
    str
        The Granger ranking rendered as text, or a status message.

    Side effects: writes several CSV files (raw, converted, combined, scaled)
    and, in the clustering branch, draws a dendrogram and writes a ``.json``
    tree, all into the current working directory.
    """
    # One timestamp per run so all output files share the same suffix even if
    # the run crosses a minute boundary.
    run_stamp = datetime.now().strftime("%Y_%m_%d_%H_%M")

    print("Get specific Tags from UI")
    df_PITag = get_tag_data(selected_CPP, level_chkbox, tags_main_picker, tags_sub_picker)
    print(df_PITag.head())

    print("Get PI Data from Date Range, Frequency and Tags Selections")
    original_df = pi_connect_and_pulldata(df_PITag, start_date, end_date, frequency_method)
    original_df.to_csv(selected_CPP + "_" + run_stamp + ".csv")
    print(original_df.head())

    # Without any data every later step would fail with an unrelated error.
    if original_df.empty:
        result_msg = "No PI data returned for the selected tags and date range"
        print(result_msg)
        return result_msg

    # The message now matches what convert_inverse_data computes.
    print("Convert valve data to (100 - x)")
    df_convert_inverse_data = convert_inverse_data(original_df, df_PITag)
    df_convert_inverse_data.to_csv(selected_CPP + "_converted_inversed" + run_stamp + ".csv")
    original_df = pd.concat([df_convert_inverse_data, original_df])
    print(original_df.head())
    original_df.to_csv(selected_CPP + "_converted_" + run_stamp + ".csv")

    print("Data Preprocessing")
    print(original_df.info())
    # Literal prefix removal; regex=False keeps this independent of the pandas
    # version's default.
    original_df.index = original_df.index.str.replace(selected_CPP + '_', '', regex=False)
    # After the transpose: one row per sample, one column per tag.
    original_df = original_df.transpose()
    print('Transposing data frame')
    print(original_df.info())

    scaler = MinMaxScaler()
    df_scaled = pd.DataFrame(scaler.fit_transform(original_df), columns=original_df.columns)
    # Drop tags that are zero everywhere after scaling (no variation to use).
    df_scaled = df_scaled.loc[:, (df_scaled != 0).any(axis=0)]
    print('Scaling Data')
    print(df_scaled.info())
    df_scaled = df_scaled.dropna(axis="columns")
    df_scaled.to_csv(selected_CPP + "_scaled_" + run_stamp + ".csv")

    print("Method Activated!!!")
    print(granger_picker)
    if granger_picker == "Granger Causality Test":
        result_msg = grangers_causation_topN_list(granger_txt, df_scaled, decimal_round_digit=9, topN=20, maxlag=6).to_string()
        print("Granger Activated!!!")
        print(result_msg)
    else:
        if df_scaled.shape[1] < 2:
            result_msg = "Clustering needs at least two tags with usable data"
            print(result_msg)
            return result_msg

        # linkage() clusters the ROWS of its input. The tags are the columns of
        # df_scaled, so it must be transposed; otherwise the time samples are
        # clustered while the labels below refer to tags.
        Z = shc.linkage(df_scaled.T, method='ward', metric='euclidean')
        # Draws the dendrogram with the tag names as leaf labels.
        shc.dendrogram(Z, orientation='left', labels=df_scaled.columns)

        print("Export .json file")
        export_tree_json(Z, df_scaled.columns, selected_CPP + "_" + run_stamp)

        result_msg = "Clustering Completed!!!"

    print("!!!AUTO-AD DONE!!!")

    return result_msg


In [10]:
import gradio as gr

# Equipment sub-categories offered in the "Sub category" checkbox group.
sub_category = [
    'Separators', 'Air System', 'Condensate Stabilization', 'Air coolers',
    'Heat Exchangers', 'Filter Coalescers', 'Filters', 'Compressors',
    'Generators', 'Glycol', 'Heaters', 'Hydrocyclones',
    'IGFs', 'Launchers', 'Pumps', 'Receivers',
    'Sales Gas Meters', 'Storage Tanks', 'WHRU', 'PWIP',
]

# --- Input widgets -----------------------------------------------------------
# Note: times shown in the exported CSV files are Thailand time minus 7 hours.
start_date = gr.inputs.Textbox(
    label="Start Date (Month/Day/Year Thailand time)",
    default="07/05/20 00:00",
    lines=1,
)
end_date = gr.inputs.Textbox(
    label="End Date (Month/Day/Year Thailand time)",
    default="07/05/20 21:00",
    lines=1,
)
frequency_method = gr.inputs.Dropdown(
    ['15s', '1m', '15m', '1h', '1d'],
    label="Sampling frequency",
)
selected_CPP = gr.inputs.Radio(
    ['SACPP', 'PACPP', 'Benchamas', 'NPCPP'],
    label="Select CPP",
)
level_chkbox = gr.inputs.Checkbox(label="Level?")
tags_main_picker = gr.inputs.CheckboxGroup(
    ['Gas_Processing', 'Liquid_Processing', 'Utility'],
    label="Main category",
)
tags_sub_picker = gr.inputs.CheckboxGroup(sub_category, label="Sub category")
granger_picker = gr.inputs.Radio(
    ['Hierarchical Clustering', 'Granger Causality Test'],
    label='Method',
)
granger_txt = gr.inputs.Textbox(
    label='First Out Tag (Required for Granger Causality Test Only) Example: CEN_LIC2720_PV LIQUID LEVEL',
    lines=1,
)

# --- Application -------------------------------------------------------------
# The widgets are listed in the order of autoAD_clustering's parameters; the
# function's return string is shown in a single text box.
gr.Interface(
    fn=autoAD_clustering,
    inputs=[
        start_date,
        end_date,
        frequency_method,
        selected_CPP,
        level_chkbox,
        tags_main_picker,
        tags_sub_picker,
        granger_picker,
        granger_txt,
    ],
    outputs=gr.outputs.Textbox(label="Result"),
    allow_flagging=False,
).launch()

ModuleNotFoundError: No module named 'gradio'

In [None]:
# Stand-alone run: cluster the hot-oil data set and export the tree as JSON.

# Load the data, then discard the helper columns and every column with gaps.
df = (
    pd.read_csv(r"C:\Users\szjt\OneDrive - Chevron\Desktop\Working Files\Satun Troubleshooting\Hot Oil\HotOilForClustering.csv")
    .drop(columns=['Unnamed: 0', 'Date'])
    .dropna(axis="columns")
)
print(df.shape)

# Scale every column to [0, 1], then keep only columns with a non-zero value.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
has_nonzero_value = df_scaled.ne(0).any(axis=0)
df_scaled = df_scaled.loc[:, has_nonzero_value]
print(df_scaled)

# linkage() clusters rows, so the frame is transposed to cluster the columns.
Z = shc.linkage(df_scaled.T, method='ward', metric='euclidean')
ax = shc.dendrogram(Z, labels=df_scaled.columns, orientation='left')
id2name = dict(enumerate(df_scaled.columns))

print("Export .json file")
export_tree_json(
    Z,
    df_scaled.columns,
    "SACPP" + "_Hotoil_" + datetime.now().strftime("%Y_%m_%d_%H_%M"),
)
