In [None]:
!pip install tensorflow --quiet
!pip install transformers --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m96.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding,Dropout
from keras.utils import to_categorical
import numpy as np
from gensim.models import FastText
from gensim.models import KeyedVectors
from keras.layers import Dropout


In [None]:
# Load the PLC fault table; the first CSV column is used as the row index.
df = pd.read_csv('PLCdata.csv', index_col=0)

# Column overview: names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1398 entries, 0 to 1397
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Fault              1398 non-null   object
 1   Diagnostic         1398 non-null   object
 2   Corrective Action  1397 non-null   object
 3   PLC                1398 non-null   object
 4   Model              1398 non-null   object
dtypes: object(5)
memory usage: 65.5+ KB


In [None]:
# Preview the first rows to sanity-check how the CSV columns were parsed.
df.head()

Unnamed: 0,Fault,Diagnostic,Corrective Action,PLC,Model
0,cracked glass failure,broken or cracked glass electrode,replace electrode if cracked,emerson,model e
1,cracked glass failure,broken or cracked glass electrode,check wiring for short,emerson,model e
2,zero offset error,reference electrode poisoned,replace reference electorode,emerson,model e
3,high reference imped,coated reference electrode,clean electrode as instructed in sensor manual,emerson,model e
4,high reference imped,sensor out of process,clean electrode as instructed in sensor manual,emerson,model e


In [None]:
# The prompt is "<PLC> <Model> <Fault>", space-separated, one string per row.
columns_to_join = ['PLC', 'Model', 'Fault']

prompt_parts = df[columns_to_join]
df['input'] = prompt_parts.apply(' '.join, axis=1)
df['input']

0        emerson model e cracked glass failure
1        emerson model e cracked glass failure
2            emerson model e zero offset error
3         emerson model e high reference imped
4         emerson model e high reference imped
                         ...                  
1393           siemens logo bolock input error
1394               siemens logo wiring problem
1395               siemens logo wiring problem
1396               siemens logo wiring problem
1397    siemens logo as interface voltage fail
Name: input, Length: 1398, dtype: object

In [None]:
# The target text is "<Diagnostic>, <Corrective Action>" for each row.
columns_to_join = ['Diagnostic', 'Corrective Action']

# NOTE: values are cast to str before joining, so a missing value in either
# column is rendered as the literal text 'nan' in the joined string.
answer_parts = df[columns_to_join].astype(str)
df['output'] = answer_parts.apply(', '.join, axis=1)
df['output']

0       broken or cracked glass electrode, replace ele...
1       broken or cracked glass electrode, check wirin...
2       reference electrode poisoned, replace referenc...
3       coated reference electrode, clean electrode as...
4       sensor out of process, clean electrode as inst...
                              ...                        
1393    insufficient memory space cannot add a block t...
1394    physical cable connections from the end device...
1395    there is no appropriate conductor crosssection...
1396    cable length exceeds the specifications, make ...
1397    communication between the logo system and the ...
Name: output, Length: 1398, dtype: object

In [None]:
# One training sentence per row: the prompt followed by its target text.
columns_to_join = ['input', 'output']

sentence_parts = df[columns_to_join].astype(str)
df['data'] = sentence_parts.apply(' '.join, axis=1)
df['data'].head()

0    emerson model e cracked glass failure broken o...
1    emerson model e cracked glass failure broken o...
2    emerson model e zero offset error reference el...
3    emerson model e high reference imped coated re...
4    emerson model e high reference imped sensor ou...
Name: data, dtype: object

In [None]:
import re

def add_end_token(text):
    """Mark sentence boundaries in ``text`` with ``<end>``.

    A marker is inserted after every period-terminated segment, the result is
    stripped of surrounding whitespace, and any space directly in front of a
    marker is removed. If the text still does not finish with a marker, one
    is appended, separated by a single space.

    Args:
        text: The string to annotate.

    Returns:
        The annotated string, which always ends with ``<end>``.
    """
    marked = re.sub(r'([^.]*\.)', r'\1 <end>', text)
    tightened = marked.strip().replace(' <end>', '<end>')
    if tightened.endswith('<end>'):
        return tightened
    return tightened + ' <end>'

# Tag every training sentence with the end marker. This overwrites the
# 'data' column in place, so re-running the cell processes already-tagged text.
df['data'] = df['data'].apply(add_end_token)

In [None]:
data = df['data']

# Fit a word-level vocabulary on the training sentences and encode them as
# lists of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
sequences = tokenizer.texts_to_sequences(data)

# +1 because the Keras Tokenizer never assigns index 0 to a word.
vocab_size = len(tokenizer.word_index) + 1
max_sequence_length = max(len(seq) for seq in sequences)

# Next-word training pairs: every proper prefix of a sentence is an input and
# the token that follows the prefix is its target.
prefixes = []
next_tokens = []
for sequence in sequences:
    for i in range(1, len(sequence)):
        prefixes.append(sequence[:i])
        next_tokens.append(sequence[i])

# Pad and one-hot encode the whole batch in one call each, instead of calling
# pad_sequences / to_categorical once per training sample. The resulting
# arrays are the same as stacking the per-sample results.
input_data = pad_sequences(prefixes, maxlen=max_sequence_length)
output_data = to_categorical(next_tokens, num_classes=vocab_size)

# Pre-trained FastText vectors in word2vec text format.
fasttext = KeyedVectors.load_word2vec_format('/content/drive/MyDrive/wiki-news-300d-1M.vec')

# Row i holds the vector of the word with token id i; words that are missing
# from the FastText vocabulary keep an all-zero row.
embedding_dim = 300
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if word in fasttext:
        embedding_matrix[i] = fasttext[word]

# Embedding (initialised from FastText, still trainable) -> two stacked LSTMs
# with dropout in between -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dense(vocab_size, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 300)         891600    
                                                                 
 lstm (LSTM)                 (None, None, 256)         570368    
                                                                 
 dropout (Dropout)           (None, None, 256)         0         
                                                                 
 lstm_1 (LSTM)               (None, 128)               197120    
                                                                 
 dense (Dense)               (None, 2972)              383388    
                                                                 
Total params: 2,042,476
Trainable params: 2,042,476
Non-trainable params: 0
_________________________________________________________________


In [None]:
from keras.models import load_model

# Continue from a previously saved checkpoint. This rebinds `model`, so the
# training below runs on the loaded network.
model = load_model('/content/plc_lstm_12.h5')

model.fit(
    input_data,
    output_data,
    batch_size=256,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7efbb0357520>

In [None]:
# Train the current `model` for eight more epochs on the same data.
model.fit(
    x=input_data,
    y=output_data,
    batch_size=256,
    epochs=8,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7efbb0496950>

In [None]:
import json

# Imported in this cell because it calls files.download(); without it the
# cell raises NameError on a fresh kernel run from top to bottom.
from google.colab import files

# tokenizer.to_json() already returns a JSON string, and json.dump() encodes
# that string once more, so the file holds a single JSON string literal.
# The format is intentionally left unchanged.
tokenizer_json = tokenizer.to_json()
with open('plc_lstm_tokenizer.json', 'w') as f:
    json.dump(tokenizer_json, f)

# Trigger a browser download of the saved tokenizer file.
files.download('plc_lstm_tokenizer.json')


In [None]:

from google.colab import files

# Write the trained network to an .h5 file, then offer it as a browser download.
MODEL_FILENAME = 'plc_lstm_14_model.h5'
model.save(MODEL_FILENAME)
files.download(MODEL_FILENAME)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import json

import keras
from keras.preprocessing.text import tokenizer_from_json

# Read the exported tokenizer file as raw text.
with open('/content/jsonformatter [MConverter.eu].json', 'r') as tokenizer_file:
    json_string = tokenizer_file.read()

# Decode the file content once, then rebuild the tokenizer from the result.
tokenizer_json = json.loads(json_string)
tokenizer = tokenizer_from_json(tokenizer_json)

# Reload the trained network from its .h5 file.
model = keras.models.load_model("/content/plc_lstm_14_model.h5")



In [None]:

def generate_text(model, tokenizer, input_text, max_length=40):
    """Greedily extend ``input_text`` one word at a time.

    At each step the text generated so far is tokenized, left-padded to
    ``max_length - 1`` tokens and passed to the model; the highest-scoring
    token id is mapped back to a word and appended.

    Generation stops when any of the following holds:
      * the text (prompt included) has reached ``max_length``
        whitespace-separated words,
      * the predicted word is ``'end'`` (presumably the ``<end>`` marker after
        the tokenizer's character filtering -- confirm against the tokenizer
        configuration),
      * the predicted index has no word in ``tokenizer.index_word`` (for
        example index 0). Appending an empty word would leave the model input
        unchanged, so continuing could never make progress.

    Args:
        model: Object whose ``predict(batch, verbose=0)`` returns one score
            vector per input row, indexed by token id.
        tokenizer: Object providing ``texts_to_sequences`` and ``index_word``.
        input_text: Prompt to continue.
        max_length: Upper bound on the total number of words, prompt included.

    Returns:
        Only the generated continuation (the prompt is cut off). Each
        generated word is preceded by a single space; the result is an empty
        string when nothing was generated.
    """
    generated_text = input_text

    # Bounding with `<` (rather than testing for equality) also terminates
    # when the prompt is already longer than max_length.
    while len(generated_text.split()) < max_length:
        token_ids = tokenizer.texts_to_sequences([generated_text])[0]
        model_input = pad_sequences([token_ids], maxlen=max_length - 1, padding='pre')

        # verbose=0 keeps predict() from printing a progress line per word.
        scores = model.predict(model_input, verbose=0)[0]
        predicted_index = int(np.argmax(scores))
        predicted_word = tokenizer.index_word.get(predicted_index, '')

        if predicted_word == 'end' or not predicted_word:
            break

        generated_text += ' ' + predicted_word

    return generated_text[len(input_text):]

In [None]:

# Example prompt in the "<PLC> <Model> <Fault>" form.
input_text = 'emerson model e low input voltage'
generated_text = generate_text(
    model=model,
    tokenizer=tokenizer,
    input_text=input_text,
)
print(generated_text)

 open connection from glass electrode to preamplifier check the connection between the glass electorode and preamplifier
