In [40]:
## Import libraries
import os
import nltk
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
from typing import Union
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader
from langchain_openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from dotenv import load_dotenv

In [41]:
## Read variables from env file
load_dotenv()  
OPENAI_KEY=os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")
PINECONE_INDEX=os.getenv("PINECONE_INDEX_NAME")
AZURE_API_KEY1=os.getenv("AZURE_KEY1")
AZURE_API_KEY2=os.getenv("AZURE_KEY2")
AZURE_API_ENDPOINT=os.getenv("AZURE_ENDPOINT")

### Step 1: Read Source Data Files, Data Cleaning, Splitting to chunks, Convert and Store it in Vector Database


Data Source 1: Interface Functional Specification Document (PDF)

Data Source 2: Interface Data Flow (PDF)

Data Source 3: Production Support Issues & Resolutions (CSV)

Data Source 4: Interface Mapping Sheet (XLSX)

Data Source 5: Interface Architecture, Failure Modes & Error Handling Mechanism (PPTX)

In [42]:
## Folder Path for Data Source Files
SOURCE_FILES_PATH="C:\\Users\\ASHOKKUMAR KALIAPPAN\\Documents\\Ashok\\MSc_DataAnalytics\\Final_Project\\Doc\\"

In [43]:
## Function for PDF extraction
def read_data_from_pdf(FILE_PATH,EXTRACT_IMAGE_INPUT):
    loader_dataflow = PyPDFLoader(FILE_PATH, extract_images=EXTRACT_IMAGE_INPUT)
    pages_dataflow = loader_dataflow.load()
    return pages_dataflow

In [44]:
## Function for CSV data extraction
def read_data_from_csv(FILE_PATH):
    loader_csv = CSVLoader(file_path=FILE_PATH)
    data_csv = loader_csv.load()
    return data_csv

In [45]:
## Function for MS Office docs extraction
def read_data_from_msdocs(FILE_PATH):
    loader_mso = AzureAIDocumentIntelligenceLoader(
        api_endpoint=AZURE_API_ENDPOINT, api_key=AZURE_API_KEY1, file_path=FILE_PATH, api_model="prebuilt-layout"
    )
    pages_msofficedata= loader_mso.load()
    return pages_msofficedata

In [46]:
## Read Data Source 1: Interface Functional Specification Document (PDF)
extracted_data_fsd = read_data_from_pdf (SOURCE_FILES_PATH + "Engineering_Datahub_Interface_FSD.pdf",False)
extracted_data_fsd

[Document(page_content=' \n                                                           \n \n Page 1 of 19  \n \n    \n \n \n \n \n \n \n \n \nEngineering  Datahub \nInterface  \n \nFunctional Specification  Document  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n ', metadata={'source': 'C:\\Users\\ASHOKKUMAR KALIAPPAN\\Documents\\Ashok\\MSc_DataAnalytics\\Final_Project\\Doc\\Engineering_Datahub_Interface_FSD.pdf', 'page': 0}),
 Document(page_content='                                                   \n Engineering  Datahub  Interface - Functional Specification Document  \n \n Page 2 of 19  \nTable of Contents    \n1. Overview  ................................ ................................ ................................ ................................ .............................  3 \n2. Objective  ................................ ................................ ................................ ...............

In [47]:
## Read Data Source 2: Interface Data Flow (PDF)
extracted_data_interfaceflow = read_data_from_pdf (SOURCE_FILES_PATH + "Engineering_Datahub_Interface_ProcessFlow.pdf",True)
print(extracted_data_interfaceflow[0].page_content)

Engineering Datahub Interface Process FlowEngineering Datahub Interface Process Flow
Engineering 
DatahubEngineering 
DatahubWindchill  PLM Windchill  PLM MiddlewareMiddlewareComplete  Change Notice 
Audit TaskChange Notice Interface 
Failure Task is Created
Change Notice AuditorChange Notice  Details**
Update Change Notice  
Details** in Engineering 
Datahub systemYesChange Notice is promoted to 
Resolved  State  and Resulting 
objects is promoted to Target 
State (Released / Obsolete)
No
Is data update 
successful?**Change Notice  Details
• Engineering Release
• Part / Assembly
• BOM Structure
• Part / Assembly Drawing
• Engineering Specification 
DocumentResend Change Notice Details 
to Target SystemLog Errors/Send Mail to 
Production Support  TeamWindchill Support Team fix 
the issue & complete 
Change Notice Interface 
Failure Task
Notify 
Change Notice AuditorEnd
Data Transformation as per Business 
Rules  and publish the data to 
Engineering Datahub system
Change Notice  Details

In [48]:
## Data Source 3: Production Support Issues & Resolutions (CSV)
extracted_data_supportresolution = read_data_from_csv (SOURCE_FILES_PATH + "Incident_Management_Steps_Updated.csv")
extracted_data_supportresolution

[Document(page_content='INC Number: INC0001\nDescription: Description of Issue: user wanted to change the description of Change Notice\nSteps involved in Resolving the Ticket: 1. Check if the Change Notice is present in Windchill\n2. Check the existing description of Change Notice\n3. Based on the state and approval follow below steps.\n4. If the Change Notice is in Resolved state, we need approval from Business, apart from that we can change the description directly.\n5. Change Notice > Actions > Edit \n6. Change the description as required\nWork Log or Actions: Step 1: Accept the ticket and move to In Progress \nStep 2: Check state of the Change Notice  \nStep 3: Based on the state of the Change Notice steps are processed \nStep 4:If the Change Notice is in Resolved state, we need approval from Business, apart from that we can change the description directly.\nStep 5: If the Change Notice is in resolved state then collect the information from user on why the description needs to be c

In [49]:
## Read Data Source 4: Interface Mapping Sheet (XLSX)
extracted_data_interfacemapping = read_data_from_msdocs (SOURCE_FILES_PATH + "Engineering_Datahub_Interface_Mapping_Document_Updated.xlsx")
extracted_data_interfacemapping

[Document(page_content='Interface\\_Mapping\n===\n\nData Element\n\nWindchill PLM Object\n\nWindchill PLM Field Name\n\nWindchill PLM Field Description\n\nMiddleware Transformation Required ?\n\nMiddleware Transformation Rules\n\nEngineering Datahub Functionality\n\nEngineering Datahub Field Description\n\nEngineering Release\n\nChange Notice (CN)\n\nNumber\n\nNumber\n\nNo\n\nEngineering Release\n\nRelease Number (Change)\n\nEngineering Release\n\nChange Request (CR) / Change Notice (CN)\n\nCreated By\n\nCreated By\n\nYes\n\nIf migratedFlag = "true" then set changeNoticeRequester\n\nelseIf changeRequestCreatedBy != NULL then set \'changeRequestCreatedBy\'\n\nelse set \'changeNoticeCreatedBy\'\n\nEngineering Release\n\nChange Owner\n\nEngineering Release\n\nChange Notice (CN)\n\nAdvance Change Number\n\nAdvance Change Number\n\nNo\n\nEngineering Release\n\nAdvance Change Number\n\nPart / Assembly\n\nChange Notice (CN)\n\nNumber\n\nNumber\n\nNo\n\nItem\n\nRelease Number\n\nPart / Assembl

In [50]:
## Data Source 5: Interface Architecture, Failure Modes & Error Handling Mechanism (PPTX)
extracted_data_archdata = read_data_from_msdocs (SOURCE_FILES_PATH + "Engineering_Datahub_InterfaceArchitecture_FailureMode_ErrorHandling.pptx")
extracted_data_archdata

[Document(page_content='Engineering Datahub Interface\n===\n\nSolution Architecture, Failure Modes and Error Handling\n===\n\nInterface Solution Architecture\n===\n\nMiddleware\n\nEngineering Datahub\n\nTransform the data and publish to target system\n\nProcess and update CN details\n\nWindchill\n\nCN Audit Task Completion\n\nInterface Process Steps:\n\nWindchill will extract and publish Change Notice details to Middleware upon CN Audit Task Completion\n\nMiddleware will Transform the CN data based on transformation rules and publish to Engineering Datahub system\n\nEngineering Datahub system will process the CN details and send the processing status (Success or Failure) to Middleware\n\nMiddleware will further publish the processing status (Success or Failure) to Windchill PLM system\n\nWindchill will process the response and Release CN if the processing status is Success or create Interface Failure Task if the processing status is Failure\n\n1\n\nJSON: Change Notice details\n\n2\n\nP

### Step 2: Data Cleansing

In [286]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


## Function to clean data
def clean_data(documents):
    ## convert to lowercase, lammatization, remove stop words and special chars (regex) (POS / Named Entity Recognition)
    cleaned_documents = []
    stop_words = stopwords.words('english')  # Get stop words
    type(documents)

    for i in range(len(documents)):
        document = documents[i]
        ##print(document.page_content)
        # Lowercase characters
        clean_text = document.page_content.lower()
        #print(clean_text)

        # Remove punctuation
        #clean_text = ''.join([char for char in clean_text if char.isalnum() or char.isspace()])

        #pattern = r"[^\w\s]"  # Match anything except letters, numbers, and whitespace
        pattern = r"[^a-zA-Z\s\n]+"
        clean_text=re.sub(pattern,"", clean_text)
        #print(type(clean_text))
        #print(clean_text)

        # Remove stop words and tokenize
        #tokens = [word for word in clean_text.split() if word not in stop_words]
        #print(tokens)

        # Optionally perform stemming/lemmatization (comment out if not needed)
        # stemmer = SnowballStemmer("english")
        # tokens = [stemmer.stem(item) for item in tokens]
        #lemmatizer=WordNetLemmatizer()
        #for j in range(len(clean_text)):
            #clean_text[j]=clean_text[j].lower()
            #words=nltk.word_tokenize(sentences[i])
            #words=[lemmatizer.lemmatize(word.lower(),pos='v') for word in words if word not in set(stopwords.words('english'))]
            #clean_text[j]=' '.join(words)# converting all the list of words into sentences

        # Join the cleaned tokens back to a string
        cleaned_documents.append(clean_text)

    #return cleaned_documents
    return cleaned_documents

In [287]:
type(extracted_data_fsd)
cleaned_documents_fsd = clean_data(extracted_data_fsd)
#len(extracted_data_fsd)
print(type(cleaned_documents_fsd))
print(cleaned_documents_fsd)

<class 'list'>
[' \n                                                           \n \n page  of   \n \n    \n \n \n \n \n \n \n \n \nengineering  datahub \ninterface  \n \nfunctional specification  document  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n ', '                                                   \n engineering  datahub  interface  functional specification document  \n \n page  of   \ntable of contents    \n overview         \n objective         \n engineering datahub interface use cases list       \n engineering datahub interface solution design       \n solution assumptions        \n interface solution high level process       \n interface solution detailed        \n  release to engineering datahub sub process  interface validations      \n  windchill extraction process        \n  middleware data transformation rules       \n  engineering datahub system update process       \n  windchill change notice pro

### Step 3: Split Data into Chunks

In [51]:
## Function to split document into chunks
def chunk_data(docs,CHUNK_SIZE,CHUNK_OVERLAP):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE,chunk_overlap=CHUNK_OVERLAP)
    doc=text_splitter.split_documents(docs)
    return doc

In [52]:
## Chunk Interface FSD
documents_fsd=chunk_data(extracted_data_fsd,1000,600)
len(documents_fsd)

93

In [53]:
## Chunk Interface Flow
documents_interfaceflow=chunk_data(extracted_data_interfaceflow,1000,600)
len(documents_interfaceflow)

2

In [54]:
## Chunk Incident Resolution
documents_supportresolution=chunk_data(extracted_data_supportresolution,1000,600)
len(documents_supportresolution)

22

In [55]:
## Chunk Interface Mapping
documents_mapping=chunk_data(extracted_data_interfacemapping,1000,600)
len(documents_mapping)

8

In [56]:
## Chunk Architecture Details
documents_archdata=chunk_data(extracted_data_archdata,1000,600)
len(documents_archdata)

2

### Step 4: Embeddings: Convert Chunks to Vectors and Store in Vector DB

In [57]:
## Embedding using OpenAI - model='text-embedding-ada-002'
embeddings = OpenAIEmbeddings(api_key=OPENAI_KEY)
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x0000024825B14940>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x0000024825A38760>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [58]:
## Get the vector size / diemnsion of the embedding
vector_size = embeddings.embed_query("This is a test sentence.")

len(vector_size)

1536

In [60]:
## Pinecone VectorDB initiation, prior to this created pinecone with dimension = 1536
pc = Pinecone(api_key=PINECONE_API_KEY)
index=pc.Index(PINECONE_INDEX)

In [61]:
## Create embeddings for Interface FSD (Data Source 1)
pinecone_index = PineconeVectorStore.from_documents(documents_fsd, embeddings, index_name=PINECONE_INDEX)

In [62]:
## Create embeddings for Interface Flow Diagram (Data Source 2)
pinecone_index = PineconeVectorStore.from_documents(documents_interfaceflow, embeddings, index_name=PINECONE_INDEX)

In [63]:
## Create embeddings for Support Ticket Resolution (Data Source 3)
pinecone_index = PineconeVectorStore.from_documents(documents_supportresolution, embeddings, index_name=PINECONE_INDEX)

In [64]:
## Create embeddings for Interface Mapping (Data Source 4)
pinecone_index = PineconeVectorStore.from_documents(extracted_data_interfacemapping, embeddings, index_name=PINECONE_INDEX)

In [65]:
## Create embeddings for Interface Arch Data (Data Source 5)
pinecone_index = PineconeVectorStore.from_documents(documents_archdata, embeddings, index_name=PINECONE_INDEX)

### Step 5: RAG Implementation - Leverage Open AI Model, take input query from user & retrieve similar vectors from Vector DB, pass query + similar vectors to get the response

In [66]:
# Prompt Template

prompt=ChatPromptTemplate.from_messages(
    [
        ("system","You are a helpful support assistant. Please respond to the IT application support executive queries"),
        ("user","Question:{question}")
    ]
)

In [67]:
## Cosine Similarity Retreive Results from Pinecone VectorDB
def retrieve_query(query,k=5):
    matching_results=pinecone_index.similarity_search(query,k=k)
    return matching_results

In [68]:
## OpenAI Model - gpt-3.5-turbo and chain creation
llm=ChatOpenAI(openai_api_key=OPENAI_KEY,model_name="gpt-3.5-turbo", temperature=0.5)
chain=load_qa_chain(llm,chain_type="stuff")
#question_answer_chain = create_stuff_documents_chain(llm, prompt)
#rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [69]:
## Search answers for the user query
def retrieve_answers(query):
    doc_search=retrieve_query(query)
    response=chain.run(input_documents=doc_search,question=query)
    return response

In [70]:
user_query = "What is Engineering Datahub Interface Trigger?"
answer = retrieve_answers(user_query)
print(answer)

  warn_deprecated(


The Engineering Datahub Interface is triggered when a Change Notice is completed in Windchill PLM. The interface processes the information sent by Windchill, updates the Change Notice and related information in the Engineering Datahub system, and shares a success or failure response back to the Middleware, which then sends it back to Windchill. If the update is successful, the Change Notice is promoted to the Released state. If there is a failure, detailed information on the failure records is sent back for analysis and resolution by the Windchill Production Support Team.


In [71]:
user_query = "What are the data elements extracted from Change Notice?"
answer = retrieve_answers(user_query)
print(answer)

The data elements extracted from Change Notice are:
1. Engineering Release
2. Part / Assembly
3. BOM Structure
4. Part / Assembly Drawing
5. Engineering Specification Document


In [72]:
user_query = "What are the pre-validations of Engineering Datahub Interface?"
answer = retrieve_answers(user_query)
print(answer)

The pre-validations of the Engineering Datahub Interface include checking if the Engineering Datahub Interface Preference is enabled or disabled. If enabled, the process proceeds with further validations. If disabled, the "Release to Engineering Datahub" sub-process ends and continues with Change Notice workflow steps. Another pre-validation is the Eligibility Check, which validates if at least one of the eligible objects (Part, Assembly, Part/Assembly Drawing, Engineering Specification Document) is a resulting object of the Change Notice. Based on this check, the process either proceeds with the next validation if an eligible object is found, or ends the "Release to Engineering Datahub" sub-process if none of the resulting objects are eligible.


In [73]:
user_query = "What is the middleware transforamtion rules for 'Part Maturity' attribute in Engineering Datahub Interface?"
answer = retrieve_answers(user_query)
print(answer)

I don't have specific information about the middleware transformation rules for the 'Part Maturity' attribute in the Engineering Datahub Interface based on the provided context.


In [74]:
user_query = "What are the steps involved in resolving Engineering Datahub Interface failure?"
answer = retrieve_answers(user_query)
print(answer)

The steps involved in resolving Engineering Datahub Interface failure typically include:

1. Performing failure analysis to identify the root cause of the issue.
2. Taking necessary steps to resolve the failure, which may involve fixing errors, updating data, or making system adjustments.
3. Re-triggering the Engineering Datahub interface to initiate the data transfer process again.
4. Contacting the Windchill Production Support Team for updates on the resolution progress.

These steps are crucial in addressing and fixing any issues that may arise during the interface process.


In [75]:
user_query = "What are the failure modes of Engineering Datahub Interface?"
answer = retrieve_answers(user_query)
print(answer)

The failure modes of the Engineering Datahub Interface include processing failures and extraction failures. Processing failures occur when the Engineering Datahub system encounters issues processing the information sent by Windchill, resulting in a failure response being sent back. Extraction failures, on the other hand, happen when there are issues with extracting data, such as in the case of BOM structure changes not being extracted when an Assembly is promoted to the "Obsolete" state.


In [76]:
user_query = "What are the process steps of Engineering Datahub Interface?"
answer = retrieve_answers(user_query)
print(answer)

The process steps of the Engineering Datahub Interface include:

1. Release to Engineering Datahub Sub-Process & Interface Validations
2. Windchill Extraction Process
3. Middleware Data Transformation Rules
4. Engineering Datahub System Update Process
5. Windchill Change Notice Processing after Engineering Datahub Acknowledgement
6. Interface Failure Handling and Re-Trigger Mechanism

These steps involve tasks such as auditing change notices, updating data in the Engineering Datahub system, handling interface failures, and processing change notices in Windchill after acknowledgment from the Engineering Datahub.


### Step 6: Chat UI based on streamlit

### Step 7: Validations - Rouge Score (Rouge1, Rouge2, RougeL) & BLEU Score

In [84]:
## Calculate Rouge Score
def calc_rouge(qa_pairs):
  scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
  scores = {}
  for qa in qa_pairs:
    #print("reference_answer: " + qa["reference_answer"])
    #print("predicted_answer: " + qa["predicted_answer"])
    rouge_score = scorer.score(qa["reference_answer"], qa["predicted_answer"])
    for metric, value in rouge_score.items():
      if metric not in scores:
        scores[metric] = {"precision": [], "recall": [], "f1": []}
      scores[metric]["precision"].append(value[0])
      scores[metric]["recall"].append(value[1])
      scores[metric]["f1"].append(value[2])
  average_scores = {}
  for metric, stats in scores.items():
    average_scores[metric] = {
      "precision": sum(stats["precision"]) / len(stats["precision"]),
      "recall": sum(stats["recall"]) / len(stats["recall"]),
      "f1": sum(stats["f1"]) / len(stats["f1"])
    }
  return average_scores

In [85]:
# Q&A pairs with Question, Reference Answer and Predicted Answer
qa_pairs = [
  {"question": "What is the interface trigger for Engineering Datahub interface in Windchill?",
   "reference_answer": "Interface Trigger for Engineering Datahub interface is Change Notice Audit Task Completion",
   "predicted_answer": retrieve_answers("What is the interface trigger for Engineering Datahub interface in Windchill?")},
  {"question": "What are the potential failure modes related to Engineering Datahub interface failures?",
   "reference_answer": "The potential failure modes related to Engineering Datahub interface include: interface pre-validation, data extraction and middleware connectivity failure in Windchill, data transformation and Engineering Datahub system connectivity failures in Middleware system, data processing failure in Engineering Datahub system.",
   "predicted_answer": retrieve_answers("What are the potential failure modes related to Engineering Datahub interface failures?")},
  {"question": "What are the data elements extracted for Engineering Datahub interface?",
   "reference_answer": "Data elements extracted for Engineering Datahub interface includes, Engineering Release, Part / Assembly, BOM Structure, Part / Assembly Drawing, Engineering Specification Document",
   "predicted_answer": retrieve_answers("What are the data elements extracted for Engineering Datahub interface?")},
  {"question": "Middleware transformation rules to convert Windchill attribute to Engineering Datahub attribute",
   "reference_answer": "Middleware transformation rules to convert Windchill attribute to Engineering Datahub attribute are as follows: Part Maturity - Convert Windchill Part Maturity to Engineering Datahub Part Maturity Code value as per below transformation logic, Concept -> C, Limited Production -> L, Production -> P; Inventory Disposition - Convert Windchill Inventory Disposition to Engineering Datahub Inventory Disposition Code value as per below transformation logic, Use Existing -> U, Rework -> R, Scrap -> S; Kit - Convert Windchill Kit Code to Engineering Datahub Kit Code value as per below transformation logic, Box -> 1, Loose -> 2, Standard -> 3",
   "predicted_answer": retrieve_answers("Middleware transformation rules to convert Windchill attribute to Engineering Datahub attribute")},
   {"question": "Detail the windchill extraction logic for BOM structure for Engineering Datahub interface",
   "reference_answer": "Windchill extraction logic for BOM structure for Engineering Datahub interface as follows: Full Extraction - First Level BOM shall be extracted when Assembly with BOM Structure is created and released first time. Delta Extraction: Only Delta changes of the first level BOM structure shall the extracted when Assembly with BOM Structure is revised and promoted to Released state. Latest Version shall be compared with previous Released Version to calculate the delta changes.",
   "predicted_answer": retrieve_answers("Detail the windchill extraction logic for BOM structure for Engineering Datahub interface")},
  {"question": "Summarize the steps involved in resolving Engineering Datahub Interface failure tickets",
   "reference_answer": "Steps involved in resolving Engineering Datahub Interface failure tickets includes, 1. Check if any Admin task related to Interface Failure is generated in Windchill home page Tasks Table. 2. Check the Failure Code from Middleware Response JSON received for the Change Notice 3. Root Cause Analysis: Based on the RCA it is observed that middleware transformation logic for one of the attribute has not considered the new LoV added in Windchill 4. Worked with middleware team to implement the transformation logic for the new LoV 5. After Middleware team implemented the logic, interface has been re-trigged by completing the Interface Failure Task",
   "predicted_answer": retrieve_answers("Summarize the steps involved in resolving Engineering Datahub Interface failure tickets")},
  {"question": "What is the failure handling mechanism implemented for 'MW Service invocation Failure' in Engineering Datahub interface?",
   "reference_answer": "The failure handling mechanism implemented for 'MW Service invocation Failure' in Engineering Datahub interface include: Middleware Failure Task generation for Windchill Production Support Team and Email notification will be sent to CN Auditor. Windchill Production Support team shall analyse the failure cause, work with Middleware team to resolve the connection issue and complete the Middleware Failure Task to re-trigger the interface",
   "predicted_answer": retrieve_answers("What is the failure handling mechanism implemented for 'MW Service invocation Failure' in Engineering Datahub interface?")},
  {"question": "List down the interface use cases related to Engineering Datahub interface.",
   "reference_answer": "The interface use cases related to Engineering Datahub interface include: Create and Release Part through Change Request and Change Notice process, Revise and Release Part with attribute changes through Change Request and Change Notice process, Revise and Obsolete Part through Change Request and Change Notice process, Create and Release Assembly with BOM Structure through Change Request and Change Notice process, Revise and Release Assembly with BOM Structure changes (Add Component, Remove Component or Quantity change) through Change Request and Change Notice process, Revise and Obsolete Assembly",
   "predicted_answer": retrieve_answers("List down the interface use cases related to Engineering Datahub interface.")},
  {"question": "Summarize the solution assumptions of Engineering Datahub interface",
   "reference_answer": "The solution assumptions of Engineering Datahub interface includes, Interface will be applicable to following object types: Part, Assembly, Part / Assembly Drawing, Engineering Specification Document; All the new release and revisions to Part, Assembly, Part / Assembly Drawing, Engineering Specification Document will be performed through Change Notice process only; Any Admin changes performed on the Part, Assembly, Part / Assembly Drawing, Engineering Specification Document will not be interfaced to Engineering Datahub; In case of BOM Structure changes released through Change Notice process, only delta datachanges will be interfaced to Engineering Datahub, full BOM Structure will not be interfaced.",
   "predicted_answer": retrieve_answers("Summarize the solution assumptions of Engineering Datahub interface")},
  {"question": "Summarize the failure modes of Manufacturing ERP or MES Interface",
   "reference_answer": "Information is not available on the failure modes of Manufacturing ERP or MES Interface",
   "predicted_answer": retrieve_answers("Summarize the failure modes of Manufacturing ERP or MES Interface")}
   ]

# Generate average ROUGE score 
rouge_results = calc_rouge(qa_pairs)
print(f"Average Rouge1 Score: {rouge_results['rouge1']}")
print(f"Average Rouge2 Score: {rouge_results['rouge2']}")
print(f"Average RougeL Score: {rouge_results['rougeL']}")

Average Rouge1 Score: {'precision': 0.5725802274584761, 'recall': 0.7245431549172775, 'f1': 0.6265624926738024}
Average Rouge2 Score: {'precision': 0.37361207896459003, 'recall': 0.4790309152426488, 'f1': 0.4108752550447317}
Average RougeL Score: {'precision': 0.4993058446716535, 'recall': 0.6386418546928824, 'f1': 0.5494757861896821}


In [86]:
## Calculate BLEU Score
def calc_corpusbleu(qa_pairs):
    predicted_answers = []
    references_list = [[ref["reference_answer"]] for ref in qa_pairs]
    
    for qa in qa_pairs:
        predicted_answers.append(qa["predicted_answer"])
    
    bleu_score_corpus = corpus_bleu(references_list, predicted_answers)
    print("Corpus BLEU Score: ", bleu_score_corpus)
    
    return bleu_score_corpus

In [87]:
# Generate average ROUGE score 
bleu_results = calc_corpusbleu(qa_pairs)


Corpus BLEU Score:  0.5666755518418582
