In [1]:
import json
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

In [2]:
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    
    # Tokenize
    tokens = word_tokenize(text)
    
    # Remove punctuation and stopwords, and lemmatize
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens if word.isalpha() and word.lower() not in stop_words]

    return tokens

In [3]:
def tfidf_cosine_similarity(list1, list2):
    # Join the list of strings into a single string
    str1 = ' '.join(list1)
    str2 = ' '.join(list2)
    
    documents = [str1, str2]
    
    # Create the Document-Term Matrix
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    
    # Compute Cosine Similarity
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return cosine_sim[0][0]

In [4]:
def jaccard_similarity(list1, list2):
    set1 = set(list1)
    set2 = set(list2)
    intersection = set1.intersection(set2)
    union = set1.union(set2)
    return len(intersection) / len(union)

In [62]:
def suggest_configuration(new_project_name, username, hist_connections):
    new_project_tokens = preprocess_text(new_project_name)
    similarity_ranking = []
    connection_ranking = {}

    # calculate similarity between history connections and the new project name
    for connection in hist_connections:
        similarity = tfidf_cosine_similarity(new_project_tokens, connection["representatives"])
        similarity_ranking.append((connection["process_instance_id"], similarity))
        connection_ranking[connection["process_instance_id"]] = connection

    # descending sort to find top 5 related configurations
    temporary_ranking = sorted(similarity_ranking, key=lambda x: x[1], reverse=True)
    selected_connections = [connection_ranking[top_config[0]] for top_config in temporary_ranking]
    
    final_connections = []
    final_ranking = []
    for i in range(len(selected_connections)):
        duplicate = False
        if len(final_connections) == 5:
            break
        else:
            for filtered_candidate in final_connections:
                if selected_connections[i]["project_name"] == filtered_candidate["project_name"]\
                and selected_connections[i]["app_name"] == filtered_candidate["app_name"]\
                and selected_connections[i]["pms_name"] == filtered_candidate["pms_name"]:
                    duplicate = True
                    break
        if not duplicate:
            final_connections.append(selected_connections[i])
            final_ranking.append(temporary_ranking[i])
    
    print(final_connections)

    # if there is user's old configuration in the top five related
    # take it
    # otherwise, take the most related configuration
    for i in range(len(final_connections)):
        if username in final_connections[i]["user_name"] :
            return final_connections[i], final_ranking[i]

    return final_connections, final_ranking
    

In [58]:
f = open('./simulated_projects/embedded_connections2.json', 'r')
data = json.load(f)

In [49]:
new_project_name = "Manufacture brake system for the new electric Peugeot"
username = "Furina"

In [63]:
suggested_config = suggest_configuration(new_project_name, username, data)

[{'timestamp': '2024-08-03T09:29:58.632285', 'process_instance_id': 3, 'project_name': 'Manufacture self-driving Car Brake', 'project_domain': 'Car Engineering', 'app_name': 'SIMATIC WinCC V8', 'app_location': 'C:\\Program Files(x86)\\Siemens\\WinCC\\Diagnose', 'pms_name': 'jBPM', 'pms_location': 'http://localhost:8080/kie-server/services/rest', 'user_name': 'Charlotte', 'tasks': ['Assemble the system into a prototype vehicle', 'Assemble the system into the final version'], 'representatives': ['manufactured', 'manufacturer', 'finals', 'components', 'pre-production', 'driver', 'vehicle', 'next', 'model', 'versions', 'assembling', 'prototypes', 'wheel', 'cars', 'assembled', 'manufacture', 'driving', 'design', 'suv', 'brake', 'assemble', 'round', 'edition', 'braking', 'original', 'match', 'manufactures', 'gather', 'pedal', 'second', 'prototype', 'introduced', 'assembles', 'manufacturing', 'designs', 'truck', 'brakes', 'vehicles', 'final', 'version', 'featured', 'reassemble', 'hydraulic', 

In [51]:
suggested_config

{'timestamp': '2024-08-03T09:29:58.632857',
 'process_instance_id': 73,
 'project_name': 'Manufacture self-driving Car Brake',
 'project_domain': 'Car Engineering',
 'app_name': 'SIMATIC WinCC V8',
 'app_location': 'C:\\Program Files(x86)\\Siemens\\WinCC\\Diagnose',
 'pms_name': 'BAPE',
 'pms_location': 'http://localhost:8081/api/process-instance',
 'user_name': 'Furina de Fontaine',
 'tasks': ['Assemble the system into a prototype vehicle',
  'Assemble the system into the final version'],
 'representatives': ['manufactured',
  'manufacturer',
  'finals',
  'components',
  'pre-production',
  'driver',
  'vehicle',
  'next',
  'model',
  'versions',
  'assembling',
  'prototypes',
  'wheel',
  'cars',
  'assembled',
  'manufacture',
  'driving',
  'design',
  'suv',
  'brake',
  'assemble',
  'round',
  'edition',
  'braking',
  'original',
  'match',
  'manufactures',
  'gather',
  'pedal',
  'second',
  'prototype',
  'introduced',
  'assembles',
  'manufacturing',
  'designs',
  'tr

In [53]:
new_project_name = "Assemble brake pedals Peugeot"
username = "Thomas"

In [64]:
suggest_configuration(new_project_name, username, data)

[{'timestamp': '2024-08-03T09:29:58.632285', 'process_instance_id': 3, 'project_name': 'Manufacture self-driving Car Brake', 'project_domain': 'Car Engineering', 'app_name': 'SIMATIC WinCC V8', 'app_location': 'C:\\Program Files(x86)\\Siemens\\WinCC\\Diagnose', 'pms_name': 'jBPM', 'pms_location': 'http://localhost:8080/kie-server/services/rest', 'user_name': 'Charlotte', 'tasks': ['Assemble the system into a prototype vehicle', 'Assemble the system into the final version'], 'representatives': ['manufactured', 'manufacturer', 'finals', 'components', 'pre-production', 'driver', 'vehicle', 'next', 'model', 'versions', 'assembling', 'prototypes', 'wheel', 'cars', 'assembled', 'manufacture', 'driving', 'design', 'suv', 'brake', 'assemble', 'round', 'edition', 'braking', 'original', 'match', 'manufactures', 'gather', 'pedal', 'second', 'prototype', 'introduced', 'assembles', 'manufacturing', 'designs', 'truck', 'brakes', 'vehicles', 'final', 'version', 'featured', 'reassemble', 'hydraulic', 

([{'timestamp': '2024-08-03T09:29:58.632285',
   'process_instance_id': 3,
   'project_name': 'Manufacture self-driving Car Brake',
   'project_domain': 'Car Engineering',
   'app_name': 'SIMATIC WinCC V8',
   'app_location': 'C:\\Program Files(x86)\\Siemens\\WinCC\\Diagnose',
   'pms_name': 'jBPM',
   'pms_location': 'http://localhost:8080/kie-server/services/rest',
   'user_name': 'Charlotte',
   'tasks': ['Assemble the system into a prototype vehicle',
    'Assemble the system into the final version'],
   'representatives': ['manufactured',
    'manufacturer',
    'finals',
    'components',
    'pre-production',
    'driver',
    'vehicle',
    'next',
    'model',
    'versions',
    'assembling',
    'prototypes',
    'wheel',
    'cars',
    'assembled',
    'manufacture',
    'driving',
    'design',
    'suv',
    'brake',
    'assemble',
    'round',
    'edition',
    'braking',
    'original',
    'match',
    'manufactures',
    'gather',
    'pedal',
    'second',
    'pr