Commit

Merge pull request #84 from SINTEF-9012/pipeline-improvements
Pipeline improvements
tiptr authored Dec 26, 2022
2 parents 4a826ec + ae91f7b commit 6ac6e4c
Showing 45 changed files with 2,085 additions and 898 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -17,4 +17,6 @@ database_import/
aas_import/
aas_export/
safety_backups/
-scatter-output.png
+scatter-output.png
+similarity_pipeline_status.yaml
+object_detection_outputs/
6 changes: 5 additions & 1 deletion assets/custom_style.css
@@ -589,7 +589,7 @@ body {
}

#left-sidebar-collapse-similarity-pipeline-content {
-  width: 320px;
+  width: 400px;
  height: 100%;
  max-height: 100%;
  overflow: hidden;
@@ -610,6 +610,10 @@
  height: 100%;
}

.main-graph-visibility-switch-hidden {
  display: none;
}



#left-sidebar-main-elements-container {
7 changes: 7 additions & 0 deletions assets/cytoscape-graph-style.json
@@ -75,6 +75,13 @@
"background-image": "https://fonts.gstatic.com/s/i/short-term/release/materialsymbolsrounded/scatter_plot/wght500/48px.svg"
}
},
{
"selector": ".DIMENSION_CLUSTER",
"style": {
"background-color": "#6b449b",
"background-image": "https://fonts.gstatic.com/s/i/short-term/release/materialsymbolsrounded/scatter_plot/wght500/48px.svg"
}
},
{
"selector": ".ASSET_SIMILARITY",
"style": {
15 changes: 15 additions & 0 deletions backend/api/python_endpoints/asset_endpoints.py
@@ -30,3 +30,18 @@ def delete_asset_similarities():

def get_assets_count():
    return ASSETS_DAO.get_assets_count()


def add_keyword(asset_iri: str, keyword: str):
    """Adds a keyword to the asset by creating a relationship to the keyword node,
    creating the keyword node first if it does not yet exist.
    Args:
        asset_iri (str): IRI of the asset the keyword belongs to
        keyword (str): the keyword to attach
    """
    ASSETS_DAO.add_keyword(asset_iri=asset_iri, keyword=keyword)


def get_keywords_set_for_asset(asset_iri: str):
    return ASSETS_DAO.get_keywords_set_for_asset(asset_iri=asset_iri)
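
The two endpoints above form a simple round trip: add_keyword attaches a keyword to an asset (creating the keyword node on first use), and get_keywords_set_for_asset reads the combined set back. A minimal usage sketch — the asset IRI and keywords are made up for illustration:

import backend.api.python_endpoints.asset_endpoints as asset_endpoints

# Hypothetical asset IRI, for illustration only
ASSET_IRI = "www.sintef.no/aas_identifiers/learning_factory/machines/example_machine"

asset_endpoints.add_keyword(asset_iri=ASSET_IRI, keyword="milling")
asset_endpoints.add_keyword(asset_iri=ASSET_IRI, keyword="steel")

# Combines asset-level and file-level keywords into one set (see AssetsDao below)
print(asset_endpoints.get_keywords_set_for_asset(asset_iri=ASSET_IRI))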
28 changes: 26 additions & 2 deletions backend/api/python_endpoints/file_endpoints.py
@@ -152,9 +152,33 @@ def add_keyword(file_iri: str, keyword: str):
    SUPPL_FILE_DAO.add_keyword(file_iri=file_iri, keyword=keyword)


def save_extracted_properties(file_iri: str, properties_string: str):
    SUPPL_FILE_DAO.save_extracted_properties(
        file_iri=file_iri, properties_string=properties_string
    )


def save_extracted_text(file_iri: str, text: str):
    SUPPL_FILE_DAO.save_extracted_text(file_iri=file_iri, text=text)


-def get_keywords_set_for_asset(asset_iri: str):
-    return SUPPL_FILE_DAO.get_keywords_set_for_asset(asset_iri=asset_iri)
+def reset_dimension_clusters():
+    SUPPL_FILE_DAO.reset_dimension_clusters()


def create_dimension_cluster(
    iri: str, id_short: str, description: str | None = None, caption: str | None = None
):
    SUPPL_FILE_DAO.create_dimension_cluster(
        iri=iri, id_short=id_short, description=description, caption=caption
    )


def add_file_to_dimension_cluster(file_iri: str, cluster_iri: str):
    SUPPL_FILE_DAO.add_file_to_dimension_cluster(
        file_iri=file_iri, cluster_iri=cluster_iri
    )


def get_dimensions_cluster_for_asset(asset_iri: str):
    return SUPPL_FILE_DAO.get_dimensions_cluster_for_asset(asset_iri=asset_iri)
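
Taken together, the three dimension-cluster functions support a rebuild-from-scratch flow: clear the existing clusters, create fresh cluster nodes, and attach the analyzed files. A sketch of that flow — the IRIs, id_short, and caption values are hypothetical:

import backend.api.python_endpoints.file_endpoints as file_endpoints

# Start from a clean slate before re-running the CAD analysis stage
file_endpoints.reset_dimension_clusters()

# Hypothetical cluster IRI, loosely following the naming scheme used in this PR
CLUSTER_IRI = (
    "www.sintef.no/aas_identifiers/learning_factory/"
    "similarity_analysis/dimension_cluster_0"
)
file_endpoints.create_dimension_cluster(
    iri=CLUSTER_IRI,
    id_short="dimension_cluster_0",
    caption="Similar bounding-box dimensions",  # hypothetical caption
)

# Attach a supplementary file to the cluster (file IRI is also made up)
file_endpoints.add_file_to_dimension_cluster(
    file_iri="www.sintef.no/aas_identifiers/learning_factory/files/example_part",
    cluster_iri=CLUSTER_IRI,
)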
103 changes: 103 additions & 0 deletions backend/api/python_endpoints/similarity_pipeline_endpoints.py
@@ -0,0 +1,103 @@
from multiprocessing import Process
from backend.knowledge_graph.dao.AnnotationNodesDao import AnnotationNodesDao
from backend.knowledge_graph.dao.BaseNodesDao import BaseNodeDao
from backend.knowledge_graph.dao.DatabaseConnectionsDao import DatabaseConnectionsDao
from backend.knowledge_graph.dao.RuntimeConnectionsDao import RuntimeConnectionsDao
from backend.runtime_connections.RuntimeConnectionContainer import (
    RuntimeConnectionContainer,
)
from similarity_pipeline.similarity_pipeline_1_ts_feature_extraction import (
    similarity_pipeline_1_ts_feature_extraction,
)
from similarity_pipeline.similarity_pipeline_2_ts_dimensionality_reduction import (
    similarity_pipeline_2_ts_dimensionality_reduction,
)
from similarity_pipeline.similarity_pipeline_3_ts_clustering import (
    similarity_pipeline_3_ts_clustering,
)
from similarity_pipeline.similarity_pipeline_4_text_key_phrase_extraction import (
    similarity_pipeline_4_text_key_phrase_extraction,
)
from similarity_pipeline.similarity_pipeline_5_cad_analysis import similarity_pipeline_5_cad_analysis
from similarity_pipeline.similarity_pipeline_6_image_analysis import similarity_pipeline_6_image_analysis
from similarity_pipeline.similarity_pipeline_7_asset_similarity import similarity_pipeline_7_asset_similarity
from similarity_pipeline.similarity_pipeline_status_manager import (
    SimilarityPipelineStatusManager,
)

BASE_NODE_DAO: BaseNodeDao = BaseNodeDao.instance()
ANNOTATIONS_DAO: AnnotationNodesDao = AnnotationNodesDao.instance()
DB_CON_DAO: DatabaseConnectionsDao = DatabaseConnectionsDao.instance()
RT_CON_DAO: RuntimeConnectionsDao = RuntimeConnectionsDao.instance()

RT_CON_CONTAINER: RuntimeConnectionContainer = RuntimeConnectionContainer.instance()

SIMILARITY_MANAGER = SimilarityPipelineStatusManager.instance()


def get_pipeline_status():
    """Combined status endpoint. Preferred over separate per-stage requests,
    as it reduces the number of API calls.
    Returns:
        dict: the current status of all pipeline stages
    """

    return SIMILARITY_MANAGER.read_status()


def post_time_series_feature_extraction():
    SIMILARITY_MANAGER.set_active(active=True, stage="time_series_feature_extraction")
    pipeline_process: Process = Process(
        target=similarity_pipeline_1_ts_feature_extraction,
    )
    pipeline_process.start()


def post_time_series_dimensionality_reduction():
    SIMILARITY_MANAGER.set_active(
        active=True, stage="time_series_dimensionality_reduction"
    )
    pipeline_process: Process = Process(
        target=similarity_pipeline_2_ts_dimensionality_reduction,
    )
    pipeline_process.start()


def post_time_series_clustering():
    SIMILARITY_MANAGER.set_active(active=True, stage="time_series_clustering")
    pipeline_process: Process = Process(
        target=similarity_pipeline_3_ts_clustering,
    )
    pipeline_process.start()


def post_text_keyphrase_extraction():
    SIMILARITY_MANAGER.set_active(active=True, stage="text_keyphrase_extraction")
    pipeline_process: Process = Process(
        target=similarity_pipeline_4_text_key_phrase_extraction,
    )
    pipeline_process.start()


def post_cad_analysis():
    SIMILARITY_MANAGER.set_active(active=True, stage="cad_analysis")
    pipeline_process: Process = Process(
        target=similarity_pipeline_5_cad_analysis,
    )
    pipeline_process.start()


def post_image_analysis():
    SIMILARITY_MANAGER.set_active(active=True, stage="image_analysis")
    pipeline_process: Process = Process(
        target=similarity_pipeline_6_image_analysis,
    )
    pipeline_process.start()


def post_asset_similarity():
    SIMILARITY_MANAGER.set_active(active=True, stage="asset_similarity")
    pipeline_process: Process = Process(
        target=similarity_pipeline_7_asset_similarity,
    )
    pipeline_process.start()
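
Every stage follows the same pattern: flag the stage as active in the status manager, then run the stage function in a separate multiprocessing.Process so the API call returns immediately. For that status to be visible across process boundaries it has to be persisted — note the new similarity_pipeline_status.yaml entry in .gitignore. A minimal sketch of what such a YAML-backed manager could look like; the file name, key layout, and method bodies are assumptions, not the actual SimilarityPipelineStatusManager implementation:

import yaml  # PyYAML

STATUS_FILE = "similarity_pipeline_status.yaml"  # assumed location


class YamlStatusManager:
    """Hypothetical YAML-backed status store shared across pipeline processes."""

    def read_status(self) -> dict:
        try:
            with open(STATUS_FILE, "r", encoding="utf-8") as f:
                return yaml.safe_load(f) or {}
        except FileNotFoundError:
            return {}

    def set_active(self, active: bool, stage: str) -> None:
        status = self.read_status()
        status[stage] = {"active": active}
        with open(STATUS_FILE, "w", encoding="utf-8") as f:
            yaml.safe_dump(status, f)

Presumably each stage function flips its flag back to inactive when it finishes; that detail is not visible in this diff.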
6 changes: 4 additions & 2 deletions backend/api/python_endpoints/timeseries_endpoints.py
@@ -161,9 +161,11 @@ def set_ts_reduced_feature_list(iri: str, reduced_feature_list: List):
    )


-def create_ts_cluster(iri: str, id_short: str, description: str | None = None):
+def create_ts_cluster(
+    iri: str, id_short: str, description: str | None = None, caption: str | None = None
+):
    TIMESERIES_NODES_DAO.create_ts_cluster(
-        iri=iri, id_short=id_short, description=description
+        iri=iri, id_short=id_short, description=description, caption=caption
    )


49 changes: 49 additions & 0 deletions backend/api/rest_endpoints/similarity_pipeline_endpoints.py
@@ -0,0 +1,49 @@
from backend.api.api import app
import backend.api.python_endpoints.similarity_pipeline_endpoints as similarity_pipeline_endpoints


@app.get("/similarity_pipeline/status")
async def get_pipeline_status():
    """Combined status endpoint for the similarity pipeline.
    Returns:
        dict: the current pipeline status (returned as JSON)
    """
    status_dict = similarity_pipeline_endpoints.get_pipeline_status()

    return status_dict


@app.post("/similarity_pipeline/time_series_feature_extraction")
async def post_time_series_feature_extraction():
    similarity_pipeline_endpoints.post_time_series_feature_extraction()


@app.post("/similarity_pipeline/time_series_dimensionality_reduction")
async def post_time_series_dimensionality_reduction():
    similarity_pipeline_endpoints.post_time_series_dimensionality_reduction()


@app.post("/similarity_pipeline/time_series_clustering")
async def post_time_series_clustering():
    similarity_pipeline_endpoints.post_time_series_clustering()


@app.post("/similarity_pipeline/text_keyphrase_extraction")
async def post_text_keyphrase_extraction():
    similarity_pipeline_endpoints.post_text_keyphrase_extraction()


@app.post("/similarity_pipeline/cad_analysis")
async def post_cad_analysis():
    similarity_pipeline_endpoints.post_cad_analysis()


@app.post("/similarity_pipeline/image_analysis")
async def post_image_analysis():
    similarity_pipeline_endpoints.post_image_analysis()


@app.post("/similarity_pipeline/asset_similarity")
async def post_asset_similarity():
    similarity_pipeline_endpoints.post_asset_similarity()
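
Because the POST handlers return as soon as the background process is spawned, a client triggers a stage and then polls the combined status endpoint. A usage sketch with requests — host and port are assumptions, and the exact shape of the status payload is not shown in this diff:

import time

import requests

BASE_URL = "http://localhost:8000"  # assumed host/port of the backend

# Trigger the first pipeline stage; the call returns immediately
requests.post(f"{BASE_URL}/similarity_pipeline/time_series_feature_extraction")

# Poll the combined status endpoint instead of one request per stage
for _ in range(60):
    status = requests.get(f"{BASE_URL}/similarity_pipeline/status").json()
    print(status)
    time.sleep(5)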
64 changes: 64 additions & 0 deletions backend/knowledge_graph/dao/AssetNodesDao.py
@@ -15,6 +15,7 @@
    NodeTypes,
    RelationshipTypes,
)
from graph_domain.similarities.ExtractedKeywordNode import ExtractedKeywordNode


class AssetsDao(object):
@@ -126,3 +126,66 @@ def get_assets_count(self):
        ).to_table()[0][0]

        return assets_count

    def add_keyword(self, asset_iri: str, keyword: str):
        """Adds a keyword to the asset by creating a relationship to the keyword node,
        creating the keyword node first if it does not yet exist.
        Args:
            asset_iri (str): IRI of the asset the keyword belongs to
            keyword (str): the keyword to attach
        """
        node = ExtractedKeywordNode(
            id_short=f"extracted_keyword_{keyword}",
            iri=f"www.sintef.no/aas_identifiers/learning_factory/similarity_analysis/extracted_keyword_{keyword}",
            keyword=keyword,
            _explizit_caption=keyword,
        )
        self.ps.graph_merge(node)

        relationship = Relationship(
            NodeMatcher(self.ps.graph)
            .match(NodeTypes.ASSET.value, iri=asset_iri)
            .first(),
            RelationshipTypes.KEYWORD_EXTRACTION.value,
            NodeMatcher(self.ps.graph)
            .match(NodeTypes.EXTRACTED_KEYWORD.value, iri=node.iri)
            .first(),
        )

        self.ps.graph_create(relationship)

    def get_keywords_set_for_asset(self, asset_iri: str):
        # Keywords attached to the asset's supplementary files
        file_keywords_table = self.ps.graph_run(
            "MATCH p=(a:"
            + NodeTypes.ASSET.value
            + ' {iri: "'
            + asset_iri
            + '"})-[r1:'
            + RelationshipTypes.HAS_SUPPLEMENTARY_FILE.value
            + "]->(t)-[r2:"
            + RelationshipTypes.KEYWORD_EXTRACTION.value
            + "]->(c) RETURN c.keyword"
        ).to_table()

        file_keyword_list = [keyword[0] for keyword in file_keywords_table]

        # Keywords attached to the asset itself
        asset_keywords_table = self.ps.graph_run(
            "MATCH p=(a:"
            + NodeTypes.ASSET.value
            + ' {iri: "'
            + asset_iri
            + '"})-[r1:'
            + RelationshipTypes.KEYWORD_EXTRACTION.value
            + "]->(c) RETURN c.keyword"
        ).to_table()

        asset_keyword_list = [keyword[0] for keyword in asset_keywords_table]

        # Combine both lists, deduplicating via the returned set
        keyword_list = file_keyword_list
        keyword_list.extend(asset_keyword_list)

        return set(keyword_list)
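
Both queries splice the asset IRI directly into the Cypher string, which works as long as IRIs never contain quotes. A parameterized variant would avoid the manual quoting; a sketch, assuming the underlying py2neo Graph is reachable as self.ps.graph (node labels cannot be parameterized in Cypher, so they are still concatenated):

        # Hypothetical parameterized version of the file-keyword query above
        query = (
            "MATCH (a:" + NodeTypes.ASSET.value + " {iri: $asset_iri})"
            "-[:" + RelationshipTypes.HAS_SUPPLEMENTARY_FILE.value + "]->(t)"
            "-[:" + RelationshipTypes.KEYWORD_EXTRACTION.value + "]->(c) "
            "RETURN c.keyword"
        )
        file_keywords_table = self.ps.graph.run(query, asset_iri=asset_iri).to_table()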
