In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torch



In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import RGCNConv
import torch_scatter


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import RGCNConv
from torch_scatter import scatter_mean
from torch.nn import Dropout, LayerNorm, Linear
from torch_geometric.nn import GraphNorm

class SimplifiedRGCNModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_relations, edge_feature_dim):
        super(SimplifiedRGCNModel, self).__init__()
        
        # R-GCN layers to process node features
        self.rgcn1 = RGCNConv(in_channels, hidden_channels, num_relations)
        self.rgcn2 = RGCNConv(hidden_channels, out_channels, num_relations)
        
        # Dropout and normalization
        self.dropout = Dropout(0.5)  # Increased dropout rate to combat overfitting
        self.graph_norm = GraphNorm(out_channels)
        
        # Linear layer for edge features
        self.edge_feature_transform = Linear(edge_feature_dim, out_channels)
    
    def forward(self, x, edge_index, edge_type, edge_attr):
        # Transform edge features
        edge_attr_transformed = F.relu(self.edge_feature_transform(edge_attr))
        
        # First R-GCN layer with dropout
        x = self.rgcn1(x, edge_index, edge_type)
        x = F.relu(x)
        x = self.dropout(x)
        
        # Second R-GCN layer with normalization
        x = self.rgcn2(x, edge_index, edge_type)
        x = self.graph_norm(x)
        
        # Aggregate edge information
        edge_aggr = scatter_mean(edge_attr_transformed, edge_index[0], dim=0, dim_size=x.size(0))
        x = x + edge_aggr
        
        return x

In [3]:
import json

# Load the graph data from 'final_graph.json'
with open('final_graph.json', 'r') as f:
    data = json.load(f)
with open('Embeddings/node_ids.json', 'r') as file:
    node_ids_list = json.load(file)


In [4]:
import torch

# Assume 'data' is your graph data loaded from the JSON file
# Extract all unique edge types from the data
unique_edge_types = set(relationship['type'] for relationship in data.get('Relationships', []))

# Create a mapping from edge type strings to unique integers
edge_type_mapping = {edge_type: idx for idx, edge_type in enumerate(unique_edge_types)}

# Print the number of unique edge types
print("Number of unique edge types:", len(edge_type_mapping))
edge_type_mapping

Number of unique edge types: 452


{'creates': 0,
 'consults with': 1,
 'involves': 2,
 'references': 3,
 'can adopt': 4,
 'utilizes strategy': 5,
 'impacts': 6,
 'validates': 7,
 'used in': 8,
 'measures': 9,
 'relates_to': 10,
 'modulates': 11,
 'integral to': 12,
 'can be complemented by': 13,
 'evaluates_impact_on': 14,
 'is composed of': 15,
 'is derived from': 16,
 'implements strategy': 17,
 'aligned with': 18,
 'combines with': 19,
 'derives from': 20,
 'addresses': 21,
 'is achieved by': 22,
 'maintains alignment with': 23,
 'is structured by': 24,
 'is impacted by': 25,
 'requires strategy': 26,
 'assessed by': 27,
 'feeds into': 28,
 'prioritizes': 29,
 'facilitates': 30,
 'monitored by': 31,
 'entails': 32,
 'counteracts': 33,
 'must_align_with': 34,
 'creates_value_for': 35,
 'may involve': 36,
 'supports': 37,
 'supported_by': 38,
 'assigns': 39,
 'is caused by': 40,
 'feedback loop': 41,
 'challenges': 42,
 'benefits': 43,
 'approves_and_supports': 44,
 'related to': 45,
 'emphasizes': 46,
 'is used simil

In [5]:
# Initialize lists to store the source and target node indices and edge types
source_nodes = []
target_nodes = []
edge_types = []

# Example mapping of node IDs to indices (you should have this from your node embeddings)
node_id_to_index = {node_id: index for index, node_id in enumerate(node_ids_list)}

# Populate the source_nodes, target_nodes, and edge_types lists using your relationships data
for relationship in data.get('Relationships', []):
    source = relationship.get('source')
    target = relationship.get('target')
    rel_type = relationship.get('type')

    # Convert node IDs to indices using the mapping
    if source in node_id_to_index and target in node_id_to_index:
        source_index = node_id_to_index[source]
        target_index = node_id_to_index[target]
        source_nodes.append(source_index)
        target_nodes.append(target_index)
        edge_types.append(edge_type_mapping[rel_type])  # Map the relationship type to an integer

# Construct the edge_index and edge_type tensors
edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long)
edge_type = torch.tensor(edge_types, dtype=torch.long)

print("Edge index tensor created successfully:", edge_index)
print("Edge type tensor created successfully:", edge_type)


Edge index tensor created successfully: tensor([[   0,    0,    1,  ..., 2808, 2809, 2809],
        [ 304,  323,    2,  ..., 2809, 2810, 2811]])
Edge type tensor created successfully: tensor([388,  37,  61,  ...,  86, 260, 260])


In [6]:
import torch
from torch_geometric.data import Data
import numpy as np

# --- Load Node and Relationship Embeddings ---

# Load node embeddings (e.g., from your saved .npy file)
node_embeddings_array = np.load('Embeddings/node_embeddings.npy')
node_features = torch.tensor(node_embeddings_array, dtype=torch.float)

# Load relationship embeddings (edge attributes)
combined_embeddings = np.load('Embeddings/relation_embeddings.npy')
edge_features = torch.tensor(combined_embeddings, dtype=torch.float)


# --- Create the Data Object for PyTorch Geometric ---
data_pyg = Data(x=node_features, edge_index=edge_index, edge_attr=edge_features)

print("Data object for PyTorch Geometric created successfully.")


Data object for PyTorch Geometric created successfully.


In [8]:
import torch
import torch.nn as nn
from torch.optim import Adam

# Define the device (GPU if available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model and move it to the device
model = SimplifiedRGCNModel(
    in_channels=node_features.size(1),
    hidden_channels=64,  # You can adjust this value
    out_channels=32,     # You can adjust this value
    num_relations=len(edge_type_mapping),  # Number of unique edge types
    edge_feature_dim=edge_features.size(1)  # Dimension of edge features
).to(device)

# Move data and edge types to the device
data_pyg = data_pyg.to(device)
edge_type = edge_type.to(device)

# Define the optimizer
optimizer = Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Define the loss function (Binary Cross-Entropy Loss with Logits)
loss_fn = nn.BCEWithLogitsLoss()  # Use BCEWithLogitsLoss for binary classification

print("Model, optimizer, and loss function defined successfully.")


Model, optimizer, and loss function defined successfully.


In [9]:
import torch

# Initialize the RGCN model architecture
model_rgcn = SimplifiedRGCNModel(
    in_channels=node_features.size(1),
    hidden_channels=64,  # Use the same values as used during training
    out_channels=32,     # Use the same values as used during training
    num_relations=len(edge_type_mapping),  # Number of unique edge types
    edge_feature_dim=edge_features.size(1)  # Dimension of edge features
).to(device)  # Move to the correct device (CPU or GPU)

# Load the model state dictionary
model_path = "rgcn_model.pth"
model_rgcn.load_state_dict(torch.load(model_path, map_location=device))

# Set the model to evaluation mode
model_rgcn.eval()

print(f"Model loaded successfully from {model_path}")


Model loaded successfully from rgcn_model.pth


  model_rgcn.load_state_dict(torch.load(model_path, map_location=device))


In [18]:
import json
import torch
import numpy as np
from torch import nn
from torch.optim import Adam

# Load JSON files for nodes and edges
with open('final_graph.json') as f:
    graph_data = json.load(f)
with open('Embeddings/node_ids.json') as f:
    node_ids = json.load(f)
with open('Embeddings/relation_keys.json') as f:
    relation_keys = json.load(f)

# Load pre-trained node and relation embeddings
##node_embeddings = np.load('Embeddings/node_embeddings.npy')
relation_embeddings = np.load('Embeddings/relation_embeddings.npy')


In [35]:
# Ensure the RGCN model is in evaluation mode
model_rgcn.eval()

# Generate node embeddings using the trained RGCN model
with torch.no_grad():
    node_embeddings = model_rgcn(data_pyg.x, data_pyg.edge_index, edge_type, data_pyg.edge_attr)

In [36]:
import networkx as nx

# Initialize NetworkX graph
G = nx.Graph()

# Add nodes
for node in node_ids:
    G.add_node(node)

for edge in graph_data["Relationships"]:  # Update the key based on the actual structure
    src, dst = edge["source"], edge["target"]
    if src in node_ids and dst in node_ids:
        G.add_edge(src, dst)


In [37]:
import torch
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

# Standardize the node embeddings if they aren't already
scaler = StandardScaler()
node_features = torch.tensor(scaler.fit_transform(node_embeddings), dtype=torch.float32)


# Get positive edges (existing edges in the graph)
positive_edges = np.array(G.edges())

# Generate negative edges (non-existent edges) for training
non_edges = np.array(list(nx.non_edges(G)))
negative_edges = non_edges[np.random.choice(len(non_edges), len(positive_edges), replace=False)]

# Split data into training and testing sets
train_pos, test_pos = train_test_split(positive_edges, test_size=0.2)
train_neg, test_neg = train_test_split(negative_edges, test_size=0.2)


In [38]:
train_pos_labels = torch.ones(len(train_pos), 1)    # Positive samples labeled as 1
train_neg_labels = torch.zeros(len(train_neg), 1)   # Negative samples labeled as 0
test_pos_labels = torch.ones(len(test_pos), 1)
test_neg_labels = torch.zeros(len(test_neg), 1)

# -----------------------------
# 6. Checking for Imbalance
# -----------------------------

print("Number of positive samples in train:", train_pos_labels.sum().item())
print("Number of negative samples in train:", len(train_neg_labels))
print("Number of positive samples in test:", test_pos_labels.sum().item())
print("Number of negative samples in test:", len(test_neg_labels))

Number of positive samples in train: 2693.0
Number of negative samples in train: 2693
Number of positive samples in test: 674.0
Number of negative samples in test: 674


In [39]:
class LinkPredictor(nn.Module):
    def __init__(self, in_feats):
        super(LinkPredictor, self).__init__()

        # Define a deeper neural network with more layers
        self.fc1 = nn.Linear(in_feats * 2, 256)  # First layer with increased units
        self.fc2 = nn.Linear(256, 128)           # Second layer
        self.fc3 = nn.Linear(128, 64)            # Third layer
        self.fc4 = nn.Linear(64, 32)             # Additional fourth layer
        self.fc5 = nn.Linear(32, 1)              # Output layer

        # Dropout layers for regularization
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.3)
        self.dropout3 = nn.Dropout(0.3)

    def forward(self, node_pair):
        x = torch.cat([node_pair[0], node_pair[1]], dim=1)  # Concatenate node embeddings

        # Apply layers with ReLU activations and dropout
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)

        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)

        x = torch.relu(self.fc3(x))
        x = self.dropout3(x)

        x = torch.relu(self.fc4(x))

        return torch.sigmoid(self.fc5(x))  # Final output with sigmoid for probability



In [40]:
# Create a mapping from node IDs (strings) to integer indices
node_to_index = {node_id: idx for idx, node_id in enumerate(node_ids)}

In [41]:
# Function to prepare edge pairs
# Function to prepare edge pairs using the node_to_index mapping
def prepare_edge_pairs(edges):
    # Unsqueeze each embedding to ensure shape is [1, embedding_dim]
    return [(node_features[node_to_index[edge[0]]].unsqueeze(0),
             node_features[node_to_index[edge[1]]].unsqueeze(0)) for edge in edges]



train_pos_pairs = prepare_edge_pairs(train_pos)
train_neg_pairs = prepare_edge_pairs(train_neg)
test_pos_pairs = prepare_edge_pairs(test_pos)
test_neg_pairs = prepare_edge_pairs(test_neg)

# Labels
train_pos_labels = torch.ones(len(train_pos_pairs), 1)
train_neg_labels = torch.zeros(len(train_neg_pairs), 1)
test_pos_labels = torch.ones(len(test_pos_pairs), 1)
test_neg_labels = torch.zeros(len(test_neg_pairs), 1)


In [42]:
from torch.optim import Adam
import torch

# Initialize the model, optimizer, and loss function
model = LinkPredictor(in_feats=node_embeddings.shape[1])
optimizer = Adam(model.parameters(), lr=1e-3)  # Try a lower learning rate
loss_fn = nn.BCELoss()

# Combine positive and negative pairs for training
train_pairs = train_pos_pairs + train_neg_pairs
train_labels = torch.cat([train_pos_labels, train_neg_labels])

# Flatten train_labels once before the training loop
train_labels = train_labels.view(-1)

# Combine positive and negative pairs for testing
test_pairs = test_pos_pairs + test_neg_pairs
test_labels = torch.cat([test_pos_labels, test_neg_labels]).view(-1)

# Training loop
num_epochs = 150
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass and compute predictions for each training pair
    predictions = torch.cat([model(pair) for pair in train_pairs], dim=0).squeeze()

    # Compute training loss
    loss = loss_fn(predictions, train_labels)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Calculate training accuracy
    train_preds = (predictions >= 0.5).float()  # Convert probabilities to binary predictions
    train_accuracy = (train_preds == train_labels).float().mean().item()

    # Evaluate on the test set
    model.eval()
    with torch.no_grad():
        test_predictions = torch.cat([model(pair) for pair in test_pairs], dim=0).squeeze()
        test_loss = loss_fn(test_predictions, test_labels)

        # Calculate test accuracy
        test_preds = (test_predictions >= 0.5).float()
        test_accuracy = (test_preds == test_labels).float().mean().item()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}/{num_epochs}, Loss: {loss.item():.4f}, "
              f"Train Accuracy: {train_accuracy:.4f}%, "
              f"Test Accuracy: {test_accuracy:.4f}%, "
              f"Test Loss: {test_loss.item():.4f}")


Epoch 0/150, Loss: 0.6952, Train Accuracy: 0.4833%, Test Accuracy: 0.5000%, Test Loss: 0.6922
Epoch 10/150, Loss: 0.6604, Train Accuracy: 0.5375%, Test Accuracy: 0.5935%, Test Loss: 0.6538
Epoch 20/150, Loss: 0.6242, Train Accuracy: 0.6608%, Test Accuracy: 0.6803%, Test Loss: 0.6124
Epoch 30/150, Loss: 0.5641, Train Accuracy: 0.7215%, Test Accuracy: 0.7092%, Test Loss: 0.5466
Epoch 40/150, Loss: 0.5049, Train Accuracy: 0.7447%, Test Accuracy: 0.7448%, Test Loss: 0.4854
Epoch 50/150, Loss: 0.4666, Train Accuracy: 0.7687%, Test Accuracy: 0.7619%, Test Loss: 0.4583
Epoch 60/150, Loss: 0.4420, Train Accuracy: 0.7837%, Test Accuracy: 0.7737%, Test Loss: 0.4426
Epoch 70/150, Loss: 0.4172, Train Accuracy: 0.7976%, Test Accuracy: 0.7789%, Test Loss: 0.4344
Epoch 80/150, Loss: 0.3999, Train Accuracy: 0.8102%, Test Accuracy: 0.7826%, Test Loss: 0.4261
Epoch 90/150, Loss: 0.3802, Train Accuracy: 0.8210%, Test Accuracy: 0.7915%, Test Loss: 0.4246
Epoch 100/150, Loss: 0.3651, Train Accuracy: 0.8238

In [43]:
def predict_link(node_a, node_b, node_to_index, node_features, model, threshold=0.5):
    """
    Predicts the probability of a link existing between node_a and node_b.

    Args:
        node_a (str): Identifier for the first node.
        node_b (str): Identifier for the second node.
        node_to_index (dict): Mapping from node IDs to indices.
        node_features (torch.Tensor): Tensor containing node feature embeddings.
        model (nn.Module): Trained link prediction model.
        threshold (float, optional): Threshold to decide link existence. Defaults to 0.5.

    Returns:
        float: Probability score of the link existing.
        int: Binary prediction (1 for link exists, 0 otherwise).
    """
    if node_a not in node_to_index:
        raise ValueError(f"Node '{node_a}' not found in the graph.")
    if node_b not in node_to_index:
        raise ValueError(f"Node '{node_b}' not found in the graph.")

    idx_a = node_to_index[node_a]
    idx_b = node_to_index[node_b]

    feat_a = node_features[idx_a].unsqueeze(0)  # Shape: [1, in_feats]
    feat_b = node_features[idx_b].unsqueeze(0)  # Shape: [1, in_feats]

    model.eval()
    with torch.no_grad():
        prediction = model((feat_a, feat_b)).item()

    binary_prediction = int(prediction >= threshold)

    return prediction, binary_prediction

# Example usage of the prediction function
example_node_a = 'Standard'  # Replace with an actual node ID
example_node_b = 'Standard_for_Project_Management_Fourth_Edition'  # Replace with an actual node ID

try:
    prob, pred = predict_link(
        example_node_a,
        example_node_b,
        node_to_index,
        node_features,
        model,
        threshold=0.5  # Adjust threshold as needed
    )
    print(f"\nLink Prediction Result:")
    print(f"Probability of link between '{example_node_a}' and '{example_node_b}': {prob:.4f}")
    print(f"Link Prediction: {'Exists' if pred == 1 else 'Does Not Exist'}")
except ValueError as e:
    print(e)


Link Prediction Result:
Probability of link between 'Standard' and 'Standard_for_Project_Management_Fourth_Edition': 0.1196
Link Prediction: Does Not Exist


In [44]:
torch.save(model.state_dict(), 'link_predictor_model.pth')

In [None]:
!pip install groq

Collecting groq
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Downloading groq-0.11.0-py3-none-any.whl (106 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.11.0


In [None]:
import numpy as np
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
import os
from groq import Groq

# Initialize Groq client
client = Groq(api_key="api_key")

# Load data
#node_embeddings = np.load('Embeddings/node_embeddings.npy')
node_features = torch.tensor(node_embeddings, dtype=torch.float32)
node_data = json.load(open('final_updated_graph.json'))

# Extract node types, names, descriptions, and relations, and create mappings
node_ids = [node['id'] for node in node_data['Nodes']]
node_types = {node['id']: node['type'] for node in node_data['Nodes']}
node_names = {node['id']: node.get('name', node['id']) for node in node_data['Nodes']}
node_properties = {node['id']: node.get('properties', {}) for node in node_data['Nodes']}
node_to_index = {node_id: idx for idx, node_id in enumerate(node_ids)}

# Define relationships with "type" as the verb equivalent
relations = {(edge['source'], edge['target']): edge['type'] for edge in node_data['Relationships']}

model = LinkPredictor(in_feats=node_features.shape[1])
model.load_state_dict(torch.load('link_predictor_model.pth', weights_only=True))
model.eval()

# Sentence-BERT and projection layer for project description embedding
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
embedding_projection = nn.Linear(384, 32)

def get_project_embedding(description):
    raw_embedding = torch.tensor(embedder.encode(description), dtype=torch.float32)
    return embedding_projection(raw_embedding)

def find_similar_nodes(project_embedding, top_k=5):
    similarities = F.cosine_similarity(project_embedding, node_features)
    top_k_indices = similarities.topk(top_k).indices
    return [node_ids[idx] for idx in top_k_indices]

def recommend_strategies_risks_outcomes_kpis(similar_nodes, top_k=5, threshold=0.8):
    recommendations, risks, outcomes, kpis = [], [], [], []
    related_nodes = []
    relevant_relations = []

    for node_id in similar_nodes:
        node_idx = node_to_index[node_id]
        node_embedding = node_features[node_idx].unsqueeze(0)

        for target_idx, target_id in enumerate(node_ids):
            if target_id != node_id:
                target_embedding = node_features[target_idx].unsqueeze(0)
                link_prob = model((node_embedding, target_embedding)).item()
                target_type = node_types[target_id]

                if link_prob >= threshold:
                    if target_type == "Strategy":
                        recommendations.append((target_id, link_prob))
                    elif target_type == "Risk":
                        risks.append((target_id, link_prob))
                    elif target_type == "Outcome":
                        outcomes.append((target_id, link_prob))
                    elif target_type == "KPI":
                        kpis.append((target_id, link_prob))
                    else:
                        related_nodes.append((target_id, link_prob))

                    # Add relevant relations if both nodes are high scoring
                    if (node_id, target_id) in relations:
                        relevant_relations.append({"source": node_id, "target": target_id, "type": relations[(node_id, target_id)]})
                    elif (target_id, node_id) in relations:
                        relevant_relations.append({"source": target_id, "target": node_id, "type": relations[(target_id, node_id)]})

    def unique_entities(entity_list, top_k):
        unique_entities = []
        seen_names = set()
        for entity_id, score in sorted(entity_list, key=lambda x: x[1], reverse=True):
            entity_name = node_names[entity_id]
            if entity_name not in seen_names:
                properties = node_properties.get(entity_id, {})
                unique_entities.append((entity_name, properties, score))
                seen_names.add(entity_name)
            if len(unique_entities) >= top_k:
                break
        return unique_entities

    unique_recommendations = unique_entities(recommendations, top_k)
    unique_risks = unique_entities(risks, top_k)
    unique_outcomes = unique_entities(outcomes, top_k)
    unique_kpis = unique_entities(kpis, top_k)
    unique_related_nodes = unique_entities(related_nodes, top_k)

    return unique_recommendations, unique_risks, unique_outcomes, unique_kpis, unique_related_nodes, relevant_relations

def generate_description_with_groq(high_score_nodes, relevant_relations):
    content = []
    for node_name, properties, score in high_score_nodes:
        synonyms = properties.get("synonyms", [node_name])
        description = properties.get("description", "")
        definition = properties.get("definition", "")
        impact_level = properties.get("impact_level", "Moderate")
        category = properties.get("category", "N/A")
        example = properties.get("example", "N/A")
        mitigation_strategy = properties.get("mitigation_strategy", "N/A")
        relevant_stakeholders = ", ".join(properties.get("relevant_stakeholders", []))
        timeline_phase = properties.get("timeline_phase", "N/A")
        state = properties.get("state", "N/A")
        keywords = ", ".join(properties.get("keywords", []))
        tags = ", ".join(properties.get("tags", []))
        ontological_hierarchy = " > ".join(properties.get("ontological_hierarchy", []))
        source = properties.get("source", "N/A")
        importance_weight = properties.get("importance_weight", "N/A")
        confidence_level = properties.get("confidence_level", "N/A")
        risk_appetite = properties.get("risk_appetite", "N/A")
        trending_status = properties.get("trending_status", "N/A")

        content.append(
            f"{node_name} (Synonyms: {', '.join(synonyms)}): {definition}. {description} "
            f"Impact Level: {impact_level}. Category: {category}. Example: {example}. "
            f"Mitigation Strategy: {mitigation_strategy}. Relevant Stakeholders: {relevant_stakeholders}. "
            f"Timeline Phase: {timeline_phase}. State: {state}. Keywords: {keywords}. Tags: {tags}. "
            f"Ontological Hierarchy: {ontological_hierarchy}. Source: {source}. "
            f"Importance Weight: {importance_weight}. Confidence Level: {confidence_level}. "
            f"Risk Appetite: {risk_appetite}. Trending Status: {trending_status}."
        )

    for relation in relevant_relations:
        src_name = node_names[relation['source']]
        tgt_name = node_names[relation['target']]
        relation_type = relation['type']
        content.append(f"{src_name} {relation_type} {tgt_name}.")

    # Combine all content into a single text block
    combined_content = "\n".join(content)

    prompt = (
        "Here is some information about various aspects of the project:\n\n" +
        combined_content +
        "\n\nPlease create a coherent and well-structured summary of this information in a natural paragraph format. "
        "Avoid using technical terms like 'node' or 'relationship' and ensure the summary flows smoothly without appearing bot-generated."
    )

    # Call Groq's API for refinement
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content.strip()


def get_risk_management_recommendations(project_description, top_similar_nodes=5, top_recommendations=5, threshold=0.8):
    project_embedding = get_project_embedding(project_description)
    similar_nodes = find_similar_nodes(project_embedding, top_k=top_similar_nodes)
    strategies, risks, outcomes, kpis, related_nodes, relevant_relations = recommend_strategies_risks_outcomes_kpis(similar_nodes, top_k=top_recommendations, threshold=threshold)

    generated_description = generate_description_with_groq(related_nodes, relevant_relations)

    return strategies[:3], risks[:3], outcomes[:3], kpis[:3], generated_description

# Example usage
project_description = (
  ''' Our team is developing a cloud-based platform called BuildSmart to streamline project management, cost estimation, and risk assessment for large-scale construction projects. The platform will integrate data from various sources, including on-site sensors, project schedules, and financial records, to provide real-time insights into project progress, resource allocation, and budget tracking. Key components include advanced analytics for cost forecasting, a mobile application for on-site updates, and a dashboard for project managers to visualize project milestones and potential delays. The platform must adhere to industry standards such as OSHA for safety compliance and ISO 9001 for quality management. Data security and scalability are crucial, as the platform will handle sensitive data and must support multiple simultaneous projects. Potential risks include delays due to unexpected site conditions, supply chain disruptions, and regulatory changes. Ensuring accurate data collection and compliance with local buil'''
)

# Get recommendations and risks
recommended_strategies, related_risks, related_outcomes, related_kpis, generated_description = get_risk_management_recommendations(project_description)

# Output recommendations, risks, outcomes, KPIs, and generated description
print("Top 3 Recommended Risk Management Strategies and Confidence Scores:")
for strategy_name, properties, score in recommended_strategies:
    print(f"- {strategy_name}: {properties.get('description', 'No description')} (Confidence: {score:.2f})")

print("\nTop 3 Extracted Risks and Confidence Scores:")
for risk, properties, score in related_risks:
    print(f"- {risk}: {properties.get('description', 'No description')} (Confidence: {score:.2f})")

print("\nTop 3 Outcomes and Confidence Scores:")
for outcome_name, properties, score in related_outcomes:
    print(f"- {outcome_name}: {properties.get('description', 'No description')} (Confidence: {score:.2f})")

print("\nTop 3 KPIs and Confidence Scores:")
for kpi_name, properties, score in related_kpis:
    print(f"- {kpi_name}: {properties.get('key_metrics', 'No metrics available')} (Confidence: {score:.2f})")

print("\nGenerated Description from High-Scoring Related Nodes:")
print(generated_description)


  node_features = torch.tensor(node_embeddings, dtype=torch.float32)


Top 3 Recommended Risk Management Strategies and Confidence Scores:
- Engagement_Strategy: Strategies to engage stakeholders effectively throughout the project. (Confidence: 0.99)
- Project Risk Management (PRM): The strategic implementation of Project Risk Management (PRM) in SMEs entails aligning risk management activities with business objectives to maximize project value. As highlighted in the thesis, strategic RM encompasses identifying, analyzing, and responding to risks in a way that supports innovation and growth initiatives. This involves integrating PRM into the organizational culture and decision-making processes, which can enhance project resilience and adaptability. SMEs that adopt strategic PRM are better positioned to leverage their unique resources for competitive advantage, ultimately ensuring a higher success rate in project execution. (Confidence: 0.98)
- Implementation of OPM: This node focuses on the strategic implementation of the Organizational Project Management