In [21]:
from collections import defaultdict
import torch
import numpy as np


class UnionFind:
    def __init__(self, n):
        self.parent = list(range(n))
        self.rank = [0] * n

    def find(self, x):
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])  # Path compression
        return self.parent[x]

    def union(self, x, y):
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x != root_y:
            if self.rank[root_x] > self.rank[root_y]:
                self.parent[root_y] = root_x
            elif self.rank[root_x] < self.rank[root_y]:
                self.parent[root_y] = root_y
            else:
                self.parent[root_y] = root_x
                self.rank[root_x] += 1

def read_graph(file_path):
    """
    Reads a weighted directed graph from a file. Each line contains three values:
    start vertex, end vertex, and edge weight.

    Args:
        file_path (str): Path to the file containing the graph.

    Returns:
        edges (list): List of tuples representing directed edges (start, end, weight).
    """
    edges = []
    try:
        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if line:  # Skip empty lines
                    start, end, weight = map(float, line.split())
                    edges.append((int(start), int(end), weight))
    except Exception as e:
        print(f"Error reading graph: {e}")
    return edges

def initialize_graph(edges):
    """Converts edge list to adjacency list and weights dictionary."""
    graph = defaultdict(list)
    weights = {}
    for u, v, w in edges:
        graph[u].append(v)
        weights[(u, v)] = w
    return graph, weights

def find_cycles_and_reduce(graph, weights, n):
    """Phase 1: Find cycles and reduce weights using a copy."""
    weights_copy = weights.copy()  # Work with a copy of weights
    removed_edges = set()
    removed_weights = {}
    
    while True:
        cycle = find_cycle(graph,n)
        if not cycle:  # No cycle found
            break

        # Ensure all edges in the cycle exist in the weights dictionary
        cycle = [(u, v) for u, v in cycle if (u, v) in weights_copy]

        if not cycle:  # If no valid cycle exists, continue
            continue

        min_weight = min(weights_copy[(u, v)] for u, v in cycle)

        for u, v in cycle:
            weights_copy[(u, v)] -= min_weight
            if weights_copy[(u, v)] <= 0:
                graph[u].remove(v)
                removed_edges.add((u, v))
                removed_weights[(u, v)] = weights[(u, v)]

    return removed_edges, removed_weights




from collections import deque

def find_cycle(graph, n):
    """Detect a cycle in the graph using DFS and return the cycle as a list of edges."""
    visited = [False] * n
    stack = [False] * n
    parent = [-1] * n

    def dfs(v):
        visited[v] = True
        stack[v] = True
        for neighbor in graph[v]:
            if not visited[neighbor]:
                parent[neighbor] = v
                cycle = dfs(neighbor)
                if cycle:
                    return cycle
            elif stack[neighbor]:
                # Found a cycle, reconstruct it
                cycle = []
                current = v
                while current != neighbor:
                    cycle.append((parent[current], current))
                    current = parent[current]
                cycle.append((parent[neighbor], neighbor))
                return cycle
        stack[v] = False
        return None

    for i in range(n):
        if not visited[i]:
            cycle = dfs(i)
            if cycle:
                return cycle
    return None

from collections import deque

def find_minimum_weight_cycle(graph, weights, n):
    """
    Find the minimum weight cycle in the graph using BFS.

    :param graph: Adjacency list representation of the graph.
    :param weights: Dictionary of edge weights.
    :param n: Total number of vertices in the graph.
    :return: List of edges representing the minimum weight cycle.
    """
    min_cycle_weight = float('inf')
    min_cycle = None

    for start in range(n):
        # Use BFS to explore all nodes starting from 'start' vertex
        queue = deque([(start, -1, 0)])  # (current_node, parent_node, current_weight)
        visited = {start: (None, 0)}  # {node: (parent, current_weight)}

        while queue:
            current, parent, current_weight = queue.popleft()

            for neighbor in graph[current]:
                if neighbor == parent:
                    continue

                weight = weights.get((current, neighbor), float('inf'))
                new_weight = current_weight + weight

                if neighbor in visited:
                    # Found a cycle
                    cycle_weight = new_weight - visited[neighbor][1] + weight
                    if cycle_weight < min_cycle_weight:
                        min_cycle_weight = cycle_weight
                        # Reconstruct the cycle
                        cycle = []
                        
                        # Trace back from the current node to reconstruct the cycle
                        x = current
                        while x != -1:
                            if visited[x][0] is None:
                                break
                            cycle.append((visited[x][0], x))
                            x = visited[x][0]

                        # Add the edge that completes the cycle
                        cycle.append((current, neighbor))

                        # Trace back from the neighbor node to reconstruct the rest of the cycle
                        y = neighbor
                        while y != -1 and y != current:
                            if visited[y][0] is None:
                                break
                            cycle.append((visited[y][0], y))
                            y = visited[y][0]

                        # Update the minimum cycle found
                        min_cycle = [(u, v) for u, v in cycle if u is not None]
                else:
                    # Mark the neighbor as visited and continue the BFS
                    visited[neighbor] = (current, new_weight)
                    queue.append((neighbor, current, new_weight))

    # If a cycle was found, reverse the list to maintain the correct order
    if min_cycle:
        min_cycle.reverse()

    return min_cycle if min_cycle else None







def check_and_readd_edges(graph, removed_edges, n):
    """Phase 2: Check and re-add edges if they do not create a cycle."""
    uf = UnionFind(n)
    for u in graph:
        for v in graph[u]:
            uf.union(u, v)

    readded_edges = set()
    f=sorted(list(removed_edges),reverse=True)
    for u, v in f:
        if uf.find(u) != uf.find(v):
            graph[u].append(v)
            uf.union(u, v)
            readded_edges.add((u, v))

    return removed_edges - readded_edges
def mwfas(file_path):
    """
    Main function to find Minimum Weighted Feedback Arc Set (MWFAS).
    :param file_path: Path to the file containing the graph.
    :return: A dictionary with metrics, updated graph, removed edges, and their weights.
    """
    edges = read_graph(file_path)
    n = max(max(u, v) for u, v, _ in edges) + 1
    graph, weights = initialize_graph(edges)

    # Original graph statistics
    total_edges = len(edges)
    total_weight = sum(w for _, _, w in edges)

    # Phase 1: Reduce cycles
  #  print("Before Phase 1:")
  #  print("Graph:", graph)
  #  print("Weights:", weights)
    
    removed_edges, removed_weights = find_cycles_and_reduce(graph, weights, n)
    
   # print("After Phase 1:")
   # print("Removed edges:", removed_edges)
   # print("Removed weights:", removed_weights)

    # Phase 2: Re-add edges (if applicable)
    removed_edges = check_and_readd_edges(graph, removed_edges, n)

    # Compute final metrics
    num_removed_edges = len(removed_edges)
    total_removed_weight = sum(removed_weights.get(edge, 0) for edge in removed_edges)

#    print("Final Results:")
#    print("Removed edges:", removed_edges)
#    print("Weights of removed edges:", {edge: removed_weights.get(edge, 0) for edge in removed_edges})
#    print("Final graph:", graph)

    # Return results
    return {
        "total_edges": total_edges,
        "total_weight": total_weight,
        "num_removed_edges": num_removed_edges,
        "removed_weight": total_removed_weight,
        "final_graph": graph,
        "removed_edges": removed_edges,
        "removed_weights": {edge: removed_weights.get(edge, 0) for edge in removed_edges},
    }

    

# Example usage




In [32]:
# After feedback arc set removal
def compute_vertex_rankings(graph, weights, n):
 #   print("in the input dag to topol sort ", "graph is ",graph, "and weights are ",weights)
    """
    Compute rankings for the vertices in a DAG.
    :param graph: Adjacency list of the DAG.
    :param weights: Dictionary of edge weights.
    :param n: Total number of vertices in the graph.
    :return: A list of rankings for the vertices.
    """
    # Step 1: Calculate in-degrees
    in_degree = [0] * n
    for u in graph:
        for v in graph[u]:
            in_degree[v] += 1

    # Step 2: Perform topological sort using a min-heap
    from heapq import heappop, heappush
    min_heap = []
    for i in range(n):
        if in_degree[i] == 0:
            heappush(min_heap, i)

    topological_order = []
    while min_heap:
        current = heappop(min_heap)
        topological_order.append(current)
        for neighbor in graph[current]:
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                heappush(min_heap, neighbor)

    # Step 3: Calculate outgoing edge weight sums for all vertices
    outgoing_weights = {v: 0 for v in range(n)}
    incoming_weights = {v: 0 for v in range(n)}

    for u in graph:
        for v in graph[u]:
            outgoing_weights[u] += weights.get((u, v), 0)
            incoming_weights[v] += weights.get((u, v), 0)


    # Step 4: Assign rankings
    rankings = [-1] * n
    current_rank = 0
    for vertex in topological_order:
        rankings[vertex] = current_rank
        current_rank += 1

    # Break ties for vertices with the same ranking based on outgoing edge weights
    tied_vertices = sorted(
        [(rankings[v], -(outgoing_weights[v] - incoming_weights[v]) / 
          (outgoing_weights[v] + incoming_weights[v] if outgoing_weights[v] + incoming_weights[v] > 0 else 1), v)
         for v in range(n)],
        key=lambda x: (x[0], x[1])
    )

    scores = [0] * n
    for final_rank, (_, _, vertex) in enumerate(tied_vertices):
        scores[vertex]=n-final_rank-1

    print(scores)
    return scores

# Ensure graph is a DAG after feedback arc set removal



In [33]:
def graph_to_adjacency_matrix(graph, weights, n):
    """
    Converts a graph represented as an adjacency list with weights to an adjacency matrix.

    :param graph: Adjacency list of the graph.
    :param weights: Dictionary of edge weights where the key is a tuple (u, v) and the value is the weight.
    :param n: Number of vertices in the graph.
    :return: A torch.FloatTensor adjacency matrix with weights.
    """
    adjacency_matrix = torch.zeros((n, n))
    for u in graph:
        for v in graph[u]:
            adjacency_matrix[u, v] = weights[(u, v)] # Weighted edge from u to v
  #  print("adjacency matrix= ",adjacency_matrix)
 #   print(adjacency_matrix)
    return adjacency_matrix




In [34]:
def calculate_upset_loss(adjacency_matrix, scores, style='ratio', margin=0.01):
    """
    Calculate the upset loss for the graph rankings using adjacency matrix and scores.

    :param adjacency_matrix: Torch FloatTensor adjacency matrix (n x n).
    :param scores: Torch FloatTensor ranking scores (n x 1).
    :param style: Type of upset loss ('naive', 'simple', 'ratio', or 'margin').
    :param margin: Margin for margin loss (default: 0.01).
    :return: Torch FloatTensor upset loss value.
    """
    epsilon = torch.FloatTensor([1e-8]).to(scores.device)  # For numerical stability
    M1 = adjacency_matrix - adjacency_matrix.T  # Pairwise weight differences

    # Normalize scores to [0, 1] range using (r_i + 1) / 2
    normalized_scores = (scores + 1) / 2

    T1 = normalized_scores - normalized_scores.T  # Pairwise score differences

    # Ensure scores is 2D
    if scores.ndim == 1:
        scores = scores.view(-1, 1)

    # Only consider actual edges (nonzero entries in the adjacency matrix)
    edge_indices = adjacency_matrix != 0

    if style == 'naive':
        upset = torch.sum((torch.sign(T1[edge_indices]) != torch.sign(M1[edge_indices]))) / torch.sum(edge_indices)
    elif style == 'simple':
        upset = torch.mean(torch.pow(torch.sign(T1[edge_indices]) - torch.sign(M1[edge_indices]), 2))
    elif style == 'margin':
        upset = torch.mean(torch.nn.ReLU()(-M1[edge_indices] * (T1[edge_indices] - margin)))
    elif style == 'ratio':
        T = T1 / (scores + scores.T + epsilon)  # Normalized pairwise score differences
        M = M1 / (adjacency_matrix + adjacency_matrix.T + epsilon)  # Normalized pairwise weight differences
        upset = torch.mean(torch.pow((M - T)[edge_indices], 2))

    return upset


 




In [35]:
def evaluate_upset_losses(file_path, rankings):
    """
    Evaluate upset losses (naive, simple, ratio, margin) for a graph and given rankings.

    :param file_path: Path to the graph file.
    :param rankings: List of rankings for the vertices.
    """
    # Step 1: Prepare Graph and Adjacency Matrix
    edges = read_graph(file_path)
    n = max(max(u, v) for u, v, _ in edges) + 1
    graph, weights = initialize_graph(edges)
    adjacency_matrix = graph_to_adjacency_matrix(graph, weights, n)

    # Step 2: Convert Rankings to Scores Tensor
    scores = torch.FloatTensor(rankings).view(-1, 1)

    # Step 3: Calculate Upset Losses
    naive_loss = calculate_upset_loss(adjacency_matrix, scores, style='naive').item()
    simple_loss = calculate_upset_loss(adjacency_matrix, scores, style='simple').item()
    ratio_loss = calculate_upset_loss(adjacency_matrix, scores, style='ratio').item()
    margin_loss = calculate_upset_loss(adjacency_matrix, scores, style='margin').item()

    # Step 4: Print Results
    print("Upset Losses for the Graph Rankings:")
    print(f"Naive Upset Loss: {naive_loss:.4f}")
    print(f"Simple Upset Loss: {simple_loss:.4f}")
    print(f"Differentiable Upset Loss (Ratio): {ratio_loss:.4f}")
    print(f"Upset Margin Loss: {margin_loss:.4f}")


In [46]:
# Main Execution
import time
start_time = time.time()
# Step 1: Read the graph and initialize structures
file_path = "Business_FM_All.txt"  # Replace with your dataset file
edges = read_graph(file_path)
n = max(max(u, v) for u, v, _ in edges) + 1
print("number of nodes= ",n," and number of edges= ",len(edges))
init_graph, init_weights = initialize_graph(edges)

#print("initial graph weights are", init_weights)

result = mwfas(file_path)
#print(f"Total edges in the graph: {result['total_edges']}")
#print(f"Sum of weights of the edges: {result['total_weight']:.2f}")
#print(f"Number of deleted edges: {result['num_removed_edges']}")
#print(f"Sum of weights of the deleted edges: {result['removed_weight']:.2f}")

# Step 2: Ensure the graph is a DAG by removing cycles
#removed_edges, removed_weights = find_cycles_and_reduce(graph, weights, n)
#removed_edges = check_and_readd_edges(graph, removed_edges, n)

# Remove `removed_edges` from the graph
new_graph=result['final_graph']
new_weights = {key: value for key, value in init_weights.items() if key not in result['removed_weights']}

#print("weights after removing some edges=",new_weights)
#print("graph after removing some edges=",new_graph)
# Step 3: Compute rankings for the vertices using the modified graph
final_rankings = compute_vertex_rankings(new_graph, new_weights, n)

# Step 4: Print the rankings
#print("Vertex Rankings:")
#for vertex, rank in enumerate(final_rankings):
#    print(f"Vertex {vertex}: Rank {rank}")

# Step 5: Evaluate upset losses
# Convert rankings to scores (Torch Tensor)
scores = torch.FloatTensor(final_rankings).view(-1, 1)

# Convert the graph to an adjacency matrix
adjacency_matrix = graph_to_adjacency_matrix(init_graph, init_weights,n)

# Calculate upset losses
naive_loss = calculate_upset_loss(adjacency_matrix, scores, style='naive').item()
simple_loss = calculate_upset_loss(adjacency_matrix, scores, style='simple').item()
ratio_loss = calculate_upset_loss(adjacency_matrix, scores, style='ratio').item()
margin_loss = calculate_upset_loss(adjacency_matrix, scores, style='margin').item()
# Print performance metrics
print("\nPerformance Metrics:")
print(f"Naive Upset Loss: {naive_loss:.4f}")
print(f"Simple Upset Loss: {simple_loss:.4f}")
print(f"Ratio Upset Loss: {ratio_loss:.4f}")
#print(f"Margin Upset Loss: {margin_loss:.4f}")
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time} seconds")
#print(f"Differentiable Upset Loss (Ratio): {ratio_loss:.4f}")



number of nodes=  113  and number of edges=  2796
[93, 111, 109, 105, 110, 104, 108, 103, 107, 102, 106, 92, 100, 101, 91, 90, 89, 99, 88, 98, 95, 87, 86, 85, 84, 83, 82, 97, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 64, 71, 70, 69, 68, 45, 67, 66, 65, 46, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 44, 52, 51, 43, 96, 50, 49, 37, 48, 42, 47, 41, 40, 36, 35, 34, 39, 38, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 94, 5, 4, 3, 2, 1, 0, 112]

Performance Metrics:
Naive Upset Loss: 0.1005
Simple Upset Loss: 0.4020
Ratio Upset Loss: 0.7190
Elapsed time: 0.11309313774108887 seconds
