In [3]:
import pandas as pd

from google.colab import drive

# Mount Google Drive so the CSV inputs below are reachable from the Colab VM.
drive.mount('/content/drive')

# === INPUTS ===
# ranking_file: node ordering to evaluate; edge_file: weighted directed edge list.
ranking_file = "/content/drive/MyDrive/35461047-Soroush-Ioannis-advancedimprove.csv"
#ranking_file = "/content/drive/MyDrive/start_8285_refine.xlsx"
edge_file = "/content/drive/MyDrive/connectome_graph.csv"

# === LOAD EDGES ===
df = pd.read_csv(edge_file)
# Header names are stripped of surrounding whitespace before renaming.
df.columns = [col.strip() for col in df.columns]
# NOTE(review): 'Source Node  ID' is spelled with two spaces; presumably this
# mirrors the CSV header exactly -- confirm before "fixing" it, because an
# unmatched rename is silent and only surfaces as a KeyError on df['source'].
df = df.rename(columns={
    'Source Node  ID': 'source',
    'Target Node ID': 'target',
    'Edge Weight': 'weight'
})

# Force node IDs to 64-bit integers.
df['source'] = df['source'].astype(str).astype('int64')
df['target'] = df['target'].astype(str).astype('int64')

# Build the rows column by column instead of via df[[...]].values: .values
# coerces all three columns to one common dtype, so a float 'weight' column
# would turn the 64-bit node IDs into float64 and silently round them
# (they exceed 2**53), breaking the node_to_index lookups below.
edges_raw = [
    [src, dst, wt]
    for src, dst, wt in zip(
        df['source'].tolist(), df['target'].tolist(), df['weight'].tolist()
    )
]

# Dense 0..N-1 index over every node that appears as a source or a target.
node_ids = sorted(set(df['source']) | set(df['target']))
node_to_index = {node_id: i for i, node_id in enumerate(node_ids)}
index_to_node = {i: node_id for node_id, i in node_to_index.items()}

# Edges re-expressed in dense indices: (source_index, target_index, weight).
edges = [(node_to_index[u], node_to_index[v], w) for u, v, w in edges_raw]
total_weight = sum(w for _, _, w in edges)

# === LOAD RANKING ===
init_df = pd.read_csv(ranking_file)
init_df.columns = [col.strip() for col in init_df.columns]
node_col = "Node ID"
rank_col = "Order"
#node_col = "node_id"
#rank_col = "rank"

# Convert IDs to int64 *before* de-duplicating. Otherwise two rows whose IDs
# differ only textually but parse to the same integer both survive
# drop_duplicates, and dict(zip(...)) below would then keep the LAST one,
# contradicting keep="first". Doing the assignment on the freshly read frame
# also avoids writing into the filtered frame returned by drop_duplicates.
init_df[node_col] = init_df[node_col].astype(str).astype("int64")
init_df = init_df.drop_duplicates(subset=node_col, keep="first")

# node ID -> rank exactly as listed in the ranking file.
init_ranking_raw = dict(zip(init_df[node_col], init_df[rank_col]))
# Graph nodes absent from the file all share one rank placed after every listed rank.
default_rank = max(init_ranking_raw.values(), default=0) + 1
# Rank for every graph node (node_ids comes from the edge-loading step).
init_ranking = {node: init_ranking_raw.get(node, default_rank) for node in node_ids}

# === COMPUTE FORWARD WEIGHT + DEBUG ===
# Reports how much edge weight points "forward" under init_ranking, i.e. from
# a node with a strictly smaller rank to a node with a larger rank.

# Show total ranked nodes
print(f"üß† Number of nodes in graph: {len(node_ids)}")
print(f"üìã Number of nodes in ranking file: {len(init_ranking_raw)}")

# Check if any nodes were assigned default_rank
missing_nodes = [node for node in node_ids if node not in init_ranking_raw]
if missing_nodes:
    print(f"‚ö†Ô∏è {len(missing_nodes)} nodes missing in the ranking file. Showing up to 10:")
    print(missing_nodes[:10])
else:
    print("‚úÖ All graph nodes have ranks.")

# Compute and collect debug info
forward_weight_init = 0
sample_debug = []

for u, v, w in edges:
    # edges holds dense indices; translate back to node IDs to look up ranks.
    node_u = index_to_node[u]
    node_v = index_to_node[v]
    rank_u = init_ranking[node_u]
    rank_v = init_ranking[node_v]

    # Strict comparison: an edge between two equally ranked nodes is NOT forward.
    is_forward = rank_u < rank_v
    if is_forward:
        forward_weight_init += w

    # Keep only the first 10 edges as a human-readable sample.
    if len(sample_debug) < 10:
        sample_debug.append((node_u, rank_u, node_v, rank_v, is_forward, w))

# Print sample edge info
print("\nüîç Sample of edge evaluations (u ‚Üí v):")
for u, ru, v, rv, fwd, w in sample_debug:
    direction = "‚úì forward" if fwd else "‚úó backward"
    print(f"  ({u} ‚Üí {v}) with weight {w} ‚Äî ranks: {ru} ‚Üí {rv} ‚Üí {direction}")

# === PRINT RESULTS ===
print(f"\nüìä Initial forward edge weight: {forward_weight_init:.2f} / {total_weight:.2f}")
# Guard the ratio: an empty edge list (or all-zero weights) makes total_weight 0,
# which would otherwise abort the cell with ZeroDivisionError.
if total_weight:
    print(f"üìà Initial forward edge ratio: {forward_weight_init / total_weight:.6f}")
else:
    print("Initial forward edge ratio: n/a (total edge weight is 0)")
# === CHECK FOR DUPLICATE RANKS ===
from collections import defaultdict

# Invert node -> rank into rank -> [nodes] so shared ranks become visible.
rank_to_nodes = defaultdict(list)
for node, rank in init_ranking.items():
    rank_to_nodes[rank].append(node)

# Retain only the ranks held by two or more nodes.
duplicates = dict(
    (shared_rank, holders)
    for shared_rank, holders in rank_to_nodes.items()
    if len(holders) >= 2
)

if not duplicates:
    print("‚úÖ All initial ranks are unique.")
else:
    print(f"\n‚ùå Detected {len(duplicates)} duplicate ranks in the initial ranking.")
    # Report at most the ten smallest duplicated ranks, previewing five nodes each.
    first_ten = sorted(duplicates.items())[:10]
    for rank, nodes in first_ten:
        print(f"  Rank {rank} is assigned to {len(nodes)} nodes: {nodes[:5]}{'...' if len(nodes) > 5 else ''}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üß† Number of nodes in graph: 136648
üìã Number of nodes in ranking file: 136648
‚úÖ All graph nodes have ranks.

üîç Sample of edge evaluations (u ‚Üí v):
  (720575940629970489 ‚Üí 720575940631267655) with weight 8 ‚Äî ranks: 108671 ‚Üí 114805 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940620005542) with weight 16 ‚Äî ranks: 108671 ‚Üí 118315 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940626081319) with weight 7 ‚Äî ranks: 108671 ‚Üí 114665 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940652258806) with weight 5 ‚Äî ranks: 108671 ‚Üí 119916 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940624508563) with weight 21 ‚Äî ranks: 108671 ‚Üí 113208 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940620060838) with weight 8 ‚Äî ranks: 108671 ‚Üí 117910 ‚Üí ‚úì forward
  (720575940629970489 ‚Üí 720575940652336801) with weight 23 ‚Äî ranks: 108671 ‚

In [None]:
import pandas as pd

from google.colab import drive

# Mount Google Drive so the ranking CSV is reachable from the Colab VM.
drive.mount('/content/drive')

# === INPUT FILE ===
# NOTE: output_file is the same path as input_file, so this cell rewrites the
# ranking in place. Point output_file elsewhere to keep the original.
input_file = "/content/drive/MyDrive/35461047-Soroush-Ioannis-advancedimprove.csv"
output_file = "/content/drive/MyDrive/35461047-Soroush-Ioannis-advancedimprove.csv"

# === LOAD DATA ===
df = pd.read_csv(input_file)
df.columns = [col.strip() for col in df.columns]
# Node IDs are kept as text here; they are only passed through to the output.
df['Node ID'] = df['Node ID'].astype(str).str.strip()

# === SORT AND ASSIGN UNIQUE RANKS ===
# Use a stable sort so rows sharing the same 'Order' keep their file order.
# The default algorithm is not stable, which would break ties arbitrarily.
df = df.sort_values(by='Order', kind='mergesort').reset_index(drop=True)
# Replace 'Order' with consecutive ranks 0..len(df)-1 in the sorted order.
df['Order'] = range(len(df))

# === SAVE (OVERWRITES THE INPUT FILE, SEE NOTE ABOVE) ===
df.to_csv(output_file, index=False)

print(f"‚úÖ Corrected ranking saved to: {output_file}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úÖ Corrected ranking saved to: /content/drive/MyDrive/35461047-Soroush-Ioannis-advancedimprove.csv
