## Convert a CSV file of connections to Pajek file format for Infomap input

In [9]:
import pandas as pd

# Load the connection table.
# The root IDs are 18-digit integers, which is beyond the range float64 can
# represent exactly. Pin both ID columns to int64 so that a column pandas
# would otherwise infer as float64 (e.g. because of a missing value) makes the
# read fail loudly instead of silently rounding neuron IDs.
df = pd.read_csv(
    "connections_princeton.csv",
    dtype={"pre_root_id": "int64", "post_root_id": "int64"},
)

In [4]:
# Step 4: Assign every neuron a Pajek vertex index.
# Flatten the pre/post ID columns into one array, keep each ID once (in order
# of first appearance), and number the survivors starting from 1.
endpoint_ids = df[['pre_root_id', 'post_root_id']].values.ravel()
unique_ids = pd.unique(endpoint_ids)
id_to_index = {
    neuron_id: vertex_index
    for vertex_index, neuron_id in enumerate(unique_ids, start=1)
}

# Sanity check: report how many distinct neurons were found.
print(f"Total unique neurons: {len(unique_ids)}")


Total unique neurons: 138584


In [5]:
# Step 5: Write to Pajek .net file
net_path = "fly_connectome.net"

with open(net_path, 'w') as f:
    # Vertex section: one line per neuron, `<1-based index> "<root id>"`.
    f.write(f"*Vertices {len(unique_ids)}\n")
    f.writelines(
        f'{idx} "{neuron_id}"\n' for neuron_id, idx in id_to_index.items()
    )

    # Link section: one `<pre index> <post index> <syn_count>` line per row.
    # The links are directed (pre -> post), so the section is headed `*Arcs`;
    # in the Pajek format `*Edges` denotes undirected links.
    f.write("\n*Arcs\n")

    # Iterate column-wise rather than with df.iterrows(): iterrows builds one
    # Series per row and does not preserve per-column dtypes, so the 18-digit
    # integer IDs could be upcast to float64 and no longer match the keys of
    # id_to_index. Column-wise iteration keeps each column's own dtype and is
    # also much faster on a large table. A missing ID still raises KeyError.
    link_columns = zip(
        df['pre_root_id'].tolist(),
        df['post_root_id'].tolist(),
        df['syn_count'].tolist(),
    )
    f.writelines(
        f"{id_to_index[pre_id]} {id_to_index[post_id]} {weight}\n"
        for pre_id, post_id, weight in link_columns
    )

print(f"Pajek .net file written to: {net_path}")

Pajek .net file written to: fly_connectome.net


In [6]:
# Step 6: Validate output
# Preview the head of the generated file: read the first 20 lines, trim the
# surrounding whitespace from each, and echo them one per line.
preview_line_count = 20
with open(net_path, 'r') as net_file:
    head_lines = [net_file.readline().strip() for _ in range(preview_line_count)]
print("\n".join(head_lines))

*Vertices 138584
1 "720575940625363947"
2 "720575940623224444"
3 "720575940630432382"
4 "720575940618518557"
5 "720575940627314521"
6 "720575940626337738"
7 "720575940620280405"
8 "720575940620204726"
9 "720575940636942447"
10 "720575940613789411"
11 "720575940608359298"
12 "720575940613493733"
13 "720575940640167477"
14 "720575940629440518"
15 "720575940622800116"
16 "720575940628108309"
17 "720575940637518960"
18 "720575940614827922"
19 "720575940615643835"
