# Assortivity Analysis of the oviIN connectome
This will need several things:
    
1. assortivity of the wholebrain data
2. assortivity of the oviIN connectome
3. filtering and assortivity analysis for each of the clusters at a higher resolution
4. plotting a line plot that visibily shows where the different clusters fall (resolutions could be plotted by color?)

In [2]:
from util import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import networkx as nx
from neuprint import Client, fetch_adjacencies, NeuronCriteria as NC


auth_token_file = open("/Users/rw2822/Documents/GitHub/flybrain-clustering/flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:v1.2.1', token=auth_token)
    print("Connected to neuprint")
except:
    np_client = None

Connected to neuprint


In [3]:
pwd

'/Users/rw2822/Documents/GitHub/flybrain-clustering'

## Pulling wholebrain data for assortivity

In [None]:
# Import file, create dataframe and create networkx graph
hemibrain_version = "v1.2"
direct = "exported-traced-adjacencies-v1.2/"
filename = "traced-roi-connections.csv"

log_msg("Loading node dataframe")
wb = pd.read_csv(os.path.join(direct, filename))
log_msg("Done!")
wb = wb.groupby(['bodyId_pre', 'bodyId_post'], as_index=False)['weight'].sum()
wb

2024 03 25 22:14:03  Loading node dataframe
2024 03 25 22:14:04  Done!


Unnamed: 0,bodyId_pre,bodyId_post,weight
0,200326126,264083994,3
1,200326126,295816140,5
2,200326126,296203440,1
3,200326126,325122109,2
4,200326126,326474963,1
...,...,...,...
3550398,7112622236,328283521,1
3550399,7112622236,357932060,1
3550400,7112622236,357940977,1
3550401,7112622236,358631450,1


In [4]:
wbG = nx.from_pandas_edgelist(wb, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())

In [5]:
assort_wb = nx.degree_assortativity_coefficient(wbG)

# Planning process to do this for each of the clusters
1. Seperate per cluster and isolate bodyIDs \
    a. Pull in partition data\
    b. Filter per cluster\
    c. isolate bodyIds
2. Use bodyIds to pull adjacencies to and from that list of bodyIds
3. Use edgelist to calculate assortivity and add value to dataframe
4. Plot values in line plot


In [4]:
# Pull in full dataframe connectome data
full = pd.read_csv(os.path.join('oviIN_combined/preprocessed-v1.2.1/', 'preprocessed_nodes.csv'), index_col=0)

In [5]:
# Filter by cluster at 0.0 resolution
cluster1 = full[full['0.1'] == 1]
cluster2 = full[full['0.0'] == 2]
cluster3 = full[full['0.0'] == 3]
cluster4 = full[full['0.0'] == 4]
cluster5 = full[full['0.0'] == 5]
cluster6 = full[full['0.0'] == 6]

In [6]:
cluster1_ids = cluster1.index.tolist()
cluster1_ids

[1013425227]

In [7]:
# Fetch the adjacencies for the cluster
log_msg("Fetching adjacencies")
_, edgelist_1 = fetch_adjacencies(NC(bodyId=cluster1_ids))
log_msg("Done!")

2024 06 11 12:42:56  Fetching adjacencies
2024 06 11 12:42:56  Done!


In [8]:
edgelist_1

Unnamed: 0,bodyId_pre,bodyId_post,roi,weight


In [64]:
# Instantiate nx item
cluster1G = nx.from_pandas_edgelist(edgelist_1, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())
assort_c1 = nx.degree_assortativity_coefficient(cluster1G)


invalid value encountered in scalar divide



In [12]:
assort_c1

-0.2133818498437977

In [13]:
# Add the degree assortativity to a dataframe that has row names and a column of assortivity values
assort_df = pd.DataFrame(columns=['assortativity'], index=['whole', '0.0_1', '0.0_2', '0.0_3', '0.0_4', '0.0_5', '0.0_6'])
assort_df.loc['whole'] = -0.022964
assort_df.loc['0.0_1'] = assort_c1

# Print dataframe
assort_df

Unnamed: 0,assortativity
whole,-0.022964
0.0_1,-0.213382
0.0_2,
0.0_3,
0.0_4,
0.0_5,
0.0_6,


In [9]:
# Create a function out of it to make it easier to run on all clusters
def assortativity(cluster, x):
    cluster_ids = cluster.index.tolist()
    _, edgelist = fetch_adjacencies(NC(bodyId=cluster_ids))
    print(x)
    if len(edgelist) <= 10:
        return np.nan
    clusterG = nx.from_pandas_edgelist(edgelist, 'bodyId_pre', 'bodyId_post', edge_attr='weight', create_using=nx.DiGraph())
    assort = nx.degree_assortativity_coefficient(clusterG)
    return assort

# Using this function from jaccard sim to get the right clusters
def get_clusters(df, resolution):
    cluster_list = df[resolution].unique().tolist()
    return cluster_list

# Create function that does the whole process of dividing clusters and calculating assortativity
def main_assort(df, resolution):
    cluster_list = get_clusters(df, resolution)
    assort_df = pd.DataFrame(columns=['assortativity'], index=[f'{resolution}_{x}' for x in cluster_list])
    for i, x in enumerate(cluster_list):
        cluster = df[df[resolution] == x]
        assort = assortativity(cluster, x)
        assort_df.loc[f'{resolution}_{x}'] = assort
    return assort_df

                                       

In [48]:
# Run function for 0.0 resolution and 6 clusters
full_df = main_assort(full, '0.0')

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

1


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

2


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

3


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

4


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

5


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

6


In [49]:
full_df

Unnamed: 0,assortativity
0.0_1,-0.213382
0.0_2,-0.157566
0.0_3,-0.181511
0.0_4,-0.246467
0.0_5,-0.192219
0.0_6,-0.176241


In [19]:
# Get cluster numbers for 0.1 
chi01 = '0.1'
chi05 = '0.5'
cluster_01 = get_clusters(full, chi01)
cluster_05 = get_clusters(full, chi05)

In [14]:
len(cluster_01)

1598

In [20]:
df_full01 = main_assort(full, chi01)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

1


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

2


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

3


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

4


  0%|          | 0/3 [00:00<?, ?it/s]

5


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

6


  0%|          | 0/2 [00:00<?, ?it/s]

7
8


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

9
10
11
12


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

13
14
15
16
17
18
19
20
21
22
23


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


120
121


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


122
123
124
125
126
127
128
129
130
131


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


132
133
134
135
136
137
138
139


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


140
141
142
143
144
145


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


146
147
148
149
150


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


151
152
153
154
155
156
157
158
159
160


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


161


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


162
163
164
165
166
167


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


168
169
170
171
172
173
174
175
176
177
178
179


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


180
181


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


182
183
184
185
186


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


187
188
189
190
191
192
193
194


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


195
196
197
198
199
200
201
202
203
204
205
206
207


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


208
209
210
211
212
213
214
215


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


216
217
218
219


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


250
251
252
253
254
255
256
257
258
259
260


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


261
262


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


263
264


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


265
266
267


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


268


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


269
270


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


271
272


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


273
274


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


275
276


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


277
278
279
280
281
282
283
284
285
286
287
288
289
290


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


291
292
293
294
295
296
297
298
299
300


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


332


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


333
334
335
336
337
338
339
340
341
342


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


420
421
422
423
424
425
426
427
428
429
430
431
432
433


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


434
435
436
437


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


489
490
491
492
493
494
495
496
497
498
499
500
501
502


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


556
557
558
559
560


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


561
562
563
564


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


611
612
613
614
615
616
617
618
619
620
621
622


In [83]:
df_full01_1 = main_assort(full, chi05)

TypeError: main_assort() takes 2 positional arguments but 3 were given

In [50]:
full_df = pd.concat([full_df, df_full01])

In [51]:
# Append the whole connectome assortativity to the dataframe at the top of the dataframe
full_df.loc['whole'] = -0.022964

# Drop the rows that have NaN values
full_df = full_df.dropna()

In [58]:
# Create a numberline plot for the values using plotly
import plotly.graph_objects as go

fig = go.Figure()
# Set layout
fig.update_layout(
    title='Assortativity Numberline Plot',
    yaxis=dict(
        showgrid=False,
        showticklabels=False,
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='black',
    ),
    xaxis=dict(
        title='Assortativity',
        showgrid=False,
        zeroline=False,
        showticklabels=True,
        tickwidth=10,
        range=[-.6, 0.4],
    ),
    
)
fig.update_yaxes(range=[-0.2, 0.3])
fig.update_xaxes(ticks='inside')

# Add the values to the plot
for i in range(6):
    fig.add_trace(go.Scatter(x=[full_df.iloc[i, 0]], y=[0], mode='markers', marker=dict(size=5), name=full_df.index[i]))

# Add the rest of the values as squares
for i in range(6, 73):
    fig.add_trace(go.Scatter(x=[full_df.iloc[i, 0]], y=[0], mode='markers', marker=dict(size=5, symbol='square'), name=full_df.index[i]))

# Add whole connectome value as a black triangle size 15
fig.add_trace(go.Scatter(x=[full_df.iloc[-1, 0]], y=[0], mode='markers', marker=dict(size=10, symbol='triangle-up', color='black'), name=full_df.index[-1]))  


# Show the plot
fig.show()

