In [1]:
using CSV, DataFrames, LightGraphs, SimpleWeightedGraphs, GraphPlot, Compose, Colors, Metis, Statistics, Cairo, NamedArrays;

In [2]:
upstream_df = CSV.read("LC14 Upstream Connections.csv")
downstream_df = CSV.read("LC14 downstream Connections.csv");

### Clean up Missing Data

In [3]:
replace!(upstream_df["type_pre"], missing => "")
replace!(upstream_df["type_post"], missing => "")
replace!(downstream_df["type_pre"], missing => "")
replace!(downstream_df["type_post"], missing => "");

Sanity check. Every identified post-synapse to an LC14 neuron has a corresponding pre-synapse connection.

In [4]:
println(sum(downstream_df.type_post .== "LC14"))
println(sum(upstream_df.type_pre .== "LC14"))

98
98


### Lets turn this into a Graph

In [5]:
self_connections = downstream_df[downstream_df.type_post .== "LC14", :];
unique_ids = unique([self_connections["bodyId_pre"]; self_connections["bodyId_post"]]);

First let's have a look at how many of the total LC14 neurons we've been able to account for here.

In [6]:
@show length(unique_ids)
@show length(unique(upstream_df[upstream_df["type_post"].== "LC14",:]["bodyId_post"]));

length(unique_ids) = 58
length(unique((upstream_df[upstream_df["type_post"] .== "LC14", :])["bodyId_post"])) = 70


About 60/70, not bad.

Now let's asing each LC14 bodyId a simple 1:58 Id that we will use as identifiers in the graph

In [7]:
body_to_graph = Dict{Int, Int}();
sizehint!(body_to_graph, length(unique_ids));
for (i, id) in enumerate(unique_ids)
    body_to_graph[id] = i
end

graph_to_body = Dict{Int, Int}();
sizehint!(graph_to_body, length(unique_ids));
for (i, id) in enumerate(unique_ids)
    graph_to_body[i] = id
end

Now we can turn this into a graph

In [8]:
g = SimpleGraph(length(unique_ids));
for row in eachrow(self_connections)
    add_edge!(g, body_to_graph[row["bodyId_pre"]], body_to_graph[row["bodyId_post"]])
end

In [9]:
#draw(PDF("lc14_self_graph.pdf", 16cm, 16cm), gplot(g))

In [10]:
directed_g = DiGraph(length(unique_ids));
for row in eachrow(self_connections)
    add_edge!(directed_g, body_to_graph[row["bodyId_pre"]], body_to_graph[row["bodyId_post"]])
end

In [11]:
weighted_g = SimpleWeightedGraph(length(unique_ids));
for row in eachrow(self_connections)
    
    pre_id = body_to_graph[row["bodyId_pre"]]
    post_id = body_to_graph[row["bodyId_post"]]
    
    # add to edge if it already exists
    if weighted_g.weights[pre_id, post_id] > 0.
        weighted_g.weights[pre_id, post_id] += row["weight"]
    else # otherwise add the edge
        add_edge!(weighted_g, pre_id, post_id, row["weight"])

    end
end

In [12]:
#draw(PDF("lc14_weightedself_graph.pdf", 16cm, 16cm), gplot(g))

Can we find out where the connection is? There seems to be like lots of leggy connections (probably on account of the sparsity). For instance, are connections between left and right or between right and right?

We can find the locations of the presynapses and the postsynapses. wait they'll obviously be in the same place... What we want is like for a given neuron A that innervates B, How far away from neuron B is neuron A's general location. What we want is the distance from the centroid of the presynaptic neuron's postsynapses to the self-connection. 

In [13]:
synapse_locations = CSV.read("lc14_all_synapses.csv");

Go through each self-connection, find the centroid for the pre_neuron, and the location of the self-connection (or connections).

In [14]:
function centroid(id, synapse_locations)
    x = synapse_locations[synapse_locations["bodyId_post"] .== id, :].x_post
    y = synapse_locations[synapse_locations["bodyId_post"] .== id, :].y_post
    z = synapse_locations[synapse_locations["bodyId_post"] .== id, :].z_post
    return (mean(x), mean(y), mean(z))
end

centroid (generic function with 1 method)

In [15]:
function connection_location(post_id, pre_id, synapse_locations)
    
    pre_matches = synapse_locations["bodyId_pre"] .== pre_id
    post_matches = synapse_locations["bodyId_post"] .== post_id
    
    connections = synapse_locations[pre_matches .& post_matches, :]
    return (mean(connections.x_post), mean(connections.y_post), mean(connections.z_post))
end

connection_location (generic function with 1 method)

In [16]:
centroid_locations = fill((0.,0.,0.), size(self_connections)[1])
self_connection_locations = fill((0.,0.,0.), size(self_connections)[1])
i=1
for row in eachrow(self_connections)
    pre_id = row["bodyId_pre"]
    post_id = row["bodyId_post"]
    centroid_locations[i] = centroid(pre_id, synapse_locations)
    self_connection_locations[i] = connection_location(post_id, pre_id, synapse_locations)
    i += 1
end


All fine except there are nans in there? let's investigate why.

In [17]:
problem_neuron = self_connections[8,:]

Unnamed: 0_level_0,Column1,bodyId_pre,bodyId_post,roi,weight,type_pre,instance_pre,type_post
Unnamed: 0_level_1,Int64,Int64,Int64,String,Int64,String,String,String?
8,310,1316086414,1533338992,NotPrimary,1,LC14,LC14_R,LC14


In [18]:
synapse_locations[synapse_locations["bodyId_post"] .== problem_neuron.bodyId_pre, :]

Unnamed: 0_level_0,bodyId_pre,bodyId_post,roi_pre,roi_post,x_pre,y_pre,z_pre,x_post,y_post
Unnamed: 0_level_1,Int64,Int64,String?,String?,Int64,Int64,Int64,Int64,Int64
1,883514695,1316086414,LO(R),LO(R),2071,18651,20726,2069,18634
2,883514695,1316086414,LO(R),LO(R),97,17210,21791,105,17212
3,883514695,1316086414,LO(R),LO(R),1960,19891,24045,1967,19873
4,883514695,1316086414,LO(R),LO(R),1799,19045,21632,1816,19028
5,883514695,1316086414,LO(R),LO(R),1867,19873,23985,1880,19843
6,883514695,1316086414,LO(R),LO(R),2243,18134,22637,2271,18157
7,883514695,1316086414,LO(R),LO(R),1079,19792,23284,1093,19804
8,883514695,1316086414,LO(R),LO(R),1884,19871,24087,1882,19858
9,883514695,1316086414,LO(R),LO(R),2079,19854,23874,2088,19879
10,883514695,1316086414,LO(R),LO(R),1262,19743,23212,1264,19731


Seems like synapse_locations doesn't contain any record of the 1316086414 neuron. Why would this be?

In [19]:
size(synapse_locations[synapse_locations["bodyId_pre"] .== problem_neuron.bodyId_pre, :])

(24, 12)

It has lots of outputs, but no recorded inputs? Thats odd, and probably a result of it being cutoff.

Maybe it's not a problem, let's see how often this occurs.

In [20]:
sum(isnan.(getindex.(centroid_locations, 1)))

0

16 out of 98 isn't great, but not much we can do about it: best just to leave it out [Edit: now we've got all the synapses this is no longer a problem]

In [21]:
connections_with_input = .!(isnan.(getindex.(centroid_locations,1)))
centroid_locations_filtered = centroid_locations[connections_with_input]
self_connection_locations_filtered = centroid_locations[connections_with_input];

Now we have the data, let's save and plot it?

In [22]:
centroid_x = getindex.(centroid_locations_filtered,1)
centroid_y = getindex.(centroid_locations_filtered,2)
centroid_z = getindex.(centroid_locations_filtered,3)

self_x = getindex.(self_connection_locations_filtered,1)
self_y = getindex.(self_connection_locations_filtered,2)
self_z = getindex.(self_connection_locations_filtered,3)

out = DataFrame(centroid_x=centroid_x, centroid_y=centroid_y, centroid_z=centroid_z, self_x=self_x, self_y=self_y, self_z=self_z);


That's kind of odd, we have lots of repeats? I thought this was just centroid repeats because each input neuron inputs to many outputs, but actually it looks like the connection location is the same as well which isn't right...

The problem is in the self_connections dataframe, look here:

In [23]:
self_connections[3:4,:]

Unnamed: 0_level_0,Column1,bodyId_pre,bodyId_post,roi,weight,type_pre,instance_pre,type_post
Unnamed: 0_level_1,Int64,Int64,Int64,String,Int64,String,String,String?
1,66,1249940263,1158191887,LO(R),7,LC14,LC14,LC14
2,67,1249940263,1158191887,NotPrimary,1,LC14,LC14,LC14


There are two entries for the same input and output neuron, this is because they have different rois and the dataframe cannot save two different rois (not sure what not-primary means...).

We need to tidy up the self_connections so that we are only looking through unique pairings.

In [24]:
self_pre = self_connections["bodyId_pre"]
self_post = self_connections["bodyId_post"]
self_ids = unique(self_pre .=> self_post)

centroid_locations = fill((0.,0.,0.), length(self_ids))
self_connection_locations = fill((0.,0.,0.), length(self_ids))
i=1
for idpair in self_ids
    pre_id = idpair[1]
    post_id = idpair[2]
    centroid_locations[i] = centroid(pre_id, synapse_locations)
    self_connection_locations[i] = connection_location(post_id, pre_id, synapse_locations)
    i += 1
end

connections_with_input = .!(isnan.(getindex.(centroid_locations,1)))
centroid_locations_filtered = centroid_locations[connections_with_input]
self_connection_locations_filtered = self_connection_locations[connections_with_input];

centroid_x = getindex.(centroid_locations_filtered,1)
centroid_y = getindex.(centroid_locations_filtered,2)
centroid_z = getindex.(centroid_locations_filtered,3)

self_x = getindex.(self_connection_locations_filtered,1)
self_y = getindex.(self_connection_locations_filtered,2)
self_z = getindex.(self_connection_locations_filtered,3)

out = DataFrame(centroid_x=centroid_x, centroid_y=centroid_y, centroid_z=centroid_z, self_x=self_x, self_y=self_y, self_z=self_z);

In [25]:
#CSV.write("LC14_Inputspace_to_self_output_space.csv", out)

In [26]:
for neuron in unique_ids
    neuron_center =centroid(neuron, synapse_locations)
    pre_ids = self_connections[self_connections.bodyId_post .== neuron,:].bodyId_pre
    out = DataFrame(bodyId_pre=Int[], bodyId_post=Int[], roi_pre=String[], roi_post=String[], x_pre=Int[], y_pre=Int[], z_pre=Int[], x_post=Int[], y_post=Int[], z_post=Int[], confidence_pre=Float64[], confidence_post=Float64[])
    for presynaptic in pre_ids
        out = [out; synapse_locations[synapse_locations["bodyId_post"] .== presynaptic, :]]
    end
    
end

In [27]:
a = DataFrame(bodyId_pre=Int[], bodyId_post=Int[], roi_pre=String[], roi_post=String[], x_pre=Int[], y_pre=Int[], z_pre=Int[], x_post=Int[], y_post=Int[], z_post=Int[], confidence_pre=Float64[], confidence_post=Float64[])
[a; synapse_locations[1:4,:]]

Unnamed: 0_level_0,bodyId_pre,bodyId_post,roi_pre,roi_post,x_pre,y_pre,z_pre,x_post,y_post
Unnamed: 0_level_1,Int64,Int64,String?,String?,Int64,Int64,Int64,Int64,Int64
1,544038294,5813054748,LO(R),LO(R),4841,16294,28648,4841,16294
2,544038294,5813054748,LO(R),LO(R),5050,17297,28013,5065,17320
3,544038294,5812989312,LO(R),LO(R),6083,13951,27213,6111,13959
4,544038294,1503331732,LO(R),LO(R),3366,19414,30918,3385,19407


In [28]:
lc14_centroids = DataFrame(bodyId=Int[], x=Float64[], y=Float64[], z=Float64[])
for (i,neuron) in enumerate(unique_ids)
    push!(lc14_centroids, (neuron, centroid(neuron, synapse_locations)...))
end

In [29]:
for neuron in unique_ids
    self_connections[self_connections.bodyId_post .== neuron, :]
end

In [30]:
distance(p1::NTuple{3,<:Real}, p2::NTuple{3,<:Real}) = sqrt.( (p1[1] - p2[1])^2 + (p1[2] - p1[2])^2 + (p1[3] - p2[3])^2 )

function nearest_neighbours(neighbours, center)
    distances = zeros(Float64, size(neighbours)[1])
    for n in 1:size(neighbours)[1]
        distances[n] = distance((center.x[1], center.y[1], center.z[1]), (neighbours.x[n], neighbours.y[n], neighbours.z[n]))
    end
    return (neighbours[sortperm(distances),:], distances[sortperm(distances)])
end

nearest_neighbours (generic function with 1 method)

In [31]:
lc14_neighbours = NamedArray(Array{Int}(undef, length(unique_ids), length(unique_ids)))
setnames!(lc14_neighbours, ["ID"; string.([1:length(unique_ids)-1;])], 2)
for (i,neuron) in enumerate(lc14_centroids.bodyId)

    neuron_idx = lc14_centroids.bodyId .== neuron
    center = lc14_centroids[neuron_idx, :]
    neighbours = lc14_centroids[.!neuron_idx, :]
    (nearest, _) = nearest_neighbours(neighbours, center)
    lc14_neighbours[i, :] .= [neuron; nearest.bodyId]
end


In [32]:
connection_separation = zeros(Int, length(self_ids))
i=1
for (pre, post) in self_ids
    connection_separation[i] = findfirst(x-> x .== post, lc14_neighbours[lc14_neighbours[:,1] .== pre, :])[2] - 1
    i += 1
end

In [33]:
separations = DataFrame(pre_neuron=getindex.(self_ids,1), post_neuron=getindex.(self_ids,2), separation=connection_separation)
#CSV.write("self_connection_separations.csv", separations)

Unnamed: 0_level_0,pre_neuron,post_neuron,separation
Unnamed: 0_level_1,Int64,Int64,Int64
1,1158191887,1249940263,2
2,1158191887,1558605812,21
3,1249940263,1158191887,1
4,1249940263,1255405703,12
5,1249940263,1280980178,4
6,1249940263,1690283458,48
7,1316086414,1533338992,33
8,1316086414,5813033742,3
9,1316086414,5813054748,35
10,1348498693,1659235557,17


In [34]:
distances = zeros(Float64, length(self_ids))
i=1
for (pre, post) in self_ids
    pre_location = lc14_centroids[lc14_centroids.bodyId .== pre, 2:end]
    post_location = lc14_centroids[lc14_centroids.bodyId .== post, 2:end]
    distances[i] = distance((pre_location.x[1], pre_location.y[1], pre_location.z[1]), (post_location.x[1], post_location.y[1], post_location.z[1]))
    i+=1
end

In [39]:
neighbor_distance = DataFrame(pre_neuron=getindex.(self_ids,1), post_neuron=getindex.(self_ids,2), distance=distances)
#CSV.write("self_connection_distances.csv", neighbor_distance)
synapse_locations

Unnamed: 0_level_0,bodyId_pre,bodyId_post,roi_pre,roi_post,x_pre,y_pre,z_pre,x_post,y_post
Unnamed: 0_level_1,Int64,Int64,String?,String?,Int64,Int64,Int64,Int64,Int64
1,544038294,5813054748,LO(R),LO(R),4841,16294,28648,4841,16294
2,544038294,5813054748,LO(R),LO(R),5050,17297,28013,5065,17320
3,544038294,5812989312,LO(R),LO(R),6083,13951,27213,6111,13959
4,544038294,1503331732,LO(R),LO(R),3366,19414,30918,3385,19407
5,544038294,1503331732,LO(R),LO(R),3558,21189,30915,3578,21194
6,544038294,1470950640,LO(R),LO(R),6764,16355,30281,6764,16385
7,544038294,1441949046,LO(R),LO(R),3323,21978,29675,3334,21948
8,544038294,1161946749,LO(R),LO(R),3598,21456,29103,3599,21430
9,543692985,1659235318,missing,missing,5181,20189,28661,5167,20205
10,544038123,5813054748,LO(R),LO(R),6480,15752,29644,6473,15749


In [46]:
relative_synapses = reshape([], 0, 3);
for id in unique_ids
    # ask if there are any self_connections?


    if any(self_connections.bodyId_post .== id)

        centroid_location = collect(centroid(id, synapse_locations))
        pre_neurons = self_connections[self_connections.bodyId_post .== id,:]["bodyId_pre"]

        input_locations = reshape([], 0, 3);
        for pre in pre_neurons

            x = synapse_locations[synapse_locations["bodyId_post"] .== pre, :].x_post # input neuron's input locations (x)
            y = synapse_locations[synapse_locations["bodyId_post"] .== pre, :].y_post
            z = synapse_locations[synapse_locations["bodyId_post"] .== pre, :].z_post
            input_locations = [input_locations; [x y z]]
        end

        relative_synapses = [relative_synapses; input_locations .- centroid_location']

    end
end 



In [48]:
relative_self_synapses = DataFrame(x=relative_synapses[:,1], y=relative_synapses[:,2], z=relative_synapses[:,3])
#CSV.write("relative lc14 input locations.csv", relative_self_synapses)

"relative lc14 input locations.csv"