# Algorithms.md

-------

-------

# Describe Approach

## Algorithm

**Description:** 

We first construct a similarity graph using the input data points.  Then we compute the Laplacian and its eigenvectors to do dimensionality reduction in order to make the clusters more obvious.  Lastly we use a classical clustering algorithm (e.g. k-means) to obtain the clusters.

**Input:** 
  1. Similarity matrix
  2. number of clusters

**Output:** Clusters

### Pseudocode

**function** connectivity (similarity_matrix, num_clusters)
  1. Construct similarity graph.
  2. Compute Laplacian.
  3. Obtain the first $k$ eigenvectors of L, where $k = num\_clusters$.
  4. Create matrix U composed of the first $k$ eigen vectors as columns.
  5. **for** i = 1, ..., n
      1. let $y_i$ be  the vector corresponding to the ith row of U.
      2. cluster the $n$ number of $y_i$ vectors using k-means into clusters $C_1, ..., C_k$
  6. **endfor**
  7. return clusters $A_1, ..., A_k$ where $A_i = \{j \ | \ y_j \in C_i\}$
**endfunction**
  
-------

-------


In [1]:
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
from plotly import tools
import plotly

import os
#os.chdir('C:/Users/L/Documents/Homework/BME/Neuro Data I/Data/')

import csv,gc  # garbage memory collection :)

import numpy as np
from numpy import linalg as LA
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import axes3d

# from mpl_toolkits.mplot3d import axes3d
# from collections import namedtuple

import csv
import re
import matplotlib
import time
import seaborn as sns

from collections import OrderedDict

In [2]:
import networkx as nx
import math

In [3]:
# from matplotlib.pyplot import *
# import matplotlib.pyplot as plt
# %matplotlib inline

In [4]:
plotly.offline.init_notebook_mode()

In [5]:
def plot_connectivity(dictionary):
    current_palette = sns.color_palette("husl", len(dictionary.keys()))
    Xe = []
    Ye = []
    data = []
    avg_dict = OrderedDict()
    i = 0
    for key, region in dictionary.iteritems():
        X = []
        Y = []
#         Z = []
        tmp_x = []
        tmp_y = []
        
        region_col = current_palette[i]
        region_col_lit = 'rgb' + str(region_col)
        i += 1
        for coord in region:    
            X.append(coord[0])
            Y.append(coord[1])
            tmp_x.append(coord[0])
            tmp_y.append(coord[1])
        avg_dict[key] = [[np.mean(tmp_x), np.mean(tmp_y)]]
            
        trace_scatter = Scatter(
                x = X, 
                y = Y,
                name=key,
                mode='markers',
                marker=dict(
                    size=10,
                    color=region_col_lit, #'purple',                # set color to an array/list of desired values
                    colorscale='Viridis',   # choose a colorscale
                    opacity=0.5
                )
        )
        avg_scatter = Scatter(
                x = [avg_dict[key][0][0]],
                y = [avg_dict[key][0][1]],
                mode='markers',
                name=key+'_avg',
                marker=dict(
                    size=10,
                    color=region_col_lit,
                    colorscale='Viridis',
                    line=dict(
                        width = 2,
                        color = 'rgb(0, 0, 0)'
                    )
                )
        )
        data.append(trace_scatter)
        data.append(avg_scatter)
        
    locations = avg_dict.keys()
#     print locations
    for i, key in enumerate(avg_dict):
#         if i + 1 == len(locations):
#             continue
#         print 'start' + str(i)
        tmp = []
        for j in range(len(locations)):
            if j == i:
                continue
            p1 = np.asarray(avg_dict[key][0])
            p2 = np.asarray(avg_dict[locations[j]][0])
            dist = LA.norm(p1 - p2)
#             dist = (math.pow(avg_dict[key][0][0]-avg_dict[locations[j]][0][0],2) + 
#                        math.pow(avg_dict[key][0][1]-avg_dict[locations[j]][0][1],2))
            tmp.append(dist)
#             print dist
        newmin = tmp.index(min(tmp))
        if newmin >= i:
            newmin += 1
#         print newmin
        print "region " + key + ": " + locations[newmin]
        tmp2 = avg_dict.keys()[newmin]
        Xe+=[avg_dict[key][0][0],avg_dict[tmp2][0][0],None]
        Ye+=[avg_dict[key][0][1],avg_dict[tmp2][0][1],None]
#         Ze+=[dictionary[key][0][2],dictionary[tmp2][0][2],None]
#     print Xe
#     print Ye
    
    trace_edge = Scatter(x=Xe,
               y=Ye,
               mode='lines',
               line=Line(color='rgb(0,0,0)', width=3),
               hoverinfo='none'
    )

    data.append(trace_edge)
    
    layout = Layout(
#         margin=dict(
#             l=0,
#             r=0,
#             b=0,
#             t=0
#         ),
        paper_bgcolor='rgb(255,255,255)',
        plot_bgcolor='rgb(255,255,255)'
    )
        
    fig = Figure(data=data, layout=layout)
    iplot(fig, validate=False)

In [6]:
# Fix random seed
np.random.seed(123456789)

In [10]:
# create 2D plot showing algorithm
# 4 'regions' indicated by 4 x 100 points: a,b,c,d
# obtain the average coordinates for each region to obtain the representative point for each region
# and connect the 4 points such that if any of the points were chosen as starting point, it would only connect to nearest 'region'
# loop through all points and set them as starting point, connectivity map will self-terminate when the last two points point to each other as mutual 'nearest regions'

# test with spherical gaussian mixture
np.random.seed(123456789)
a_norm = 0.5 * np.random.randn(100, 2) + np.array([-0.1,0])
b_norm = 0.5 * np.random.randn(100, 2) + np.array([0,2.1])
c_norm = 0.5 * np.random.randn(100, 2) + np.array([2,2.1])
d_norm = 0.5 * np.random.randn(100, 2) + np.array([2.1,0])

norm_dict = OrderedDict([('a',a_norm),('b',b_norm),('c',c_norm),('d',d_norm)])

In [11]:
print(norm_dict)

OrderedDict([('a', array([[ 1.006451  ,  1.0641989 ],
       [ 0.8208557 ,  0.04119124],
       [ 0.32948184, -0.41300821],
       [ 0.47863526,  0.68795757],
       [ 0.37151423,  0.4239353 ],
       [ 0.16484571, -0.28470234],
       [ 0.31590228,  0.1167982 ],
       [-0.52383517, -0.83833091],
       [-0.08333869,  0.19022234],
       [-0.50144229, -0.41755951],
       [-0.63441722,  0.09496558],
       [-0.12776851,  0.53122131],
       [ 0.18982338, -0.16011092],
       [-0.23487792,  0.16160696],
       [ 0.5871681 , -0.10247157],
       [ 0.03806611,  0.08127989],
       [-0.31783672,  0.04821044],
       [-1.04342554,  0.29042324],
       [ 1.54362302,  0.10443931],
       [-0.35985775,  1.01212831],
       [ 0.05981261, -0.27114598],
       [-0.33317497,  0.18474284],
       [-0.26328924,  0.56853954],
       [-0.96568139, -0.62634335],
       [-0.10951273,  0.4256104 ],
       [-0.31539334, -0.06532803],
       [-0.58567849, -0.8594762 ],
       [-0.45342023, -0.31839017],
 

In [12]:
# connected points are regions a to b to c to d
plot_connectivity(norm_dict)

region a: b
region b: c
region c: b
region d: c


In [13]:
print a_norm.shape
print b_norm.shape
print c_norm.shape
print d_norm.shape

(100, 2)
(100, 2)
(100, 2)
(100, 2)


In [14]:
# connect the 400 points using epsilon ball 
radius = 0.4
allpt = np.concatenate([a_norm,b_norm,c_norm,d_norm])

In [15]:
print allpt.shape
print allpt[1]

(400, 2)
[ 0.8208557   0.04119124]


In [16]:
G=nx.Graph()

In [17]:
# generate networkx graph object to obtain adjacency matrix, edges have weight of 1, 
# with epsilon ball radius according to above
for i in range(len(allpt)):
    G.add_node(str(i),pos=allpt[i],region=i/100)
    for j in range(i+1,len(allpt)):
        dist = LA.norm(allpt[i] - allpt[j])
        if dist < radius:
            G.add_edge(str(i),str(j),distance=dist)

In [18]:
print len(G.nodes())
print len(G.edges())
# print G.nodes()
# nx.write_graphml(G, "connectivity.graphml")

400
3236


In [19]:
G.node['344']['region']

3

In [20]:
def plot_graphml(G):
    current_palette = sns.color_palette("husl", len(G.nodes())/100)
    Xe = []
    Ye = []
    data = []
    i = 0
    
    X = []
    Y = []
    regiondict = {}
    for r, node in enumerate(G.nodes()):
        tmp = G.node[node]
#         print tmp
        pos = tmp['pos']
        region = tmp['region']
        if str(region) not in regiondict:
            regiondict[str(region)] = [pos]
        else:
            tmp = regiondict[str(region)]
            tmp.append(pos)
            regiondict[str(region)] = tmp
#         print region
#         print pos
#     print regiondict
    for region, reg in enumerate(regiondict):
        for pos in regiondict[reg]:
#             print pos
            X.append(pos[0])
            Y.append(pos[1])
                
        region_col = current_palette[region]
        region_col_lit = 'rgb' + str(region_col)
        
        trace_scatter = Scatter(
                x = X, 
                y = Y,
                name=region,
                mode='markers',
                marker=dict(
                    size=10,
                    color=region_col_lit, #'purple',                # set color to an array/list of desired values
                    colorscale='Viridis',   # choose a colorscale
                    opacity=0.5
                )
        )
        data.append(trace_scatter)
        X = []
        Y = []
        
    for r, edge in enumerate(G.edges()):
        firstpt = G.node[edge[0]]
        secondpt = G.node[edge[1]]
#         print firstpt
        dist = LA.norm(firstpt['pos'] - secondpt['pos'])
#         tmp.append(dist)
#         print dist

        Xe+=[firstpt['pos'][0],secondpt['pos'][0],None]
        Ye+=[firstpt['pos'][1],secondpt['pos'][1],None]
#         Ze+=[dictionary[key][0][2],dictionary[tmp2][0][2],None]
#     print Xe
#     print Ye
    
    trace_edge = Scatter(x=Xe,
               y=Ye,
               mode='lines',
               line=Line(color='rgb(0,0,0)', width=2),
               hoverinfo='none'
    )

    data.append(trace_edge)
    
    layout = Layout(
#         margin=dict(
#             l=0,
#             r=0,
#             b=0,
#             t=0
#         ),
        paper_bgcolor='rgb(255,255,255)',
        plot_bgcolor='rgb(255,255,255)'
    )
        
    fig = Figure(data=data, layout=layout)
    iplot(fig, validate=False)

In [21]:
# plot the points and edges of the networkx graph object with epsilon ball of radius = 0.4
plot_graphml(G)

In [22]:
# A = nx.adjacency_matrix(G)
A = nx.to_numpy_matrix(G) 
A2 = nx.adjacency_matrix(G)
nodelist = G.nodes()

In [23]:
print A2.shape

(400, 400)


In [24]:
print len(nodelist)

400


In [25]:
# apply eigen decomposition to obtain the eigenvalues (diagonal matrix D) and the eigenvectors (n x n matrix V)
D, V = LA.eig(A)
D = np.diagflat(D)

# following lines in this cell aren't necessary, just used to verify that V * D * V^-1 = A, where A is the adjacency matrix
b = np.matrix(V)*np.matrix(D)*np.matrix(LA.inv(V))
# out = np.dot(V, D, LA.inv(V))
dotm = lambda *args: reduce(np.dot, args)
out = dotm(V, D, LA.inv(V))

In [26]:
# verification of whether A = V * D * V^-1 is true
np.allclose(A,b)

True

In [27]:
# print V
np.savetxt('eigenvectorA.txt',V,delimiter="\t")

In [25]:
import os
cwd = os.getcwd()

In [26]:
print V.shape

(400, 400)


In [27]:
a_mean = np.zeros((1,2))  
b_mean = np.zeros((1,2))  
c_mean = np.zeros((1,2))  
d_mean = np.zeros((1,2))  

a_num = 0
b_num = 0
c_num = 0
d_num = 0

for ind, i in enumerate(nodelist):
    region = int(float(i))/100
    if region == 0:
        a_mean = np.add(V[ind,0:2],a_mean)
        a_num += 1
    elif region == 1:
        b_mean = np.add(V[ind,0:2],b_mean)
        b_num += 1
    elif region == 2:
        c_mean = np.add(V[ind,0:2],c_mean)
        c_num += 1
    elif region == 3:
        d_mean = np.add(V[ind,0:2],d_mean)
        d_num += 1
a_mean = a_mean/100.
b_mean = b_mean/100.
c_mean = c_mean/100.
d_mean = d_mean/100.

print a_num # check to make sure each cluster has 100 points
# print b_num
# print c_num
# print d_num

100


In [28]:
print a_mean.shape

(1, 2)


In [29]:
test = np.column_stack((np.transpose(a_mean),np.transpose(b_mean),np.transpose(c_mean),np.transpose(d_mean)))
# test = np.column_stack((a_mean, b_mean, c_mean, d_mean))
print test.shape

(2, 4)


In [30]:
# print test
import scipy

In [31]:
names = ['a','b','c','d']
for i in range(len(np.transpose(test))):
    dists = []
    dists2 = []
    print 'region ' + names[i]
    for j in range(len(np.transpose(test))):
        if i == j:
            continue
        dist = LA.norm(test[:,i] - test[:,j])
#         dist2 = scipy.spatial.distance.euclidean(test[:,i],test[:,j]) # alternative to finding euclidean distance
        dists.append(dist)
        dists2.append(dist2)
    print dists
#     print dists2
    near = dists.index(min(dists))
#     print np.exp(dists[near])
    if near >= i:
        near += 1
    print 'Nearest connection is between region ' + names[i] + ' and ' + names[near] + "\n"

region a


NameError: name 'dist2' is not defined

## Spectral Embedding

In [32]:
from sklearn.manifold import spectral_embedding as se

In [33]:
a2out = se(A2,n_components=2,drop_first=True)
print a2out.shape

(400, 2)



Graph is not fully connected, spectral embedding may not work as expected.



In [34]:
print nodelist.index('0')

1


In [35]:
print(A2)

  (0, 6)	1
  (0, 7)	1
  (0, 63)	1
  (0, 123)	1
  (0, 189)	1
  (0, 243)	1
  (0, 251)	1
  (0, 252)	1
  (0, 354)	1
  (0, 363)	1
  (0, 374)	1
  (1, 105)	1
  (1, 140)	1
  (1, 174)	1
  (1, 192)	1
  (2, 91)	1
  (2, 126)	1
  (2, 157)	1
  (2, 196)	1
  (2, 235)	1
  (2, 237)	1
  (2, 264)	1
  (2, 320)	1
  (3, 197)	1
  (3, 301)	1
  :	:
  (398, 185)	1
  (398, 221)	1
  (398, 269)	1
  (398, 337)	1
  (398, 351)	1
  (398, 399)	1
  (399, 50)	1
  (399, 54)	1
  (399, 55)	1
  (399, 58)	1
  (399, 75)	1
  (399, 76)	1
  (399, 117)	1
  (399, 136)	1
  (399, 144)	1
  (399, 146)	1
  (399, 148)	1
  (399, 183)	1
  (399, 184)	1
  (399, 221)	1
  (399, 266)	1
  (399, 269)	1
  (399, 337)	1
  (399, 351)	1
  (399, 398)	1


In [36]:
unique, counts = np.unique(np.asarray(A)[:,1], return_counts=True)
dict(zip(unique, counts))

{0.0: 396, 1.0: 4}

In [37]:
print a2out

[[  1.39271128e-01  -1.47073685e-01]
 [ -2.42972784e-02   8.52426383e-05]
 [  1.27108651e-02  -9.44604855e-02]
 [  2.89191038e-02  -3.13306059e-02]
 [  1.44004912e-02  -1.52538651e-02]
 [  3.06855889e-02  -2.45646579e-02]
 [  6.65065518e-02  -6.83683774e-02]
 [  2.78182219e-01  -3.08212668e-01]
 [  2.07816696e-01  -2.34361366e-01]
 [  1.96092227e-01  -1.98066416e-01]
 [ -2.44628208e-01  -1.15346274e-01]
 [ -2.61547473e-01  -1.50072037e-01]
 [ -1.39661243e-01  -1.01445355e-01]
 [ -3.76554093e-01  -1.93917218e-01]
 [ -1.98683572e-01  -8.73039451e-02]
 [ -2.91665422e-02  -1.38953227e-02]
 [ -1.59884466e-01  -8.34738060e-02]
 [ -1.06515107e-01  -2.98546880e-02]
 [ -3.89323114e-01  -2.03040224e-01]
 [ -1.35282500e-01  -8.01016587e-02]
 [ -9.53123766e-03   6.63512861e-02]
 [ -5.17548149e-02   2.38336964e-02]
 [ -1.13831063e-01   5.38326679e-02]
 [ -1.68752430e-01   5.84017586e-02]
 [ -2.50071324e-01   1.18834173e-01]
 [ -4.30246847e-03   3.95968662e-02]
 [ -1.77311560e-01   5.04399562e-02]
 

In [38]:
print len(nodelist)
print(nodelist)

400
['344', '0', '346', '347', '340', '341', '342', '343', '348', '349', '298', '299', '296', '297', '294', '295', '292', '293', '290', '291', '199', '198', '195', '194', '197', '196', '191', '190', '193', '192', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '108', '109', '102', '103', '100', '101', '106', '107', '104', '105', '39', '38', '33', '32', '31', '30', '37', '36', '35', '34', '339', '338', '335', '334', '337', '336', '331', '330', '333', '332', '345', '6', '99', '98', '91', '90', '93', '92', '95', '94', '97', '96', '238', '239', '234', '235', '236', '237', '230', '231', '232', '233', '1', '146', '147', '144', '145', '142', '143', '140', '141', '148', '149', '133', '132', '131', '130', '137', '136', '135', '134', '139', '138', '24', '25', '26', '27', '20', '21', '22', '23', '28', '29', '379', '378', '371', '370', '373', '372', '375', '374', '377', '376', '393', '392', '88', '89', '397', '396', '395', '394', '82', '83', '80', '81', '86', '87', '84', '85'

In [39]:
a_list = []
b_list = []
c_list = []
d_list = []
for ind, i in enumerate(nodelist):
    region = int(float(i))/100
    if region == 0:
        a_list.append(ind)
#         a_list.append(int(float(i)))
    elif region == 1:
        b_list.append(ind)
#         b_list.append(int(float(i)))
    elif region == 2:
        c_list.append(ind)
#         c_list.append(int(float(i)))
    elif region == 3:
        d_list.append(ind)
#         d_list.append(int(float(i)))
        
a_region = a2out[a_list]  # V[0:9,:].mean(axis=0)
# print a_region
b_region = a2out[b_list]  # V[10:19,:].mean(axis=0)
c_region = a2out[c_list]  # V[20:29,:].mean(axis=0)
d_region = a2out[d_list]  # V[30:39,:].mean(axis=0)
se_regions = OrderedDict([('a', a_region),('b', b_region),('c', c_region),('d', d_region)])

In [40]:
print a_list

[1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 92, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 135, 136, 141, 142, 143, 144, 145, 146, 147, 148, 149, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 244, 265, 266, 269, 270, 271, 272, 291, 314, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 351, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399]


In [41]:
a_r_avg = a_region.mean(axis=0)
print a_r_avg
b_r_avg = b_region.mean(axis=0)
c_r_avg = c_region.mean(axis=0)
d_r_avg = d_region.mean(axis=0)

[ 0.16076066  0.28616511]


In [42]:
plot_connectivity(se_regions)

region a: b
region b: c
region c: b
region d: c


### testing on Aut1367.graphml

In [48]:
aut = nx.read_graphml("Aut1367.graphml")

aut_am = nx.adjacency_matrix(aut)
aut_nodes = aut.nodes()

In [44]:
aut_se = se(aut_am, n_components=3,drop_first=True)
print aut_se.shape

(5090, 3)


In [106]:
print(fear_se)

[[ -1.28038741e-03   6.99428950e-04  -2.04421902e-03]
 [  6.04580620e-27   9.08969517e-23  -2.99327109e-22]
 [  1.40076039e-03   5.97098700e-03  -6.80572655e-03]
 ..., 
 [ -4.48135595e-03   2.44800132e-03  -7.15476656e-03]
 [ -3.84116224e-03   2.09828685e-03  -6.13265705e-03]
 [ -6.14585959e-03   3.35725896e-03  -9.81225129e-03]]


In [46]:
points = np.genfromtxt('Aut1367.region.csv', delimiter=',')

In [53]:
print(aut_am.shape)

(5090, 5090)


In [56]:
print(len(aut_nodes))
print(aut_nodes)

5090
['s2040', 's2041', 's2042', 's2043', 's2044', 's2045', 's2046', 's2047', 's2048', 's2049', 's2734', 's2738', 's2739', 's2286', 's2287', 's2284', 's2285', 's2282', 's2283', 's2280', 's2281', 's2732', 's3296', 's2288', 's2289', 's1218', 's1838', 's1839', 's1219', 's1834', 's1835', 's1836', 's1837', 's1830', 's1831', 's1832', 's1833', 's57', 's56', 's55', 's54', 's53', 's52', 's51', 's50', 's1813', 's59', 's58', 's1748', 's691', 's1749', 's1210', 's2138', 's2139', 's1211', 's2134', 's2135', 's2136', 's2137', 's2130', 's2131', 's2132', 's2133', 's4319', 's4063', 's1214', 's181', 's1215', 's5090', 's1216', 's1217', 's1409', 's1408', 's1792', 's3845', 's1562', 's1563', 's3191', 's327', 's1403', 's3719', 's3718', 's1934', 's3713', 's3712', 's3711', 's3710', 's3717', 's3716', 's3715', 's3714', 's3190', 's2786', 's1568', 's1111', 's1569', 's1930', 's4151', 's320', 's799', 's798', 's3399', 's3398', 's795', 's794', 's797', 's796', 's791', 's790', 's793', 's792', 's1007', 's3159', 's3158', 's

In [54]:
print(aut_se.shape)
print(aut_se)

(5090, 3)
[[ -2.17845287e-02   2.99166270e-03  -8.76354739e-03]
 [  6.33334775e-04   3.60823244e-03  -5.23366715e-04]
 [  4.04575566e-05   4.11609843e-03  -1.57395833e-03]
 ..., 
 [  6.06863349e-05   6.17414765e-03  -2.36093750e-03]
 [ -2.45075948e-02   3.36562053e-03  -9.85899082e-03]
 [ -5.44613218e-03   7.47915674e-04  -2.19088685e-03]]


In [47]:
print(points)

[[  50.  525.  349.  242.  242.]
 [  60.  519.  252.  242.  242.]
 [  61.  504.  237.  237.  237.]
 ..., 
 [ 544.  515.  338.  242.  242.]
 [ 547.  478.  268.  242.  242.]
 [ 553.  458.  255.  242.  242.]]


In [86]:
def add_to_dict(d, region, index):
    if region in d:
        d[region].append(index)
    else:
        d[region] = [index]
        
    return d

In [87]:
d = {}

for index, node in enumerate(aut_nodes):
    point1 = points[index]
    region = point1[4]
    s = aut.node[node]['attr']
    point2 = ast.literal_eval(s)
    add_to_dict(d, region, index)

In [108]:
print(points[0])
print(points[1])
print(points[200])
print(points[3000])

[  50.  525.  349.  242.  242.]
[  60.  519.  252.  242.  242.]
[ 106.  658.  958.  237.  237.]
[ 333.  637.  445.  248.  248.]


In [97]:
m = {}
for key, value in d.iteritems():
    index_list = value
    m[str(key)] = aut_se[index_list]
    

# a_region = a2out[a_list]  # V[0:9,:].mean(axis=0)
# # print a_region
# b_region = a2out[b_list]  # V[10:19,:].mean(axis=0)
# c_region = a2out[c_list]  # V[20:29,:].mean(axis=0)
# d_region = a2out[d_list]  # V[30:39,:].mean(axis=0)
# se_regions = OrderedDict([('a', a_region),('b', b_region),('c', c_region),('d', d_region)])

In [98]:
print(m)

{'237.0': array([[  4.04575566e-05,   4.11609843e-03,  -1.57395833e-03],
       [  6.29564205e-03,  -3.71912434e-02,   1.58604882e-02],
       [  2.79806313e-03,  -1.65294415e-02,   7.04910588e-03],
       ..., 
       [  5.39434088e-05,   5.48813125e-03,  -2.09861111e-03],
       [  4.04575566e-05,   4.11609843e-03,  -1.57395833e-03],
       [  5.70001297e-03,   3.24740919e-02,  -4.71030043e-03]]), '235.0': array([[ -5.85960614e-03,   2.13226865e-03,  -2.71306312e-03],
       [  1.56018741e-02,  -7.54720774e-03,   8.60182460e-03],
       [  5.49325026e-03,  -9.69167963e-03,   3.90280260e-03],
       [ -3.75725134e-02,   4.27642569e-02,  -8.26710398e-02],
       [ -3.00580107e-02,   3.42114055e-02,  -6.61368319e-02],
       [ -4.33936134e-03,  -5.08713690e-03,  -4.29715936e-03],
       [  3.37146305e-05,   3.43008203e-03,  -1.31163195e-03],
       [  3.16667387e-03,   1.80411622e-02,  -2.61683357e-03],
       [ -3.13677920e-02,   1.63545585e-02,  -7.56210753e-03],
       [ -1.91249963e

In [101]:
plot_connectivity3d(m)

region 237.0: 253.0
region 235.0: 232.0
region 250.0: 232.0
region 236.0: 234.0
region 247.0: 230.0
region 255.0: 241.0
region 248.0: 247.0
region 251.0: 231.0
region 249.0: 234.0
region 246.0: 244.0
region 245.0: 249.0
region 240.0: 231.0
region 241.0: 255.0
region 254.0: 244.0
region 244.0: 254.0
region 242.0: 254.0
region 243.0: 230.0
region 234.0: 249.0
region 233.0: 253.0
region 232.0: 250.0
region 231.0: 255.0
region 230.0: 247.0
region 239.0: 253.0
region 238.0: 243.0
region 253.0: 233.0


In [103]:
print(m.keys())

['237.0', '235.0', '250.0', '236.0', '247.0', '255.0', '248.0', '251.0', '249.0', '246.0', '245.0', '240.0', '241.0', '254.0', '244.0', '242.0', '243.0', '234.0', '233.0', '232.0', '231.0', '230.0', '239.0', '238.0', '253.0']


In [66]:
node = aut_nodes[0]
print(node)
print(aut.node[node]['attr'])
print(aut.node[node]['attr'][0])
print(aut.node[node]['attr'][2])

s2040
[268, 426, 430]
[
6


In [69]:
import ast

node = aut_nodes[0]
print(node)
s = aut.node[node]['attr']
point = ast.literal_eval(s)
print(point)
print(point[0])


s2040
[268, 426, 430]
268


In [104]:
plot_connectivity()

AttributeError: 'numpy.ndarray' object has no attribute 'keys'

In [73]:
def plot_graphml(G):
    current_palette = sns.color_palette("husl", len(G.nodes())/10)
    Xe = []
    Ye = []
    Ze = []
    data = []
    i = 0
    
    X = []
    Y = []
    Z = []
    regiondict = {}
    for r, node in enumerate(G.nodes()):
        tmp = G.node[node]
#         print tmp
        pos = tmp['pos']
        region = tmp['region']
        if str(region) not in regiondict:
            regiondict[str(region)] = [pos]
        else:
            tmp = regiondict[str(region)]
            tmp.append(pos)
            regiondict[str(region)] = tmp
#         print region
#         print pos
#     print regiondict
    for region, reg in enumerate(regiondict):
        for pos in regiondict[reg]:
#             print pos
            X.append(pos[0])
            Y.append(pos[1])
                
        region_col = current_palette[region]
        region_col_lit = 'rgb' + str(region_col)
        
        trace_scatter = Scatter(
                x = X, 
                y = Y,
                name=region,
                mode='markers',
                marker=dict(
                    size=10,
                    color=region_col_lit, #'purple',                # set color to an array/list of desired values
                    colorscale='Viridis',   # choose a colorscale
                    opacity=0.5
                )
        )
        data.append(trace_scatter)
        X = []
        Y = []
        
    for r, edge in enumerate(G.edges()):
        firstpt = G.node[edge[0]]
        secondpt = G.node[edge[1]]
#         print firstpt
        dist = LA.norm(firstpt['pos'] - secondpt['pos'])
#         tmp.append(dist)
#         print dist

        Xe+=[firstpt['pos'][0],secondpt['pos'][0],None]
        Ye+=[firstpt['pos'][1],secondpt['pos'][1],None]
#         Ze+=[dictionary[key][0][2],dictionary[tmp2][0][2],None]
#     print Xe
#     print Ye
    
    trace_edge = Scatter(x=Xe,
               y=Ye,
               mode='lines',
               line=Line(color='rgb(0,0,0)', width=2),
               hoverinfo='none'
    )

    data.append(trace_edge)
    
    layout = Layout(
#         margin=dict(
#             l=0,
#             r=0,
#             b=0,
#             t=0
#         ),
        paper_bgcolor='rgb(255,255,255)',
        plot_bgcolor='rgb(255,255,255)'
    )
        
    fig = Figure(data=data, layout=layout)
    iplot(fig, validate=False)

In [78]:
plot_connectivity3d(fear199)

AttributeError: 'Graph' object has no attribute 'keys'

In [79]:
print np.allclose(A,out)
print np.allclose(A,b)
print np.diagonal(V)
v = np.diagonal(V)
print V.shape
# print V

True
True
[ -2.04490597e-02   1.44010363e-07  -6.58109774e-04   5.21823762e-14
  -4.92115063e-05  -4.58819189e-06  -2.09930528e-06  -4.53644487e-05
   2.14247757e-01   1.64327199e-04   4.58404264e-04  -9.10483931e-03
   1.18521779e-02   5.35296689e-04  -7.12953582e-08  -2.43450068e-03
  -4.43236314e-04   1.20375777e-01   1.38012283e-04  -7.40450009e-04
  -3.24305092e-04  -1.84548109e-05  -3.04271101e-01   1.24388639e-02
   1.08987693e-02  -6.44306783e-04   4.59377808e-02  -3.59019326e-02
  -1.84816982e-08  -8.73362448e-03   2.69106586e-07   1.80407184e-02
  -1.68002414e-03  -9.80753286e-03   3.22621782e-02  -7.13315053e-03
   1.30020706e-04   7.97779967e-07   7.86879596e-03   4.81091041e-02
   2.20921707e-02   5.91576707e-02   1.95103194e-02   1.76573912e-03
   1.78304814e-03   2.37142317e-03   1.47525910e-02  -5.82443242e-02
   2.16433197e-02  -5.89303547e-06  -3.46887478e-03   5.16813602e-04
  -4.32729680e-03   2.88631666e-02  -5.63782725e-04  -2.66583737e-01
  -9.79418935e-03  -4.21

In [80]:
tmp = [[1,0]]
tmp.append([2,0])
tmp.append([3,0])
print tmp

[[1, 0], [2, 0], [3, 0]]


In [81]:
# from matplotlib.pyplot import *
# import matplotlib.pyplot as plt
# %matplotlib inline

# colors = cm.rainbow(np.linspace(0,1,10))
Adict = OrderedDict()
avg = []
real = 0
imag = 0
for r, i in enumerate(v):
    region = r%10
    reg = r/10
#     print reg
    if 'region' + str(reg) not in Adict:
        Adict['region'+str(reg)] = [[i.real,i.imag]]
    else:
        tmp = Adict['region'+str(reg)]
        tmp.append([i.real,i.imag])
        Adict['region'+str(reg)] = tmp
#     if region == 0:
#         real = 0
#         imag = 0
#     else:
#         real += i.real
#         imag += i.imag
#     c = colors[r/10]
#     if r == 9:
#         plt.scatter(real/10,imag/10, color=c,s=40)
#     plt.scatter(i.real,i.imag,color=c)

In [82]:
G=nx.from_numpy_matrix(A)

In [83]:
plot_connectivity(Adict)

region region0: region34
region region1: region22
region region2: region20
region region3: region4
region region4: region3
region region5: region2
region region6: region22
region region7: region10
region region8: region11
region region9: region39
region region10: region7
region region11: region8
region region12: region7
region region13: region23
region region14: region18
region region15: region0
region region16: region30
region region17: region32
region region18: region14
region region19: region14
region region20: region2
region region21: region26
region region22: region1
region region23: region13
region region24: region33
region region25: region31
region region26: region21
region region27: region36
region region28: region24
region region29: region38
region region30: region36
region region31: region35
region region32: region17
region region33: region24
region region34: region0
region region35: region31
region region36: region30
region region37: region5
region region38: region29
region 

In [99]:
print Adict

OrderedDict([('region0', [[-0.020449059665810447, 0.0], [1.4401036314877591e-07, 0.0], [-0.00065810977381890407, 0.0], [5.2182376187436285e-14, 0.0], [-4.9211506262587882e-05, 0.0], [-4.5881918895267038e-06, 0.0], [-2.0993052803228115e-06, 0.0], [-4.5364448675650738e-05, 0.0], [0.21424775692176978, 0.0], [0.00016432719924219401, 0.0]]), ('region1', [[0.00045840426398652278, 0.0], [-0.009104839314994187, 0.0], [0.011852177863579082, 0.0], [0.00053529668906559125, 0.0], [-7.1295358235266252e-08, 0.0], [-0.0024345006819777386, 0.0], [-0.00044323631435356993, 0.0], [0.12037577739386628, 0.0], [0.00013801228298159139, 0.0], [-0.00074045000901895228, 0.0]]), ('region2', [[-0.00032430509165717224, 0.0], [-1.8454810855319429e-05, 0.0], [-0.3042711014540273, 0.0], [0.012438863885174197, 0.0], [0.010898769267694078, 0.0], [-0.00064430678333571062, 0.0], [0.045937780806892335, 0.0], [-0.03590193256826809, 0.0], [-1.8481698170938521e-08, 0.0], [-0.0087336244811743648, 0.0]]), ('region3', [[2.69106

In [97]:
plot_connectivity3d(Adict)

IndexError: list index out of range

In [84]:
az_norm = np.random.normal(1, 0.1, 10)
bz_norm = np.random.normal(1, 0.1, 10)
cz_norm = np.random.normal(3, 0.1, 10)
dz_norm = np.random.normal(3.5, 0.1, 10)

a_norm3d = np.column_stack((ax_norm,ay_norm,az_norm))
b_norm3d = np.column_stack((bx_norm,by_norm,bz_norm))
c_norm3d = np.column_stack((cx_norm,cy_norm,cz_norm))
d_norm3d = np.column_stack((dx_norm,dy_norm,dz_norm))

norm_dict3d = OrderedDict([('a',a_norm3d),('b',b_norm3d),('c',c_norm3d),('d',d_norm3d)])

NameError: name 'ax_norm' is not defined

In [100]:
def plot_connectivity3d(dictionary):
    current_palette = sns.color_palette("husl", len(dictionary.keys()))
    Xe = []
    Ye = []
    Ze = []
    data = []
    avg_dict = OrderedDict()
    i = 0
    for key, region in dictionary.iteritems():
        X = []
        Y = []
        Z = []
        tmp_x = []
        tmp_y = []
        tmp_z = []
        region_col = current_palette[i]
        region_col_lit = 'rgb' + str(region_col)
        i += 1
        for coord in region:    
            X.append(coord[0])
            Y.append(coord[1])
            Z.append(coord[2])
            tmp_x.append(coord[0])
            tmp_y.append(coord[1])
            tmp_z.append(coord[2])
        avg_dict[key] = [[np.mean(tmp_x), np.mean(tmp_y), np.mean(tmp_z)]]
            
        trace_scatter = Scatter3d(
                x = X, 
                y = Y,
                z = Z,
                name=key,
                mode='markers',
                marker=dict(
                    size=10,
                    color=region_col_lit, #'purple',                # set color to an array/list of desired values
                    colorscale='Viridis',   # choose a colorscale
                    opacity=0.5
                )
        )
        avg_scatter = Scatter3d(
                x = [avg_dict[key][0][0]],
                y = [avg_dict[key][0][1]],
                z = [avg_dict[key][0][2]],
                mode='markers',
                name=key+'_avg',
                marker=dict(
                    size=10,
                    color=region_col_lit,
                    colorscale='Viridis',
                    line=dict(
                        width = 2,
                        color = 'rgb(0, 0, 0)'
                    )
                )
        )
        data.append(trace_scatter)
        data.append(avg_scatter)
        
    locations = avg_dict.keys()
#     print locations
    for i, key in enumerate(avg_dict):
#         if i + 1 == len(locations):
#             continue
#         print 'start' + str(i)
        tmp = []
        for j in range(len(locations)):
            if j == i:
                continue
            p1 = np.asarray(avg_dict[key][0])
            p2 = np.asarray(avg_dict[locations[j]][0])
            dist = LA.norm(p1 - p2)
#             dist = (math.pow(avg_dict[key][0][0]-avg_dict[locations[j]][0][0],2) + 
#                        math.pow(avg_dict[key][0][1]-avg_dict[locations[j]][0][1],2) +
#                        math.pow(avg_dict[key][0][2]-avg_dict[locations[j]][0][2],2))
            tmp.append(dist)
#             print dist
        newmin = tmp.index(min(tmp))
        if newmin >= i:
            newmin += 1
#         print newmin
        print "region " + key + ": " + locations[newmin]
        tmp2 = avg_dict.keys()[newmin]
        Xe+=[avg_dict[key][0][0],avg_dict[tmp2][0][0],None]
        Ye+=[avg_dict[key][0][1],avg_dict[tmp2][0][1],None]
        Ze+=[avg_dict[key][0][2],avg_dict[tmp2][0][2],None]
#     print Xe
#     print Ye
    
    trace_edge = Scatter3d(x=Xe,
                y=Ye,
                z=Ze,
                mode='lines',
                line=Line(color='rgb(0,0,0)', width=3),
                hoverinfo='none'
    )

    data.append(trace_edge)
    
    layout = Layout(
#         margin=dict(
#             l=0,
#             r=0,
#             b=0,
#             t=0
#         ),
        paper_bgcolor='rgb(255,255,255)',
        plot_bgcolor='rgb(255,255,255)'
    )
        
    fig = Figure(data=data, layout=layout)
    iplot(fig, validate=False)

In [98]:
plot_connectivity3d(norm_dict3d)

NameError: name 'norm_dict3d' is not defined

In [None]:
token = 'Aut1367'
data_txt = 'Aut1367reorient_atlas.region.csv'
data = np.genfromtxt(data_txt, delimiter=',', dtype='int', usecols = (0,1,2,4), names=['x','y','z','region'])

In [None]:
ccf_txt = 'natureCCFOhedited.csv'
ccf = {}

with open(ccf_txt, 'rU') as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        # row[0] is ccf atlas index, row[4] is string of full name
        ccf[row[0]] = row[4]