In [1]:
import getpass
import os

import pandas as pd
from neo4j.v1 import GraphDatabase, basic_auth

In [2]:
# Connect to the local Neo4j server over Bolt and open one session that the
# rest of the notebook reuses.
# Credentials are taken from the environment (NEO4J_USER / NEO4J_PASSWORD) so
# the password is not stored in the notebook; if NEO4J_PASSWORD is unset the
# user is prompted for it instead.
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver("bolt://localhost",
                              auth=basic_auth(neo4j_user, neo4j_password))
session = driver.session()

Useful Commands:


In [3]:
# Path (relative to this notebook) of the tab-separated edge table to analyse.
filename = (
    '../data/Methylobacter--Methylotenera/'
    'Methylobacter-123--Methylotenera_mobilis-49_network'
    '--some_top_genes--1268_rows.tsv'
)

In [4]:
# Echo the path to confirm which file will be loaded.
filename

'../data/Methylobacter--Methylotenera/Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'

In [5]:
# List the files available in the data directory for this organism pair.
! ls ../data/Methylobacter--Methylotenera

Methylobacter-123--Methylotenera_mobilis-49_network--100.tsv
Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv
Methylobacter-123--Methylotenera_mobilis-49_network.tsv


In [6]:
# Read the tab-separated edge table into a DataFrame.
df = pd.read_csv(filename, delimiter='\t')

In [7]:
# Preview the first four rows to check the column layout.
df.head(4)

Unnamed: 0.1,Unnamed: 0,source,target,weight,association,target_organism,target_gene,source_organism,source_gene,source_organism_name,target_organism_name,source_gene_product,target_gene_product
0,417,Ga0081607_113410,Ga0081607_10311,0.07474,positive,Ga0081607,10311,Ga0081607,113410,Methylobacter-123 (UID203),Methylobacter-123 (UID203),hypothetical protein,hypothetical protein
1,1808,Ga0081607_10311,Ga0081607_113410,0.07474,positive,Ga0081607,113410,Ga0081607,10311,Methylobacter-123 (UID203),Methylobacter-123 (UID203),hypothetical protein,hypothetical protein
2,1641,Ga0081629_11135,Ga0081629_10266,0.07207,positive,Ga0081629,10266,Ga0081629,11135,Methylotenera mobilis-49 (UID203),Methylotenera mobilis-49 (UID203),hypothetical protein,type IV pilus assembly protein PilA
3,1360,Ga0081629_10266,Ga0081629_11135,0.07207,positive,Ga0081629,11135,Ga0081629,10266,Methylotenera mobilis-49 (UID203),Methylotenera mobilis-49 (UID203),type IV pilus assembly protein PilA,hypothetical protein


In [8]:
# (rows, columns) of the loaded edge table.
df.shape

(1268, 13)

In [9]:
# Expected node count: the number of distinct gene identifiers that appear as
# either endpoint (source or target) of any edge in the table.
endpoint_ids = set(df['source']).union(df['target'])
nodes = len(endpoint_ids)
print("{} nodes are expected".format(nodes))

228 nodes are expected


In [10]:
# Sanity check: collect every organism name used on either side of an edge.
# The check below expects exactly two distinct names and prints a diagnostic
# (without raising) when that is not the case.
org_names = set(df['source_organism_name']) | set(df['target_organism_name'])
count = len(org_names)
if count != 2:
    print("Expected exactly 2 organism names, but we have {}".format(count))
    print(org_names)

In [24]:
# Show the distinct organism names collected from the edge table.
org_names

{'Methylobacter-123 (UID203)', 'Methylotenera mobilis-49 (UID203)'}

In [21]:
# Rebuild the graph from scratch: wipe the database, bulk-load the edge table
# with LOAD CSV, and report the node count before and after.

def count_nodes():
    """Return the number of nodes in the database.

    The count is computed by the server (`count(n)`), so only a single record
    is transferred instead of every node.
    """
    records = list(session.run("MATCH (n) RETURN count(n) AS num_nodes"))
    return records[0]["num_nodes"] if records else 0


# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)
print('number of nodes before starting: {}'.format(count_nodes()))

# NOTE(review): the data is loaded from the GitHub copy of the file, not from
# the local `filename` inspected above -- confirm the two are in sync.
# LOAD CSV yields every field as a string, so the weight is converted with
# toFloat() to store it as a number on the relationship.
command = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/Methylobacter--Methylotenera/Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'  
    AS line FIELDTERMINATOR '\t'
    MERGE (g1:Gene {org_gene:line.source, 
                    organism:line.source_organism_name, 
                    gene:line.source_gene,
                    gene_product:line.source_gene_product}) 
    MERGE (g2:Gene {org_gene:line.target,
                    organism:line.target_organism_name,
                    gene:line.target_gene,
                    gene_product:line.target_gene_product}) 
    MERGE (g1) -[:X {weight:toFloat(line.weight), association:line.association}]-> (g2); 
    """
result = session.run(command.rstrip())

num_results = count_nodes()
print('number of nodes after loading: {}'.format(num_results))

number of nodes before starting: 0
number of nodes after loading: 228


In [22]:
# Cross-check the node total with a server-side count.
command = """
MATCH (n)
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=228>


In [23]:
# Give every relationship a type that reflects its `association` property.
# Each statement matches the directed relationships with the given
# association, creates a replacement of type :pos / :neg between the same
# nodes, copies all properties onto it, and deletes the original.
relabel_commands = (
    """
    match (n)-[e1 {association:"positive"}]->(m)
    CREATE (n)-[e2:pos]->(m)
    SET e2 = e1
    WITH e1
    DELETE e1
""",
    """
    match (n)-[e1 {association:"negative"}]->(m)
    CREATE (n)-[e2:neg]->(m)
    SET e2 = e1
    WITH e1
    DELETE e1
""",
)
for command in relabel_commands:
    result = session.run(command.rstrip())


In [14]:
# Re-count the nodes after the relationships were replaced.
command = """
MATCH (n)
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=228>


In [15]:
# Get a sub-graph with some negative interactions for my progress report figure. 

In [16]:
# Count relationship matches without specifying a direction.
# An undirected pattern matches every relationship once per orientation,
# so this total is twice the directed count.
command = """
MATCH () -[r]- ()
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=2536>


In [17]:
# Count relationships with a direction specified.
command = """
MATCH () --> ()
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=1268>


In [18]:
# Count the directed relationships of type :pos.
command = """
MATCH () -[r:pos]-> ()
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=1092>


In [20]:
# Count the directed relationships of type :neg.
command = """
MATCH () -[r:neg]-> ()
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

<Record count(*)=176>


In [None]:
# Count directed relationships again, this time binding both endpoints
# (a, b) in the pattern.
command = """
MATCH (a) --> (b)
RETURN count(*)
""".rstrip()
result = session.run(command)
for record in result:
    print(record)

In [28]:
# Add a label named after the organism to every node of that organism.
# There might be better ways to do this... 
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
# 'Methylobacter-123 (UID203)', 'Methylotenera mobilis-49 (UID203)'
# Label names that contain characters such as '-' must be escaped with
# backticks; wrapping them in single quotes is a Cypher syntax error.
command = """
    match (n {organism:'Methylotenera mobilis-49 (UID203)'})
    set n :`Methylotenera_mobilis-49`
    return n
"""
result = session.run(command.rstrip())
command = """
    match (n {organism:'Methylobacter-123 (UID203)'})
    set n :`Methylobacter-123`
    return n
"""
result = session.run(command.rstrip())

In [29]:
## To label the node according to the variable, I think I need to make my nodes before adding the edges. 

In [None]:
# Add the organism label to every Methylotenera mobilis-49 node.
# The label contains '-', so it has to be escaped with backticks; single
# quotes around a label name are a Cypher syntax error.
command = """
    match (n {organism:'Methylotenera mobilis-49 (UID203)'})
    set n :`Methylotenera_mobilis-49`
    return n
"""
result = session.run(command.rstrip())

In [None]:
# Reference snippet (not executed): generic Cypher pattern for replacing one
# relationship type with another. It is kept as a string because bare Cypher
# in a Python cell is a SyntaxError.
# NOTE(review): this template does not copy properties from `old` to `new`.
rename_relationship_template = """
MATCH (n1)-[old:BRANCH]->(n2) 
CREATE (n1)-[new:CONTAINS]->(n2) 
DELETE old
"""

In [None]:
# Deliberate stop: `assert False` always raises AssertionError, so a
# top-to-bottom run halts at this cell.
assert False
# Everything below this cell is old material.

# OLD BELOW

In [None]:
# Legacy loader: rebuild the graph from network_broken.csv, creating Gene
# nodes with their attributes and an :X relationship per CSV row.

# Start from an empty database.
session.run(""" MATCH (n) DETACH DELETE n """)

# For every row, create (or reuse) both endpoint Gene nodes and connect them
# with an :X relationship carrying the row's weight and association.
command = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken.csv'  
    AS line
    MERGE (g1:Gene {org_gene:line.source, 
                    organism:line.source_organism, 
                    gene:line.source_gene}) 
    MERGE (g2:Gene {org_gene:line.target,
                    organism:line.target_organism,
                    gene:line.target_gene}) 
    MERGE (g1) -[:X {weight:line.weight, association:line.association}]-> (g2); 
    """
result = session.run(command.rstrip())

# Add an organism label (organism_A / organism_B) to the nodes of each organism.
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
label_commands = (
    """
    match (n {organism:'Ga0081607'})
    set n :organism_A
    return n
""",
    """
    match (n {organism:'Ga0081629'})
    set n :organism_B
    return n
""",
)
for command in label_commands:
    result = session.run(command.rstrip())

# Print every node and report how many there are.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: {}'.format(num_results))

# ------------------------------------------------------------------------

http://stackoverflow.com/questions/22670369/neo4j-cypher-how-to-change-the-type-of-a-relationship

```cypher
MATCH (n:User {name:"foo"})-[r:REL]->(m:User {name:"bar"})
CREATE (n)-[r2:NEWREL]->(m)
SET r2 = r
WITH r
DELETE r
```

In [None]:
# set edge labels: write over them and erase the old ones.
# The match must be directed (`->`): an undirected pattern matches each
# relationship once per orientation, which would CREATE a second, reversed
# :pos relationship for every original one.
command = """
    match (n)-[e1 {association:"positive"}]->(m)
    CREATE (n)-[e2:pos]->(m)
    SET e2 = e1
    WITH e1
    DELETE e1
"""
result = session.run(command.rstrip())
for record in result:
    print(record)

Note that I've edited the colors by hand so far.
Open the `:style` sheet (in Neo4j browser), and see things like this: 

```
node.organism_A {
  color: #68BDF6;
  border-color: #5CA8DB;
  text-color-internal: #FFFFFF;
  caption: '{gene}';
}

node.organism_B {
  color: #DE9BF9;
  border-color: #BF85D6;
  text-color-internal: #FFFFFF;
  caption: '{gene}';
}

relationship.neg {
  shaft-width: 2px;
  color: #FF756E;
  border-color: #E06760;
  text-color-internal: #FFFFFF;
}

relationship.pos {
  shaft-width: 2px;
  color: #6DCE9E;
  border-color: #60B58B;
  text-color-internal: #FFFFFF;
}
```

I couldn't find this in a version-controlled repository.  Somewhere some GRASS file exists...

Watch this screencast?

https://neo4j.com/developer/guide-data-visualization/

"we demonstrate how to style nodes and relationships in the Neo4j’s Browser visualization, and how to set colors, sizes, and titles."