# Perfect projections

In [None]:
#!pip install -r requirements.txt

## How to use a projection

In [None]:
import csv

# Read the whole CSV into memory as a list of rows (each row is a list of strings).
# newline='' is what the csv module requires of file objects it reads, so that
# quoted fields containing line breaks are parsed as one field.
with open('./data/movie.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    data = list(reader)

print(len(data))

In [None]:
# Distinct values found at index 1 of every row in `data`.
edge_types = {triple[1] for triple in data}
print(edge_types)

Head over to the Neo4j Browser to create an index:

In [None]:
%%writefile cypher/create_index.cql
// Index :Entity nodes on their `name` property.
CREATE INDEX entity FOR (e:Entity) ON (e.name)

In [None]:
%%writefile cypher/create_rel_apoc.cql
:auto LOAD CSV FROM 'file:///movie.csv' AS row
// Each CSV row is handled by the subquery below: row[0] and row[2] are used as
// :Entity node names, and row[1] is passed to APOC as the relationship type.
// The subquery runs IN TRANSACTIONS OF 1000 ROWS.
CALL {
    WITH row
    // MERGE looks the node up by name and only creates it when it is missing.
    MERGE (n:Entity {name:row[0]})
    MERGE (m:Entity {name:row[2]})
    WITH n, m, row[1] as type
    // NOTE(review): apoc.create.relationship presumably creates a new relationship
    // on every call, so re-running this load may duplicate edges — confirm.
    CALL apoc.create.relationship(n, type, {}, m)
    YIELD rel
    RETURN 1 as x
} IN TRANSACTIONS OF 1000 ROWS
RETURN x

In [None]:
%%writefile cypher/edge_match.cql
// Look up a single edge by the names of both end nodes; the COUNTRY
// relationship is matched without a direction.
MATCH (n:Entity {name:'002 Operazione Luna'})-[:COUNTRY]-(m:Entity {name:'Italy'})
RETURN n, m

## Creating a projection in igraph

In [None]:
from graphtastic.database.neo4j import Neo4jConnect

Head to the Neo4j Browser and run the following query to see an example of the co-star pattern:

In [None]:
%%writefile cypher/starred_in_rel.cql
// Show one example of the co-star pattern: a single film node with outgoing
// STARRING relationships to two actor nodes.
MATCH (act1:Entity)<-[:STARRING]-(film:Entity)-[:STARRING]->(act2:Entity)
RETURN act1, film, act2 LIMIT 1

In [None]:
def get_co_stars_neo4j(connection):
    """Fetch the names of every pair of actors linked through a shared film.

    Runs a Cypher query that matches two ``STARRING`` relationships leaving
    the same ``film`` node and collects the ``name`` of both end nodes.

    Args:
        connection: Object exposing ``query(cypher)`` whose result has a
            ``data()`` method returning records keyed by ``'act1'`` and
            ``'act2'``.

    Returns:
        list: One ``[act1_name, act2_name]`` list per matched record, in the
        order the records were returned.
    """
    cypher = (
        'MATCH (act1:Entity)<-[:STARRING]-(film:Entity)'
        '-[:STARRING]->(act2:Entity)  '
        'RETURN act1, act2'
    )
    records = connection.query(cypher).data()

    pairs = []
    for record in records:
        pairs.append([record['act1']['name'], record['act2']['name']])
    return pairs

In [None]:
# Connect to the local Neo4j instance over Bolt.
# NOTE: the user name and password are hard-coded for the local tutorial
# database; do not reuse this pattern with real credentials.
connection = Neo4jConnect(
    'bolt://localhost:7687',
    'admin',
    'testpython',
)
print(connection)

In [None]:
# Pull the co-star pairs; the finally clause releases the connection even when
# the query raises, so a failed run does not leave it open.
try:
    co_stars = get_co_stars_neo4j(connection)
finally:
    connection.close()
print(co_stars[:5])

In [None]:
# Unique actor names across all co-star pairs. Sorting fixes the order, and with
# it the igraph vertex ids derived from positions in this list, so results are
# reproducible across kernel restarts (set iteration order for strings is not).
nodes = sorted({node for edge in co_stars for node in edge})

In [None]:
# Map each actor name to its position in `nodes`; that position is used as the
# igraph vertex id when the edge list is built.
igraph_ids = dict(zip(nodes, range(len(nodes))))
print(igraph_ids)

In [None]:
# Translate every pair of actor names into a pair of integer vertex ids.
edgelist = [
    [igraph_ids[source_name], igraph_ids[target_name]]
    for source_name, target_name in co_stars
]

In [None]:
import igraph

# Build the graph in one constructor call: one vertex per actor id and one edge
# per entry in `edgelist`.
g = igraph.Graph(n=len(igraph_ids), edges=edgelist)
# Label vertex i with the actor name stored at nodes[i].
g.vs['actor'] = nodes

In [None]:
# Sanity checks: the graph has one vertex per actor name, and vertex i carries
# nodes[i]. Comparing the whole attribute list checks every vertex instead of
# only index 2, and does not raise IndexError on graphs with fewer than three
# vertices.
assert len(g.vs) == len(nodes)
assert g.vs['actor'] == nodes

In [None]:
# Names of the two actors joined by the first edge of the graph.
actor1, actor2 = (
    g.vs[g.es[0].source]['actor'],
    g.vs[g.es[0].target]['actor'],
)

In [None]:
def _cypher_escape(text):
    """Escape backslashes and double quotes for use inside a double-quoted Cypher string."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


# Look the first igraph edge up in Neo4j again: find the film(s) both actors
# star in. The names are escaped first because a name containing a double quote
# would otherwise end the Cypher string literal early and break the query.
query = (
    f'MATCH (act1:Entity {{name:"{_cypher_escape(actor1)}"}})'
    '<-[:STARRING]-(film:Entity)'
    f'-[:STARRING]->(act2:Entity {{name:"{_cypher_escape(actor2)}"}}) '
    'RETURN act1, film, act2'
)

connection = Neo4jConnect('bolt://localhost:7687', 'admin', 'testpython')
# Close the connection even if the query fails.
try:
    result = connection.query(query).data()
finally:
    connection.close()
print(result)

## Creating a projection in Neo4j

In [None]:
%%writefile cypher/create_film_index.cql
// Index :Film nodes on their `name` property; the projection queries look
// :Film nodes up by name.
CREATE INDEX film
FOR (f:Film)
ON (f.name)

In [None]:
%%writefile cypher/periodic_iterate.cql
// Build the film projection in batches of 1000, without parallelism:
//   - the first statement returns pairs of films that have a STARRING
//     relationship to the same actor node;
//   - the second statement MERGEs a :Film node per film name and a
//     HAS_COMMON_ACTORS relationship from f1 to f2.
// NOTE(review): the match pattern is symmetric, so each pair presumably arrives
// in both orders and ends up linked in both directions — confirm this is intended.
CALL apoc.periodic.iterate(
    "MATCH (film1:Entity)-[:STARRING]->(actor:Entity)
     <-[:STARRING]-(film2:Entity) 
     RETURN film1, film2",
    "MERGE (f1:Film {name:film1.name})
     MERGE (f2:Film {name:film2.name})
     MERGE (f1)-[:HAS_COMMON_ACTORS]->(f2)",
    {batchSize:1000, parallel:false}
)

In [None]:
%%writefile cypher/period_it_parallel.cql
// Copy country names onto the :Film nodes: for every (film)-[:COUNTRY]->(country)
// pair, append country.name to the list property f.country. coalesce() starts
// from an empty list when the property is not set yet.
// NOTE(review): parallel:true lets batches run concurrently; two batches that
// touch the same :Film node may contend for it — confirm this is acceptable here.
CALL apoc.periodic.iterate(
    "MATCH (film:Entity)-[:COUNTRY]->(country:Entity) 
     RETURN film, country",
    "MATCH (f:Film {name:film.name})
     SET f.country = coalesce(f.country, []) + country.name",
    {batchSize:10000, parallel:true}
)

In [None]:
%%writefile cypher/remove_nodes_and_edges.cql
// Tear the projection down again: delete every :Film node together with its
// relationships (DETACH DELETE), 1000 nodes per batch.
CALL apoc.periodic.iterate("
    MATCH (f:Film) RETURN f",
    "DETACH DELETE f", 
    {batchSize:1000, parallel:false}
)

## Putting the projection to work

### Analyzing the *igraph* actor projection

In [None]:
# Size of the actor projection: number of vertices, then number of edges.
print(g.vcount())
print(g.ecount())

In [None]:
# Degree of every vertex in vertex-id order. The values of `igraph_ids` are
# exactly the ids 0..n-1 in that order, so asking for all vertices is equivalent.
degree = g.degree()

In [None]:
import matplotlib.pyplot as plt

# Histogram of vertex degrees in 20 bins; the y axis is log-scaled.
plt.hist(
    g.degree(),
    bins=20,
    color='#3A9BDC',
    edgecolor='#1260CC',
)
plt.yscale('log')
plt.ylabel('Frequency')
plt.xlabel('Node degree')
plt.show()

In [None]:
# Pair every degree with its actor name and rank by degree, highest first;
# print the top three.
actor_degree = sorted(
    zip(degree, g.vs['actor']),
    key=lambda pair: pair[0],
    reverse=True,
)
print(actor_degree[:3])

## Exploring connected components

In [None]:
# Split the graph into connected components and print how many there are.
# NOTE(review): `g` was created with igraph.Graph() defaults, presumably
# undirected, in which case mode='weak' should make no difference — confirm.
cc = g.components(mode='weak')
print(len(cc))

In [None]:
# Component sizes, largest first; show the ten biggest.
cc_size = sorted((len(component) for component in cc), reverse=True)
print(cc_size[:10])

In [None]:
from collections import Counter

# Tally how many components share each size, as (size, count) pairs listed
# from the largest size down.
cc_freq = sorted(
    Counter(cc_size).items(),
    key=lambda size_count: size_count[0],
    reverse=True,
)
print(cc_freq)

## Exploring cliques in our graph

In [None]:
# Take the first of the largest cliques and print its size followed by its
# vertex ids, one per line.
largest_clique = g.largest_cliques()[0]
print(len(largest_clique), largest_clique, sep='\n')

In [None]:
# Translate the clique's vertex ids back into actor names.
clique_actors = [g.vs[vertex_id]['actor'] for vertex_id in largest_clique]
print(clique_actors)

### Analyzing the Neo4j film projection

In [None]:
%%writefile cypher/film_match.cql
// Count the :Film nodes.
MATCH (f:Film) RETURN count(f)

In [None]:
%%writefile cypher/has_common_actor_match.cql
// Count the directed HAS_COMMON_ACTORS relationships between :Film nodes.
MATCH (:Film)-[r:HAS_COMMON_ACTORS]->(:Film) RETURN count(r)

In [None]:
%%writefile cypher/has_comm_act_by_country.cql
// For every HAS_COMMON_ACTORS relationship whose two films have different
// `country` values, group by the ordered pair [c1, c2] and count the
// relationships per pair, most frequent pair first.
MATCH (f1:Film)-[:HAS_COMMON_ACTORS]->(f2:Film)
WITH f1.country as c1, f2.country as c2
WHERE c1 <> c2
WITH [c1, c2] as country_pair
RETURN country_pair, count(country_pair)
ORDER BY count(country_pair) DESC