## Graph Similarity Search Project ##
### This is the implementation for the similarity search project using python with Neo4j

In [128]:
# This program fetches the required user's facebook data via the free facebook api.
# It currently fetches friend's data, but it can be extended.
# Author: Ajose Opeyemi

# importing the requests library for fetching the required webpage
import requests as r
import pandas as pd
import json

# setting up parameters
# Importing the "friend's" list downloaded from facebook
file = r"C:\Users\Victor\Desktop\Bae\Code files\facebook-Elvicharde\friends_and_followers\friends.json"
with open(file) as friends_data:
    friends = json.load(friends_data)
    friends_data.close()

In [129]:
# Parsing and handling the data

me = "Elvicharde"
friends_list = friends['friends_v2']

for friend in friends_list:
    #cleaning up the imported data
    friend['ID'] = friend.pop('timestamp')
    friend['Friends'] = friend.pop('name')
    
friends_data = pd.DataFrame(friends_list)    #Setting up a dataframe
del friends_data['contact_info']    # Cleaning up the dataframe.
print(friends_data)    # Printing the first 3 rows

             ID                   Friends
0    1627383250               Collins Obi
1    1626987935  Ademola Ebenezer Adeyemi
2    1626987929           Blessing Mathew
3    1626987884               Tosin Akeem
4    1622810997          Olaniyi Akintayo
..          ...                       ...
783  1268394361              Sodique Alim
784  1268329470       OluBorode Damie Ope
785  1268329416         OluwaTosin Fabrio
786  1268245796                 Sorad Jnr
787  1268242864              Demi Dinakin

[788 rows x 2 columns]


## Interfacing with Neo4j for visualization and query

In [130]:
# Creating the connection
from neo4j import GraphDatabase, basic_auth
from random import randint as randint


####### import data into neo4j ##### 
#from neo4j import GraphDatabase

friends_list = friends_data.values.tolist()
commands = []

# illegal_names = [1603558328,1589458329,1576927897,1574813063,1574452201,
#                  1571760211,1548836078,1504036137,1499675661,1451300849,1304871572]

root_user = 'CREATE (root:user {Name:"Elvicharde", ID:"root_node"})'    # The logged-in user/ account holder

commands.append(root_user)    #Adding the user creation and alias to the cypher query list

# Creating friends and matching to the root node
for i in friends_list:
#    if ((i[0] not in illegal_names) and (i[1].find('-') < 0)):
    create_friends = (f'CREATE (f:friend {{Name: "{i[1]}", ID: {i[0]}}})')
    create_relationships = f'MATCH (f:friend{{Name:"{i[1]}"}}), (root:user{{Name:"Elvicharde"}}) CREATE (f)-[:FRIENDS_WITH{{since: {randint(2010, 2021)}}}]->(root) RETURN * LIMIT 1'
    # else:
#     continue
    commands.append(create_friends)
    commands.append(create_relationships)
commands[:5]

['CREATE (root:user {Name:"Elvicharde", ID:"root_node"})',
 'CREATE (f:friend {Name: "Collins Obi", ID: 1627383250})',
 'MATCH (f:friend{Name:"Collins Obi"}), (root:user{Name:"Elvicharde"}) CREATE (f)-[:FRIENDS_WITH{since: 2010}]->(root) RETURN * LIMIT 1',
 'CREATE (f:friend {Name: "Ademola Ebenezer Adeyemi", ID: 1626987935})',
 'MATCH (f:friend{Name:"Ademola Ebenezer Adeyemi"}), (root:user{Name:"Elvicharde"}) CREATE (f)-[:FRIENDS_WITH{since: 2018}]->(root) RETURN * LIMIT 1']

In [131]:
# Connect to database
data_base_connection = GraphDatabase.driver(#uri = #"bolt://54.89.206.251:7687", auth=basic_auth("neo4j", "advancement-characteristic-energy")) #for sandbox connection
uri = 'bolt://localhost:7687', auth = basic_auth('neo4j','similarity')) # for local dbms connection
session = data_base_connection.session()

def execute_commands(commands, session, type = 0):
    if not type:
        # re-setting database to clean status
        if session.run('MATCH (n) RETURN n'):
            database_cleanup = 'MATCH (n) DETACH DELETE (n)'
            session.run(database_cleanup)
    
    # Populating with new data
    for i in commands:
        session.run(i)

        
execute_commands(commands, session)


In [132]:
# Adding more relationships to the existing nodes e.g. Attended $University, Lives in $Location

state_string = '''FC|Abuja AB|Abia AD|Adamawa AK|Akwa_Ibom AN|Anambra BA|Bauchi BY|Bayelsa BE|Benue BO|Borno CR|Cross_River DE|Delta EB|Ebonyi ED|Edo\
 EK|Ekiti EN|Enugu GO|Gombe IM|Imo JI|Jigawa KD|Kaduna KN|Kano KT|Katsina KE|Kebbi KO|Kogi KW|Kwara LA|Lagos NA|Nassarawa NI|Niger OG|Ogun ON|Ondo OS|Osun
OY|Oyo PL|Plateau RI|Rivers SO|Sokoto TA|Taraba YO|Yobe ZA|Zamfara'''

location_node = state_string.split(' ')    # cleaning up the string
state_dict = {}    # Setting up the state dictionary

for i in range(len(location_node)):
    x = location_node[i].split('|')
    state_dict[x[0]] = x[1]

state_dict    # final dictionary for label

{'FC': 'Abuja',
 'AB': 'Abia',
 'AD': 'Adamawa',
 'AK': 'Akwa_Ibom',
 'AN': 'Anambra',
 'BA': 'Bauchi',
 'BY': 'Bayelsa',
 'BE': 'Benue',
 'BO': 'Borno',
 'CR': 'Cross_River',
 'DE': 'Delta',
 'EB': 'Ebonyi',
 'ED': 'Edo',
 'EK': 'Ekiti',
 'EN': 'Enugu',
 'GO': 'Gombe',
 'IM': 'Imo',
 'JI': 'Jigawa',
 'KD': 'Kaduna',
 'KN': 'Kano',
 'KT': 'Katsina',
 'KE': 'Kebbi',
 'KO': 'Kogi',
 'KW': 'Kwara',
 'LA': 'Lagos',
 'NA': 'Nassarawa',
 'NI': 'Niger',
 'OG': 'Ogun',
 'ON': 'Ondo',
 'OS': 'Osun\nOY',
 'PL': 'Plateau',
 'RI': 'Rivers',
 'SO': 'Sokoto',
 'TA': 'Taraba',
 'YO': 'Yobe',
 'ZA': 'Zamfara'}

In [133]:
Uni_string = '''Mewar International University Nasarawa, Adekunle Ajasin University Ondo, Federal University of Agriculture Ogun, \
Obafemi Awolowo University Osun, Abia State University Abia, Joseph Ayo Babalola University Osun, Redeemers University Nigeria Osun, \
Adeleke University Osun Afe Babalola University Ekiti,  Akwa_Ibom State University Akwa_Ibom, Bakassi Technical University Akwa_Ibom, \
American University of Nigeria Adamawa, Abubakar Tafawa Balewa University Bauchi, Adamawa State University Adamawa, Achievers University Ondo'''

Uni_list = [University for University in Uni_string.split(',')]
Uni_dict = {}

for i in range(len(Uni_list)):
    x = Uni_list[i].rpartition(' ')
    Uni_dict[x[2]] = x[0].strip(' ')


Uni_dict

{'Nasarawa': 'Mewar International University',
 'Ondo': 'Achievers University',
 'Ogun': 'Federal University of Agriculture',
 'Osun': 'Redeemers University Nigeria',
 'Abia': 'Abia State University',
 'Ekiti': 'Adeleke University Osun Afe Babalola University',
 'Akwa_Ibom': 'Bakassi Technical University',
 'Adamawa': 'Adamawa State University',
 'Bauchi': 'Abubakar Tafawa Balewa University'}

In [134]:
# Updating properties of nodes

new_commands = []
Universities = list(Uni_dict.keys())
States = list(state_dict.keys())

for i in friends_list:
    x = randint(0,len(Universities)-1)
    y = randint(0,len(States)-1)
    University = Uni_dict[Universities[x]]
    State = state_dict[States[y]]
    Query_1 = f'MATCH (f {{Name: "{i[1]}"}}) SET f.ATTENDED = "{University}" RETURN * LIMIT 1'
    Query_2 = f'MATCH (f {{Name: "{i[1]}"}}) SET f.LIVES_IN = "{State}" RETURN * LIMIT 1'

    new_commands.append(Query_1)
    new_commands.append(Query_2)

new_commands
    

['MATCH (f {Name: "Collins Obi"}) SET f.ATTENDED = "Redeemers University Nigeria" RETURN * LIMIT 1',
 'MATCH (f {Name: "Collins Obi"}) SET f.LIVES_IN = "Ebonyi" RETURN * LIMIT 1',
 'MATCH (f {Name: "Ademola Ebenezer Adeyemi"}) SET f.ATTENDED = "Achievers University" RETURN * LIMIT 1',
 'MATCH (f {Name: "Ademola Ebenezer Adeyemi"}) SET f.LIVES_IN = "Katsina" RETURN * LIMIT 1',
 'MATCH (f {Name: "Blessing Mathew"}) SET f.ATTENDED = "Bakassi Technical University" RETURN * LIMIT 1',
 'MATCH (f {Name: "Blessing Mathew"}) SET f.LIVES_IN = "Kebbi" RETURN * LIMIT 1',
 'MATCH (f {Name: "Tosin Akeem"}) SET f.ATTENDED = "Achievers University" RETURN * LIMIT 1',
 'MATCH (f {Name: "Tosin Akeem"}) SET f.LIVES_IN = "Kaduna" RETURN * LIMIT 1',
 'MATCH (f {Name: "Olaniyi Akintayo"}) SET f.ATTENDED = "Adeleke University Osun Afe Babalola University" RETURN * LIMIT 1',
 'MATCH (f {Name: "Olaniyi Akintayo"}) SET f.LIVES_IN = "Borno" RETURN * LIMIT 1',
 'MATCH (f {Name: "Ilemona Fred Yahaya"}) SET f.ATTEND

In [139]:
# Updating the new reationships
execute_commands(new_commands, session, type = 1)

## Running The Graph algorithms

In [None]:
# First algorithm




# Defining search functions

def cosine_similarity(query, graph, cut_off, limit, relationship):
    '''
    This function queries the input graph for the user-defined query.
    Since similarity is required. The function returns a dictionary
    of the retrieved nodes and their corresponding
    cosine similarity measure with respect to the query. 
    
    '''

    
    pass


def euclidian_distance(query, graph, cut_off, limit):
    '''
    This function queries the input graph for the user-defined query.
    Since similarity is required. The function returns a dictionary
    of the retrieved nodes and their corresponding
    similarity measure (euclidian distance measure) with the query. 
    
    '''
    pass


def node_similarity(query, graph, cut_off, limit):
    '''
    This function queries the input graph for the user-defined query.
    Since similarity is required. The function returns a dictionary
    of the retrieved nodes and their corresponding
    KNN-based similarity measure with the query. 
    
    '''
    pass
