In [1]:
import keras
import numpy as np
import more_itertools
import json
import pandas as pd
from neo4j.v1 import GraphDatabase, Driver

Using TensorFlow backend.
  """


In [4]:
query = """
            MATCH p=
                (u:User)
                    -[:WROTE]->
                (r:Review)
                    -[:REVIEWS]->
                (l:Listing) WHERE EXISTS(l.review_scores_value)
            RETURN 
                u.user_id AS user,
                l.availability_365 AS av365,
                l.availability_90 AS av90,
                l.availability_60 AS av60,
                l.availability_30 AS av30,
                l.cleaning_fee AS cleaning_fee,
                l.security_deposit AS security_deposit,
                l.monthly_price AS monthly_price,
                l.weekly_price AS weekly_price,
                l.square_feet AS square_feet,
                l.beds AS num_beds,
                l.bedrooms AS num_bedrooms,
                l.bathrooms AS num_bathrooms,
                l.accommodates AS accommodates,
                l.price AS price,
                CASE WHEN l.review_scores_value > 9 THEN 1 ELSE 0 END AS y;
        """

In [7]:
query_params = { "dataset_name": "airbnb", "test": False }

In [9]:
settings = { "neo4j_url": "bolt://localhost:7687", "neo4j_user": "neo4j", "neo4j_password": "1234" }

In [11]:
driver = GraphDatabase.driver(settings["neo4j_url"], auth=(settings["neo4j_user"], settings["neo4j_password"]))

In [20]:
with driver.session() as session:
    data = session.run(query, **query_params).data()
    data = [ (np.array([
    i["user"],
    i["av365"] if i["av365"] is not None else 0,
    i["av90"] if i["av90"] is not None else 0,
    i["av60"] if i["av60"] is not None else 0,
    i["av30"] if i["av30"] is not None else 0,
    i["cleaning_fee"] if i["cleaning_fee"] is not None else 0,
    i["security_deposit"] if i["security_deposit"] is not None else 0,
    i["monthly_price"] if i["monthly_price"] is not None else 0,
    i["weekly_price"] if i["weekly_price"] is not None else 0,
    i["square_feet"] if i["square_feet"] is not None else 0,
    i["num_beds"] if i["num_beds"] is not None else 0,
    i["num_bedrooms"] if i["num_bedrooms"] is not None else 0,
    i["accommodates"] if i["accommodates"] is not None else 0,
    i["price"] if i["price"] is not None else 0
    ]),i["y"]) for i in data]

    # Format our batches in the way Keras expects them:
    # An array of tuples (x_batch, y_batch)

    # An x_batch is a numpy array of shape (batch_size, 12), 
    # containing the concatenated style and style_preference vectors. 

    # A y_batch is a numpy array of shape (batch_size,1) containing the review scores.

[(array(['421148', '240', '0', '0', '0', '0', '0', '0', '0', '3600', '3',
         '1', '6', '300.0'], dtype='<U6'), 1),
 (array(['1591491', '312', '37', '16', '16', '0', '0', '0', '0', '0', '1',
         '1', '2', '40.0'], dtype='<U7'), 1),
 (array(['14293694', '312', '37', '16', '16', '0', '0', '0', '0', '0', '1',
         '1', '2', '40.0'], dtype='<U8'), 1),
 (array(['34456311', '364', '89', '59', '29', '0', '0', '0', '275.0', '0',
         '1', '1', '2', '44.0'], dtype='<U8'), 1),
 (array(['36483380', '364', '89', '59', '29', '0', '0', '0', '275.0', '0',
         '1', '1', '2', '44.0'], dtype='<U8'), 1),
 (array(['20062085', '364', '89', '59', '29', '0', '0', '0', '275.0', '0',
         '1', '1', '2', '44.0'], dtype='<U8'), 1),
 (array(['13281665', '364', '89', '59', '29', '0', '0', '0', '275.0', '0',
         '1', '1', '2', '44.0'], dtype='<U8'), 1),
 (array(['2128793', '364', '89', '59', '29', '0', '0', '0', '275.0', '0',
         '1', '1', '2', '44.0'], dtype='<U7'), 1),
 (array