In [1]:
# These are boilerplate imports that are often useful.
# It would be cleaner to delete or comment out the ones that this notebook does not use.

import sys
import json
import csv
import yaml

import os
from dotenv import load_dotenv

import pandas as pd
import numpy as np
import math

import matplotlib as mpl

import itertools

import time
from datetime import datetime
# see https://stackoverflow.com/questions/415511/how-do-i-get-the-current-time-in-python
#   for some basics about datetime

import pprint

# sqlalchemy 2.0 documentation: https://www.sqlalchemy.org/
import psycopg2
from sqlalchemy import create_engine, text as sql_text

# the following appears to be deprecated, so we use sqlalchemy instead
# from psycopg2 import sqlio

# the file benchmarking/util.py holds the utilities used for this benchmarking exercise
# This notebook imports util and calls it in the cells below, so
#    benchmarking/util.py must exist before the notebook is run
sys.path.append('benchmarking/')
import util
# to invoke a function "foo()" inside util.py, use "util.foo()"

In [2]:
# Load the database connection settings from an env file into the process
#    environment, then read them into the variables used by the cells below.
# NOTE(review): this path points inside Jupyter's `.ipynb_checkpoints` directory --
#    confirm the env file is really meant to live there.
dotenv_path = 'benchmarking/.ipynb_checkpoints/variables.env'

# override=True: values from the file replace variables already set in the environment.
# load_dotenv returns False when no variable was set from the file (e.g. the file
#    was not found); keep the result so the error below can report it.
env_file_loaded = load_dotenv(dotenv_path, override=True)

schema = os.getenv("SCHEMA")
port = os.getenv("PORT")
host = os.getenv("HOST")
database = os.getenv("DATABASE")
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Fail fast: os.getenv returns None for an unset variable, and a None value would
#    otherwise end up in the connection settings as the literal text "None".
db_settings = {
    "SCHEMA": schema,
    "PORT": port,
    "HOST": host,
    "DATABASE": database,
    "USERNAME": username,
    "PASSWORD": password,
}
missing_settings = [key for key, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        f"Missing database setting(s) {missing_settings}; "
        f"env file {dotenv_path!r} loaded: {env_file_loaded}"
    )

In [3]:
# following https://www.geeksforgeeks.org/connecting-postgresql-with-sqlalchemy-in-python/

# Imported next to its only use; move it to the imports cell if preferred.
from sqlalchemy.engine import URL

# Build the URL from its parts rather than with an f-string, so that special
#    characters in the username or password (e.g. '@', ':', '/') are escaped
#    instead of corrupting the URL.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username=username,
    password=password,
    host=host,
    # os.getenv returns a string; an empty/unset PORT is passed as None
    port=int(port) if port else None,
    database=database,
)

# connect_args sets search_path to the configured schema
#    (e.g. 'new_york_city' in the airbnb database)
# isolation_level SERIALIZABLE makes concurrent transactions behave as if they
#    ran one after another, which is good for the benchmarking we will be doing
# for debugging, add echo=True or echo_pool="debug" to the call below
#    (see https://docs.sqlalchemy.org/en/20/core/engines.html)
db_eng = create_engine(db_url,
                       connect_args={'options': f'-csearch_path={schema}'},
                       isolation_level='SERIALIZABLE')

# Per the SQLAlchemy docs, the engine does not connect until it is first used,
#    so this message only confirms that the engine object was built.
print("Successfully created db engine.")

# for general info on sqlalchemy connections,
#    see: https://docs.sqlalchemy.org/en/20/core/connections.html

Successfully created db engine.


In [4]:
# Neighbourhoods to benchmark, plus the index list handed to the benchmark runner.
# NOTE(review): each index entry looks like a [column, table] pair -- confirm
#    against benchmarking/util.py.
neighbourhoods = [
    'New Springville',
    'Fort Hamilton',
    'Long Island City',
    'Bedford-Stuyvesant',
]
neighbourhoods_all_indexes = [
    ['datetime', 'reviews'],
    ['neighbourhood', 'listings'],
]

# One entry per neighbourhood: the key 'update_datetimes_query_<name>' maps to a
#    ('+' query, '-' query) tuple built by util.
# NOTE(review): presumably the '-' query undoes the '+' one, and 5 is the size of
#    the datetime shift -- confirm in benchmarking/util.py.
neighborhoods_q_dict = {
    f'update_datetimes_query_{nb_name}': (
        util.build_query_reviews_datetime_update(5, '+', 'neighbourhood', nb_name),
        util.build_query_reviews_datetime_update(5, '-', 'neighbourhood', nb_name),
    )
    for nb_name in neighbourhoods
}

In [None]:
# Run the benchmark over the per-neighbourhood query pairs.
# NOTE(review): 50 is presumably the number of repetitions and the last argument
#    the results file -- confirm against benchmarking/util.py.
util.run_update_datetime_query_neighbourhoods(
    db_eng,
    50,
    neighborhoods_q_dict,
    neighbourhoods_all_indexes,
    'update_datetimes_query.json',
)

In [None]:
# Neighbourhood groups to benchmark, plus the index list handed to the benchmark runner.
# NOTE(review): each index entry looks like a [column, table] pair -- confirm
#    against benchmarking/util.py.
neighbourhood_groups = [
    'Staten Island',
    'Bronx',
    'Queens',
    'Manhattan',
]
neighborhood_groups_all_indexes = [
    ['datetime', 'reviews'],
    ['neighbourhood_group', 'listings'],
]

# One entry per group: the key 'update_datetimes_query_<group>' maps to a
#    ('+' query, '-' query) tuple built by util.
# NOTE(review): the builder is given 'group' while the index above is on
#    'neighbourhood_group' -- confirm that 'group' is what util expects here.
neighborhood_groups_q_dict = {
    f'update_datetimes_query_{group_name}': (
        util.build_query_reviews_datetime_update(5, '+', 'group', group_name),
        util.build_query_reviews_datetime_update(5, '-', 'group', group_name),
    )
    for group_name in neighbourhood_groups
}

In [None]:
# Run the benchmark over the per-neighbourhood-group query pairs.
# NOTE(review): the per-neighbourhood run earlier in this notebook is given the
#    same 'update_datetimes_query.json' name -- confirm that util merges into an
#    existing file rather than overwriting it.
util.run_update_datetime_query_neighbourhoods(
    db_eng,
    50,
    neighborhood_groups_q_dict,
    neighborhood_groups_all_indexes,
    'update_datetimes_query.json',
)