# Parameter Setup

In [1]:
from elasticsearch_dsl import analyzer, Document, Date, Text, Integer, Keyword, Double
from elastic_enterprise_search import AppSearch

import pandas as pd
import logging
import json
import os

In [2]:
# Initialize Elastic App Search.
# The endpoint and the private API key can be overridden through the
# APP_SEARCH_URL / APP_SEARCH_API_KEY environment variables; the fallbacks
# are the values this notebook was originally run with, so behavior is
# unchanged when the variables are not set.
# NOTE(review): a private API key is committed here in clear text -- consider
# rotating it and supplying it only through the environment.
app_search = AppSearch(
    os.environ.get("APP_SEARCH_URL", "http://localhost:3002"),
    http_auth=os.environ.get(
        "APP_SEARCH_API_KEY", "private-6jj3ai4ckkq2xykcocosmv6o"
    ),
)

In [3]:
# Define a document class using data types from elasticsearch_dsl
class Listing(Document):
    """Typed field layout for one Airbnb listing snapshot.

    In this notebook the class is used as an in-memory document builder:
    ``ingest_data`` creates an instance per DataFrame row, assigns the
    attributes below and serializes it with ``to_dict(include_meta=False)``.
    """

    # --- Identifiers and crawl metadata -----------------------------------
    # Composite key built in ingest_data: "<original id>-<crawled_date>".
    id = Text()
    # Listing id exactly as found in the source row.
    org_id = Integer()
    listing_url = Text()
    scrape_id = Integer()
    last_scraped = Keyword()
    # Filled from the first 8 characters of scrape_id (see get_crawled_date).
    # NOTE(review): presumably a YYYYMMDD string -- confirm that the Date
    # field's default format accepts it before using this mapping directly.
    crawled_date = Date()

    # --- Listing and host attributes --------------------------------------
    name = Text(analyzer='snowball')
    host_id = Integer()
    host_is_superhost = Keyword()
    host_identity_verified = Text(fields={'raw': Keyword()})
    room_type = Text(fields={'raw': Keyword()})
    accommodates = Integer()
    guests_included = Integer()
    minimum_nights = Integer()
    maximum_nights = Integer()
    calendar_updated = Text(fields={'raw': Keyword()})
    instant_bookable = Keyword()
    is_business_travel_ready = Keyword()
    cancellation_policy = Text(fields={'raw': Keyword()})

    # validate_price yields floats (astype('float'), weekly_price / 7.0),
    # so the field is declared as a floating point number, not an Integer.
    price = Double()

    # --- Availability counters --------------------------------------------
    availability_30 = Integer()
    availability_60 = Integer()
    availability_90 = Integer()
    availability_365 = Integer()

    # --- Reviews ----------------------------------------------------------
    number_of_reviews = Integer()
    first_review = Text(fields={'raw': Keyword()})
    last_review = Text(fields={'raw': Keyword()})
    review_scores_rating = Integer()
    review_scores_accuracy = Integer()
    review_scores_cleanliness = Integer()
    review_scores_checkin = Integer()
    review_scores_communication = Integer()
    review_scores_location = Integer()
    review_scores_value = Integer()
    # Derived value computed by get_overall_rating from the six sub-scores.
    overall_rating = Double()

In [4]:
# Define functions for processing documents
def clean_currency(price):
    """Strip currency formatting ('$' and thousands separators) from a price.

    Args:
        price: Raw price value. Strings such as ``'$1,234.50'`` are cleaned;
            any non-string value (e.g. an already numeric price) is returned
            unchanged instead of raising ``TypeError`` on the ``in`` test.

    Returns:
        The cleaned string (e.g. ``'1234.50'``), or the original object when
        it is not a string.
    """
    if not isinstance(price, str):
        return price

    return price.replace('$', '').replace(',', '')

def get_overall_rating(row):
    """Combine the six review sub-scores of a row into one overall rating.

    Each sub-score is read from ``row`` and converted with ``float``; their
    sum is halved and then divided by six (the number of sub-scores).
    """
    score_fields = (
        'review_scores_accuracy',
        'review_scores_cleanliness',
        'review_scores_checkin',
        'review_scores_communication',
        'review_scores_location',
        'review_scores_value',
    )

    scores = [float(row[field]) for field in score_fields]

    # Left-to-right accumulation, starting from the first score.
    total = sum(scores[1:], scores[0])

    return (total / 2.0) / 6.0

def validate_price(df):
    """Ensure ``df`` has a numeric 'price' column.

    Three cases are handled:

    * 'price' exists: nulls become '0', currency formatting is removed with
      ``clean_currency`` and the column is cast to float.
    * only 'weekly_price' exists (e.g. athens_2020-07-21_data_listings.csv.gz):
      it is cleaned the same way and 'price' is derived as weekly_price / 7.
    * neither exists: both columns are created and set to 0.0. (The previous
      code called ``fillna`` on the missing columns here and raised KeyError.)

    Args:
        df: Listings DataFrame; it is modified in place.

    Returns:
        The same DataFrame, for call chaining.
    """
    if 'price' in df.columns:
        # Assign the result back instead of chained ``inplace=True`` fillna,
        # which may operate on a temporary copy of the column.
        df['price'] = (
            df['price'].fillna('0').apply(clean_currency).astype('float')
        )

    elif 'weekly_price' in df.columns:
        df['weekly_price'] = (
            df['weekly_price'].fillna('0').apply(clean_currency).astype('float')
        )
        df['price'] = df['weekly_price'] / 7.0

    else:
        # Neither column is available: default both to 0
        df['price'] = 0.0
        df['weekly_price'] = 0.0

    return df

def get_crawled_date(df):
    """Add a 'crawled_date' column holding the first 8 characters of 'scrape_id'.

    'scrape_id' is converted to its string form before slicing. The frame is
    updated in place and also returned.
    """
    scrape_id_text = df['scrape_id'].astype(str)
    df['crawled_date'] = scrape_id_text.str.slice(stop=8)

    return df

def gen_missing_columns(df):
    """Create any expected listing column that is absent, using a default.

    Columns that already exist are left untouched. 'guests_included' is the
    one non-constant default: when missing it is copied from 'accommodates'
    (which is itself defaulted to 0 first if needed). The frame is updated in
    place and returned.
    """
    # Constant defaults applied before 'guests_included' ...
    before_guests = (
        ('host_is_superhost', "f"),
        ('host_identity_verified', "f"),
        ('room_type', "n/a"),
        ('accommodates', 0),
    )
    # ... and after it (the order determines the order of the new columns).
    after_guests = (
        ('minimum_nights', 0),
        ('maximum_nights', 0),
        ('calendar_updated', "n/a"),
        ('instant_bookable', "f"),
        ('is_business_travel_ready', "f"),
        ('cancellation_policy', "n/a"),
    )

    for column, default in before_guests:
        if column not in df.columns:
            df[column] = default

    if 'guests_included' not in df.columns:
        df['guests_included'] = df['accommodates']

    for column, default in after_guests:
        if column not in df.columns:
            df[column] = default

    return df

def get_features(df):
    """Return ``df`` restricted to the listing columns that get indexed.

    The columns come back in the fixed order below; a missing column makes
    the pandas selection raise ``KeyError``.
    """
    identity_columns = [
        'id', 'listing_url', 'scrape_id', 'last_scraped', 'crawled_date',
        'name', 'host_id', 'host_is_superhost', 'host_identity_verified',
    ]
    stay_columns = [
        'room_type', 'accommodates', 'guests_included', 'minimum_nights',
        'maximum_nights', 'calendar_updated', 'instant_bookable',
        'is_business_travel_ready', 'cancellation_policy',
    ]
    price_and_availability_columns = [
        'price', 'availability_30', 'availability_60', 'availability_90',
        'availability_365',
    ]
    review_columns = [
        'number_of_reviews', 'first_review', 'last_review',
        'review_scores_rating', 'review_scores_accuracy',
        'review_scores_cleanliness', 'review_scores_checkin',
        'review_scores_communication', 'review_scores_location',
        'review_scores_value',
    ]

    selected = (
        identity_columns
        + stay_columns
        + price_and_availability_columns
        + review_columns
    )

    return df[selected]

def validate_reviews(df):
    """Give listings without reviews default review dates and scores.

    Null 'first_review' becomes '1991-01-01', null 'last_review' becomes '0'
    and every null review score becomes 0.

    Each column is reassigned (``df[col] = df[col].fillna(...)``) rather than
    filled through a chained ``inplace=True`` call: the chained form acts on a
    temporary column object, which triggers pandas chained-assignment warnings
    and leaves ``df`` unchanged when Copy-on-Write is active.

    Args:
        df: Listings DataFrame containing the review columns; modified in
            place. A missing column raises ``KeyError``.

    Returns:
        The same DataFrame, for call chaining.
    """
    defaults = {
        'first_review': '1991-01-01',
        'last_review': '0',
        'review_scores_rating': 0,
        'review_scores_accuracy': 0,
        'review_scores_cleanliness': 0,
        'review_scores_checkin': 0,
        'review_scores_communication': 0,
        'review_scores_location': 0,
        'review_scores_value': 0,
    }

    for column, default in defaults.items():
        df[column] = df[column].fillna(default)

    return df

def drop_null_values(df):
    """Return a copy of ``df`` without the rows that contain any NaN value."""
    return df.dropna(axis=0, how='any')

def fill_null_values(df):
    """Replace NaN values in the listing columns with per-column defaults.

    Text-like columns default to a single space, date-like columns to
    '1991-01-01' and numeric columns to 0.

    Each column is reassigned (``df[col] = df[col].fillna(...)``) rather than
    filled through a chained ``inplace=True`` call: the chained form acts on a
    temporary column object, which triggers pandas chained-assignment warnings
    and leaves ``df`` unchanged when Copy-on-Write is active.

    Args:
        df: Listings DataFrame containing every column listed below; modified
            in place. A missing column raises ``KeyError``.

    Returns:
        The same DataFrame, for call chaining.
    """
    defaults = {
        'listing_url': ' ',
        'scrape_id': 0,
        'last_scraped': '1991-01-01',
        'crawled_date': '1991-01-01',
        'name': ' ',
        'host_id': 0,
        'host_is_superhost': ' ',
        'host_identity_verified': ' ',
        'room_type': ' ',
        'accommodates': 0,
        'guests_included': 0,
        'minimum_nights': 0,
        'maximum_nights': 0,
        'calendar_updated': ' ',
        'instant_bookable': ' ',
        'is_business_travel_ready': ' ',
        'cancellation_policy': ' ',
        'price': 0,
        'availability_30': 0,
        'availability_60': 0,
        'availability_90': 0,
        'availability_365': 0,
        'number_of_reviews': 0,
    }

    for column, default in defaults.items():
        df[column] = df[column].fillna(default)

    return df

In [5]:
# Define a function for indexing documents
def ingest_data(df, index, total_docs):
    """Convert each row of ``df`` to a Listing document and index it in bulk.

    Documents are sent to the App Search engine ``index`` through the
    module-level ``app_search`` client in batches of 100. Whatever remains
    after the last full batch is sent once the loop ends, so no trailing
    documents are lost even if ``total_docs`` does not match ``len(df)``.

    Any exception (while building a document or while indexing) is logged and
    stops the processing of the remaining rows; it is not re-raised.

    Args:
        df: Pre-processed listings DataFrame.
        index: Name of the target App Search engine.
        total_docs: Number of rows in ``df``. Kept for backward compatibility;
            the final batch no longer depends on it.
    """
    bulk_size = 100

    # Row fields copied onto the document, in assignment order.
    copied_fields = (
        'listing_url', 'scrape_id', 'last_scraped', 'name', 'host_id',
        'host_is_superhost', 'host_identity_verified', 'room_type',
        'accommodates', 'guests_included', 'minimum_nights',
        'maximum_nights', 'calendar_updated', 'instant_bookable',
        'is_business_travel_ready', 'cancellation_policy', 'price',
        'availability_30', 'availability_60', 'availability_90',
        'availability_365', 'number_of_reviews', 'first_review',
        'last_review', 'review_scores_rating', 'review_scores_accuracy',
        'review_scores_cleanliness', 'review_scores_checkin',
        'review_scores_communication', 'review_scores_location',
        'review_scores_value',
    )
    # Date-like fields are stored as strings with the dashes removed.
    dash_stripped_fields = {'last_scraped', 'first_review', 'last_review'}

    try:
        docs = []

        for _, row in df.iterrows():

            doc = Listing()

            if ('id' in row) and ('crawled_date' in row):
                org_id = row['id']
                crawled_date = row['crawled_date']

                # Generate a unique ID by concatenating the original ID with
                # the crawled date
                doc.id = str(org_id) + "-" + crawled_date

                doc.org_id = org_id
                doc.crawled_date = crawled_date

            for field in copied_fields:
                if field not in row:
                    continue
                value = row[field]
                if field in dash_stripped_fields:
                    value = str(value).replace("-", "")
                setattr(doc, field, value)

            # Compute overall_rating by averaging all review scores
            doc.overall_rating = get_overall_rating(row)

            # Append the current document to the pending batch
            docs.append(doc.to_dict(include_meta=False))

            # Ingest a full batch of documents into the current index
            if len(docs) >= bulk_size:
                app_search.index_documents(
                    engine_name=index,
                    documents=docs
                )

                # Reset the list of documents
                docs = []

        # Ingest the final, partially filled batch (if any)
        if docs:
            app_search.index_documents(
                engine_name=index,
                documents=docs
            )

    except Exception:
        logging.error('exception occured', exc_info=True)

In [6]:
# Index listing documents crawled between 2019 - 2020.
#
# For every matching snapshot file the raw CSV is loaded, pre-processed and
# indexed into one App Search engine per city ('airbnb-history-<city>'). The
# engine is created (and its schema set) the first time a city is seen during
# this run; later snapshots of the same city are added to the existing engine.
# Any exception is logged and ends the whole run.
try:
    unique_list = []

    print("Start indexing ...")
    path = '/Users/nattiya/Desktop/WayBack_InsideAirBNB/'

    # Cities taken from the top cities by active listings
    # (https://www.alltherooms.com/analytics/airbnb-statistics/);
    # nine file-name prefixes are matched here.
    city_prefixes = (
        "london", "paris", "new-york-city", "rome", "rio-de-janeiro",
        "buenos-aires", "sydney", "mexico-city", "barcelona",
    )
    crawl_years = ("2019-", "2020-")

    for file in sorted(os.listdir(path)):

        is_wanted_snapshot = (
            file.startswith(city_prefixes)
            and any(year in file for year in crawl_years)
            and file.endswith(".csv.gz")
        )
        if not is_wanted_snapshot:
            continue

        # Extract city name from file (everything before the first "_")
        name = file.find("_")
        city = file[0:name].lower()

        # Load original listing data
        df = pd.read_csv(os.path.join(path, file), compression='gzip')

        # Pre-process raw data
        # Step 1: Enrich raw data with price and crawled date
        df = validate_price(df)
        df = get_crawled_date(df)
        df = gen_missing_columns(df)
        raw_count = len(df)

        # Step 2: Assign ratings to listings with no reviews
        df = get_features(df)
        df = validate_reviews(df)
        review_count = len(df)

        # Step 3: Fill null values with defaults (rows are kept, not dropped)
        df = fill_null_values(df)
        final_count = len(df)

        # Obtain the index name
        index_name = 'airbnb-history-' + city

        # Check if the city is seen for the first time
        if index_name not in unique_list:

            print("\tCreating a new index with %d documents loaded from file: %s" % (final_count, file))

            unique_list.append(index_name)

            # Initialize index (only perform once)
            resp = app_search.create_engine(
                engine_name=index_name,
                language="en"
            )

            # Index documents loaded from the current snapshot
            ingest_data(df, index=index_name, total_docs=final_count)

            # Updating schema
            resp = app_search.put_schema(
                engine_name=index_name,
                schema={
                    "accommodates": "number",
                    "availability_30": "number",
                    "availability_365": "number",
                    "availability_60": "number",
                    "availability_90": "number",
                    "guests_included": "number",
                    "maximum_nights": "number",
                    "minimum_nights": "number",
                    "number_of_reviews": "number",
                    "overall_rating": "number",
                    "price": "number",
                    "review_scores_accuracy": "number",
                    "review_scores_checkin": "number",
                    "review_scores_cleanliness": "number",
                    "review_scores_communication": "number",
                    "review_scores_location": "number",
                    "review_scores_rating": "number",
                    "review_scores_value": "number",
                    "calendar_updated": "text",
                    "cancellation_policy": "text",
                    "crawled_date": "text",
                    "first_review": "text",
                    "host_id": "text",
                    "host_identity_verified": "text",
                    "host_is_superhost": "text",
                    "instant_bookable": "text",
                    "is_business_travel_ready": "text",
                    "last_review": "text",
                    "last_scraped": "text",
                    "listing_url": "text",
                    "name": "text",
                    "room_type": "text",
                    "scrape_id": "text"
                }
            )

        else:
            print("\tUpdating an existing index with %d documents loaded from file: %s" % (final_count, file))

            ingest_data(df, index=index_name, total_docs=final_count)

    print("Finished indexing ...")

except Exception:
    logging.error('exception occured', exc_info=True)

Start indexing ...


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Creating a new index with 18033 documents loaded from file: barcelona_2019-01-14_data_listings.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 17763 documents loaded from file: barcelona_2019-02-06_data_listings.csv.gz
	Updating an existing index with 17807 documents loaded from file: barcelona_2019-03-08_data_listings.csv.gz
	Updating an existing index with 17899 documents loaded from file: barcelona_2019-04-10_data_listings.csv.gz
	Updating an existing index with 18302 documents loaded from file: barcelona_2019-05-14_data_listings.csv.gz
	Updating an existing index with 18837 documents loaded from file: barcelona_2019-06-07_data_listings.csv.gz
	Updating an existing index with 19833 documents loaded from file: barcelona_2019-07-10_data_listings.csv.gz
	Updating an existing index with 20556 documents loaded from file: barcelona_2019-08-12_data_listings.csv.gz
	Updating an existing index with 20404 documents loaded from file: barcelona_2019-09-17_data_listings.csv.gz
	Updating an existing index with 20147 documents loaded from file: barcelona_2019-10-16_data_listings.csv.gz
	Updating an existi

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Creating a new index with 18222 documents loaded from file: buenos-aires_2019-03-15_data_listings.csv.gz
	Updating an existing index with 18708 documents loaded from file: buenos-aires_2019-04-17_data_listings.csv.gz
	Updating an existing index with 20715 documents loaded from file: buenos-aires_2019-07-16_data_listings.csv.gz
	Updating an existing index with 21228 documents loaded from file: buenos-aires_2019-08-27_data_listings.csv.gz
	Updating an existing index with 21646 documents loaded from file: buenos-aires_2019-09-24_data_listings.csv.gz
	Updating an existing index with 21968 documents loaded from file: buenos-aires_2019-10-20_data_listings.csv.gz
	Updating an existing index with 22877 documents loaded from file: buenos-aires_2019-11-25_data_listings.csv.gz
	Updating an existing index with 23605 documents loaded from file: buenos-aires_2019-12-27_data_listings.csv.gz
	Updating an existing index with 24083 documents loaded from file: buenos-aires_2020-01-23_data_listings.csv.g

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 23828 documents loaded from file: buenos-aires_2020-05-25_data_listings.csv.gz
	Updating an existing index with 24134 documents loaded from file: buenos-aires_2020-06-21_data_listings.csv.gz
	Updating an existing index with 20373 documents loaded from file: buenos-aires_2020-10-26_data_listings.csv.gz
	Updating an existing index with 20545 documents loaded from file: buenos-aires_2020-11-27_data_listings.csv.gz
	Updating an existing index with 20636 documents loaded from file: buenos-aires_2020-12-24_data_listings.csv.gz
	Creating a new index with 77459 documents loaded from file: london_2019-01-13_data_listings.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 78415 documents loaded from file: london_2019-02-05_data_listings.csv.gz
	Updating an existing index with 79129 documents loaded from file: london_2019-03-07_data_listings.csv.gz
	Updating an existing index with 79671 documents loaded from file: london_2019-04-09_data_listings.csv.gz
	Updating an existing index with 80767 documents loaded from file: london_2019-05-05_data_listings.csv.gz
	Updating an existing index with 82029 documents loaded from file: london_2019-06-05_data_listings.csv.gz
	Updating an existing index with 83850 documents loaded from file: london_2019-07-10_data_listings.csv.gz
	Updating an existing index with 85918 documents loaded from file: london_2019-08-09_data_listings.csv.gz
	Updating an existing index with 85273 documents loaded from file: london_2019-09-14_data_listings.csv.gz
	Updating an existing index with 83887 documents loaded from file: london_2019-10-15_data_listings.csv.gz
	Updating an existing index with 85068 documen

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 81434 documents loaded from file: london_2020-07-14_data_listings.csv.gz
	Updating an existing index with 74186 documents loaded from file: london_2020-08-24_data_listings.csv.gz
	Updating an existing index with 77591 documents loaded from file: london_2020-09-11_data_listings.csv.gz
	Updating an existing index with 76619 documents loaded from file: london_2020-10-13_data_listings.csv.gz
	Updating an existing index with 76984 documents loaded from file: london_2020-11-06_data_listings.csv.gz
	Updating an existing index with 77136 documents loaded from file: london_2020-12-16_data_listings.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Creating a new index with 17229 documents loaded from file: mexico-city_2019-03-15_data_listings.csv.gz
	Updating an existing index with 17868 documents loaded from file: mexico-city_2019-04-17_data_listings.csv.gz
	Updating an existing index with 18348 documents loaded from file: mexico-city_2019-05-22_data_listings.csv.gz
	Updating an existing index with 19030 documents loaded from file: mexico-city_2019-06-24_data_listings.csv.gz
	Updating an existing index with 19357 documents loaded from file: mexico-city_2019-07-16_data_listings.csv.gz
	Updating an existing index with 20037 documents loaded from file: mexico-city_2019-08-22_data_listings.csv.gz
	Updating an existing index with 20273 documents loaded from file: mexico-city_2019-09-24_data_listings.csv.gz
	Updating an existing index with 20568 documents loaded from file: mexico-city_2019-10-20_data_listings.csv.gz
	Updating an existing index with 20571 documents loaded from file: mexico-city_2019-11-25_data_listings.csv.gz
	Updati

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Creating a new index with 50717 documents loaded from file: new-york-city_2019-01-09_data_listings.csv.gz
	Updating an existing index with 50228 documents loaded from file: new-york-city_2019-02-01_data_listings.csv.gz
	Updating an existing index with 49748 documents loaded from file: new-york-city_2019-03-06_data_listings.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 49466 documents loaded from file: new-york-city_2019-04-03_data_listings.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


	Updating an existing index with 48941 documents loaded from file: new-york-city_2019-05-03_data_listings.csv.gz
	Updating an existing index with 48801 documents loaded from file: new-york-city_2019-06-02_data_listings.csv.gz
	Updating an existing index with 48895 documents loaded from file: new-york-city_2019-07-08_data_listings.csv.gz
	Updating an existing index with 48864 documents loaded from file: new-york-city_2019-08-06_data_listings.csv.gz
	Updating an existing index with 48377 documents loaded from file: new-york-city_2019-09-12_data_listings.csv.gz
	Updating an existing index with 48602 documents loaded from file: new-york-city_2019-10-14_data_listings.csv.gz
	Updating an existing index with 49281 documents loaded from file: new-york-city_2019-11-01_data_listings.csv.gz
	Updating an existing index with 50599 documents loaded from file: new-york-city_2019-12-04_data_listings.csv.gz
	Updating an existing index with 51361 documents loaded from file: new-york-city_2020-01-03_data

POST http://localhost:3002/api/as/v1/engines/airbnb-history-new-york-city/documents [status:503 request:0.440s]
POST http://localhost:3002/api/as/v1/engines/airbnb-history-new-york-city/documents [status:503 request:0.303s]
POST http://localhost:3002/api/as/v1/engines/airbnb-history-new-york-city/documents [status:503 request:0.267s]
POST http://localhost:3002/api/as/v1/engines/airbnb-history-new-york-city/documents [status:503 request:0.286s]
ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/

	Updating an existing index with 48588 documents loaded from file: new-york-city_2020-07-07_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Updating an existing index with 46527 documents loaded from file: new-york-city_2020-08-15_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Updating an existing index with 45756 documents loaded from file: new-york-city_2020-09-07_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Updating an existing index with 44666 documents loaded from file: new-york-city_2020-10-05_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Updating an existing index with 44497 documents loaded from file: new-york-city_2020-11-02_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Updating an existing index with 36923 documents loaded from file: new-york-city_2020-12-10_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-5-bcfd4207c6b0>", line 98, in ingest_data
    app_search.index_documents(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 573, in index_documents
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.

	Creating a new index with 58359 documents loaded from file: paris_2019-01-13_data_listings.csv.gz


ERROR:root:exception occured
Traceback (most recent call last):
  File "<ipython-input-6-d30978d9b4a3>", line 48, in <module>
    resp = app_search.create_engine(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_app_search.py", line 733, in create_engine
    return self.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_enterprise_search/client/_base.py", line 187, in perform_request
    return self.transport.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/transport.py", line 311, in perform_request
    resp_status, resp_headers, data = connection.perform_request(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/http_urllib3.py", line 251, in perform_request
    self._raise_error(
  File "/Users/nattiya/miniconda3/envs/dl/lib/python3.8/site-packages/elastic_transport/connection/base.