In [None]:
!pip install opensearch-py


In [None]:
from opensearchpy import OpenSearch

In [None]:
# Connect to OpenSearch domain
# Credentials come from the environment, falling back to an interactive prompt,
# so they are defined on a fresh kernel and never hard-coded in the notebook.
import os
from getpass import getpass

host = 'your host'  # placeholder: replace with the OpenSearch domain endpoint
port = 443
username = os.environ.get('OPENSEARCH_USERNAME') or input('OpenSearch username: ')
password = os.environ.get('OPENSEARCH_PASSWORD') or getpass('OpenSearch password: ')
auth = (username, password)
index_name = 'restaurants'

In [None]:
# Gather the connection options in one place, then build the client from them.
client_options = {
    'hosts': [{'host': host, 'port': port}],
    'http_auth': auth,
    'use_ssl': True,
    'verify_certs': True,
    # NOTE(review): this looks like it turns off hostname matching while
    # certificates are still verified -- confirm that is intended.
    'ssl_assert_hostname': False,
    'ssl_show_warn': False,
}
client = OpenSearch(**client_options)

In [None]:
# Request body used when creating the 'restaurants' index:
# one shard and zero replicas.
index_body = dict(
    settings=dict(
        index=dict(number_of_shards=1, number_of_replicas=0),
    ),
)

In [None]:
# Create the index only when it is missing, so re-running this cell does not
# fail because the index already exists.
if client.indices.exists(index=index_name):
    print(f'Index {index_name!r} already exists; skipping creation.')
else:
    response = client.indices.create(index=index_name, body=index_body)
    print('Index creation response:', response)

Index creation response: {'acknowledged': True, 'shards_acknowledged': True, 'index': 'restaurants'}


Load the restaurant data scraped in the previous sections so it can be cleaned and indexed.

In [None]:
import json

In [None]:
# Read the scraped data; the context manager closes the file handle as soon as
# parsing finishes, and the encoding is explicit rather than platform-dependent.
with open('restaurants_data.json', encoding='utf-8') as data_file:
    restaurants = json.load(data_file)

In [None]:
# Inspect the top-level keys of the loaded data.
restaurants.keys()

dict_keys(['Italian', 'Chinese', 'Mexican', 'Japanese', 'Indian'])

In [None]:
# Per the updated guidelines on Brightspace, it is recommended to use only
# 3 cuisines and 50 restaurants per cuisine, so keep the Indian, Italian and
# Mexican cuisines and the first 50 restaurants listed under each.
cuisines = ['Indian', 'Italian', 'Mexican']
filtered_retauarants = {
    cuisine: restaurants[cuisine][:50]
    for cuisine in cuisines
}

In [None]:
# Annotate every kept restaurant record in place with its cuisine label and a
# 'RestaurantID' field copied from its 'id'.
for cuisine, cuisine_restaurants in filtered_retauarants.items():
    for restaurant in cuisine_restaurants:
        restaurant.update(Cuisine=cuisine, RestaurantID=restaurant['id'])

In [None]:
# Flatten the per-cuisine lists into one list of minimal documents that carry
# only the 'RestaurantID' and 'Cuisine' fields.
all_restaurants = [
    {'RestaurantID': restaurant['RestaurantID'], 'Cuisine': restaurant['Cuisine']}
    for cuisine_restaurants in filtered_retauarants.values()
    for restaurant in cuisine_restaurants
]

In [None]:
# Preview the first 10 entries of the list.
all_restaurants[:10]

[{'RestaurantID': 'DGhWO1sUWydVeR5j5ZZaMw', 'Cuisine': 'Indian'},
 {'RestaurantID': '16ZnHpuaaBt92XWeJHCC5A', 'Cuisine': 'Indian'},
 {'RestaurantID': 'hdiuRS9sVZSMReZm4oV5SA', 'Cuisine': 'Indian'},
 {'RestaurantID': 'z5hRX3iJ5Ty_S38iG_WY3Q', 'Cuisine': 'Indian'},
 {'RestaurantID': 'VvsZAnEwU4c8Xkyrzx05Nw', 'Cuisine': 'Indian'},
 {'RestaurantID': 'XsXLVWr1UZWVhKThNvNiaA', 'Cuisine': 'Indian'},
 {'RestaurantID': 'dEOv8_ivdHp85OK_TDQh_g', 'Cuisine': 'Indian'},
 {'RestaurantID': 'klAhw3xLQi9GF1tf_HnS7w', 'Cuisine': 'Indian'},
 {'RestaurantID': 'NN3mOWF5e_pnR1ArqM2bHQ', 'Cuisine': 'Indian'},
 {'RestaurantID': 'Wu0eeQ8Wk_Hjg3cleoxFlw', 'Cuisine': 'Indian'}]

Upload Scraped Data to OpenSearch

In [None]:
# Index each document individually, using its 'RestaurantID' as the document
# id, and print the response returned for every request.
for restaurant in all_restaurants:
    restaurant_id = restaurant['RestaurantID']
    response = client.index(index=index_name, body=restaurant, id=restaurant_id)
    print(f'Document {restaurant_id} inserted:', response)

Document DGhWO1sUWydVeR5j5ZZaMw inserted: {'_index': 'restaurants', '_id': 'DGhWO1sUWydVeR5j5ZZaMw', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Document 16ZnHpuaaBt92XWeJHCC5A inserted: {'_index': 'restaurants', '_id': '16ZnHpuaaBt92XWeJHCC5A', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Document hdiuRS9sVZSMReZm4oV5SA inserted: {'_index': 'restaurants', '_id': 'hdiuRS9sVZSMReZm4oV5SA', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Document z5hRX3iJ5Ty_S38iG_WY3Q inserted: {'_index': 'restaurants', '_id': 'z5hRX3iJ5Ty_S38iG_WY3Q', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
Document VvsZAnEwU4c8Xkyrzx05Nw inserted: {'_index': 'restaurants', '_id': 'VvsZAnEwU4c8Xkyrzx05