In [1]:
import json
import logging

In [2]:

def read_jsonl(file_path: str):
    """
    Read a JSON Lines file and return its contents as a list of parsed values.

    Each non-blank line is decoded with ``json.loads``. Blank or
    whitespace-only lines (for example a trailing empty line at the end of
    the file) are skipped instead of raising a decode error.

    Args:
        file_path (str): Path to the JSON Lines file.

    Returns:
        List[dict]: One parsed entry per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the handle directly so the raw text is not held in memory
        # alongside the parsed result; `line.strip()` filters out blank lines.
        return [json.loads(line) for line in f if line.strip()]

In [3]:
# Load the raw JSON Lines data for listing 172222.
# NOTE(review): the name `input` shadows Python's built-in input(); it is kept
# because a later cell passes this variable to get_listing_presentation.
input = read_jsonl('172222.jsonl')

# Display the loaded data.
input

[{'server': [],
  'initializer': [['ExperimentsDataToken',
    {'hawaii20_taller_where_panel_2024_moweb': {'subject': 'identity',
      'buckets': 100,
      'percent_exposed': 100,
      'treatments': [{'name': 'control', 'buckets': 50},
       {'name': 'treatment', 'buckets': 50}],
      'hashing_key': 'hawaii20_taller_where_panel_2024',
      'sitar_overrides': {},
      'trebuchets': []},
     'installed_pwa': {'subject': 'visitor',
      'buckets': 2,
      'percent_exposed': 10,
      'treatments': [{'name': 'control', 'buckets': 1},
       {'name': 'treatment', 'buckets': 1}],
      'hashing_key': 'installed_pwa',
      'sitar_overrides': {},
      'trebuchets': []},
     'installed_pwa_parallel': {'subject': 'visitor',
      'buckets': 2,
      'percent_exposed': 10,
      'treatments': [{'name': 'control', 'buckets': 1},
       {'name': 'treatment', 'buckets': 1}],
      'hashing_key': 'installed_pwa_parallel',
      'sitar_overrides': {},
      'trebuchets': []},
     'experi

In [4]:
def get_listing_presentation(listing_info_json: object, listing_id: str) -> object:
    """
    Extract the presentation data from the listing information JSON.

    The presentation is read from the fixed path
    ``[0]['root > core-guest-spa'][1][1]['niobeMinimalClientData'][1][1]['data']['presentation']``.

    Args:
        listing_info_json (object): The JSON object containing listing information.
        listing_id (str): The ID of the listing; used only in the log message
            emitted when extraction fails.

    Returns:
        object: The presentation data, or None if any step of the path is
        missing or cannot be indexed.
    """
    try:
        # Extract the presentation data
        presentation = listing_info_json[0]['root > core-guest-spa'][1][1]['niobeMinimalClientData'][1][1]['data'][
            'presentation']
    except (LookupError, TypeError) as ex:
        # LookupError covers a missing key or an out-of-range index; TypeError
        # covers a step that is not indexable (e.g. None). Anything else is an
        # unexpected error and is allowed to propagate.
        logging.error("listing_id=%s can't get presentation. Error: %r", listing_id, ex)
        return None

    return presentation

In [5]:
# Extract the presentation payload for listing 172222 from the raw data in `input`.
# get_listing_presentation returns None on failure, in which case the file
# written below contains the JSON literal `null`.
parcing_dict = get_listing_presentation(input, listing_id='172222')

# Persist the payload as a single JSON line. The encoding is pinned to UTF-8 so
# the file is written the same way read_jsonl reads it, rather than with the
# platform default encoding.
with open('172222_expected_presentation.jsonl', 'w', encoding='utf-8') as file:
    file.write(json.dumps(parcing_dict) + '\n')

## Expected parsed result

In [6]:
# read_jsonl returns one entry per line; keep the first entry of the
# parsed-result file for listing 172222 and display it.
parsed_result = read_jsonl('172222_parsed_result.jsonl')[0]
parsed_result

{'listing_id': '172222',
 'checking_in_and_out_house_rule': ['Check-in after 4:00 p.m.',
  'Checkout before 11:00 a.m.',
  'Self check-in with keypad'],
 'during_your_stay_house_rule': ['2 guests maximum',
  'No pets',
  'Quiet hours: 11:00 p.m.–7:00 a.m.',
  'No parties or events',
  'No commercial photography',
  'No smoking',
  'Additional rules',
  'Guests must load AirBnB app into phone.'],
 'before_you_leave_house_rule': ['Additional requests: How’s your wonderful day? %%% Check Out Notes: %%% • 11 am check out. %%% • close & lock window. %%% • turn off lights. %%% • leave key in room & lock door.  %%% • leave cash payment for snacks, water, pop, etc.  %%% Thank You. %%% Happy Travels! %%% Tammy & Will'],
 'bathroom_amenities': ['Bathtub',
  'Hair dryer',
  'Shampoo',
  'Body soap',
  'Hot water',
  'Shower gel'],
 'bedroom_and_laundry_amenities': ['Essentials',
  'Hangers',
  'Bed linens',
  'Iron'],
 'family_amenities': ['Outlet covers'],
 'heating_and_cooling_amenities': ['Ind

## Expected parsed house_rules

In [7]:
# Keep only the entries of parsed_result whose key ends with '_house_rule'.
expected_house_rules = {
    name: values
    for name, values in parsed_result.items()
    if name.endswith('_house_rule')
}
expected_house_rules

{'checking_in_and_out_house_rule': ['Check-in after 4:00 p.m.',
  'Checkout before 11:00 a.m.',
  'Self check-in with keypad'],
 'during_your_stay_house_rule': ['2 guests maximum',
  'No pets',
  'Quiet hours: 11:00 p.m.–7:00 a.m.',
  'No parties or events',
  'No commercial photography',
  'No smoking',
  'Additional rules',
  'Guests must load AirBnB app into phone.'],
 'before_you_leave_house_rule': ['Additional requests: How’s your wonderful day? %%% Check Out Notes: %%% • 11 am check out. %%% • close & lock window. %%% • turn off lights. %%% • leave key in room & lock door.  %%% • leave cash payment for snacks, water, pop, etc.  %%% Thank You. %%% Happy Travels! %%% Tammy & Will']}

## Expected parsed amenities

In [8]:
# Keep only the entries of parsed_result whose key ends with '_amenities'.
expected_amenities = {
    name: values
    for name, values in parsed_result.items()
    if name.endswith('_amenities')
}
expected_amenities

{'bathroom_amenities': ['Bathtub',
  'Hair dryer',
  'Shampoo',
  'Body soap',
  'Hot water',
  'Shower gel'],
 'bedroom_and_laundry_amenities': ['Essentials',
  'Hangers',
  'Bed linens',
  'Iron'],
 'family_amenities': ['Outlet covers'],
 'heating_and_cooling_amenities': ['Indoor fireplace', 'Heating'],
 'privacy_and_safety_amenities': ['Lock on bedroom door',
  'Exterior security cameras on property',
  'Smoke alarm',
  'Fire extinguisher',
  'First aid kit'],
 'internet_and_office_amenities': ['Wifi', 'Dedicated workspace'],
 'kitchen_and_dining_amenities': ['Refrigerator',
  'Microwave',
  'Dishes and silverware',
  'Mini fridge',
  'Dishwasher',
  'Hot water kettle',
  'Coffee maker',
  'Toaster',
  'Kitchenette',
  'Dining table'],
 'outdoor_amenities': ['Shared patio or balcony',
  'Shared backyard – Fully fenced',
  'Outdoor furniture'],
 'parking_and_facilities_amenities': ['Free driveway parking on premises – 2 spaces',
  'Free street parking',
  'Shared pool – available sea

## Expected parsed description

In [9]:
# Keep only the entries of parsed_result whose key ends with '_description'.
expected_description = {
    name: text
    for name, text in parsed_result.items()
    if name.endswith('_description')
}
expected_description

{'place_description': '24,000+ HAPPY GUESTS (all wonderful guests, except for that one ;) <br /><br />* WE ARE LICENSED & INSURED <br /><br />2017 North American Best Guest House (top 10)<br /><br />Exodus Adventures Award of Distinction 2016.<br /><br />2019 West Coast Guest House (Best Value)<br /><br />2015 Global Nomads Top 100 Honors<br /><br />2014 Wanderlust Destination Award<br /><br />" Voted Vancouver\'s Best BnB (website hidden) 2013"<br /><br />"Family friendly BnB "<br /><br />”it reminds us of home” - Brigitte & Gerard, Paris, France (2011)',
 'the_space_description': "If you're looking for a clean room in a safe and convenient neighborhood close to both the airport as well as the bustling culture and plethora of dining options that central Richmond has to offer, then stay at Dejavu!<br /><br />We are just steps to the bus stop. From there it's a 5-minute ride to Richmond Center and the Canada Line which whizzes you downtown in less than half an hour. We are less than 10 

## Another test example

In [10]:
# Load the raw JSON Lines data for listing 888757305688084444.
# NOTE(review): the name `input` shadows Python's built-in input(); it is kept
# because a later cell passes this variable to get_listing_presentation.
input = read_jsonl('888757305688084444.jsonl')

# Display the loaded data.
input

[{'server': [],
  'initializer': [['ExperimentsDataToken',
    {'hawaii20_taller_where_panel_2024_moweb': {'subject': 'identity',
      'buckets': 100,
      'percent_exposed': 100,
      'treatments': [{'name': 'control', 'buckets': 50},
       {'name': 'treatment', 'buckets': 50}],
      'hashing_key': 'hawaii20_taller_where_panel_2024',
      'sitar_overrides': {},
      'trebuchets': []},
     'installed_pwa': {'subject': 'visitor',
      'buckets': 2,
      'percent_exposed': 10,
      'treatments': [{'name': 'control', 'buckets': 1},
       {'name': 'treatment', 'buckets': 1}],
      'hashing_key': 'installed_pwa',
      'sitar_overrides': {},
      'trebuchets': []},
     'installed_pwa_parallel': {'subject': 'visitor',
      'buckets': 2,
      'percent_exposed': 10,
      'treatments': [{'name': 'control', 'buckets': 1},
       {'name': 'treatment', 'buckets': 1}],
      'hashing_key': 'installed_pwa_parallel',
      'sitar_overrides': {},
      'trebuchets': []},
     'experi

In [11]:
# Extract the presentation payload for listing 888757305688084444 from the raw data in `input`.
# get_listing_presentation returns None on failure, in which case the file
# written below contains the JSON literal `null`.
parcing_dict = get_listing_presentation(input, listing_id='888757305688084444')

# Persist the payload as a single JSON line. The encoding is pinned to UTF-8 so
# the file is written the same way read_jsonl reads it, rather than with the
# platform default encoding.
with open('888757305688084444_expected_presentation.jsonl', 'w', encoding='utf-8') as file:
    file.write(json.dumps(parcing_dict) + '\n')

In [12]:
# read_jsonl returns one entry per line; keep the first entry of the
# parsed-result file for listing 888757305688084444 and display it.
parsed_result = read_jsonl('888757305688084444_parsed_result.jsonl')[0]
parsed_result

{'listing_id': '888757305688084444',
 'checking_in_and_out_house_rule': ['Check-in after 3:00 p.m.',
  'Checkout before 11:00 a.m.',
  'Self check-in with keypad'],
 'during_your_stay_house_rule': ['4 guests maximum',
  'No pets',
  'Quiet hours: 11:00 p.m.–6:00 a.m.',
  'No parties or events',
  'No commercial photography',
  'No smoking',
  'Additional rules',
  'Treat the place like your own.'],
 'scenic_views_amenities': ['Garden view'],
 'bathroom_amenities': ['Hair dryer',
  'Cleaning products',
  'Shampoo',
  'Conditioner',
  'Body soap',
  'Bidet',
  'Hot water'],
 'bedroom_and_laundry_amenities': ['Washer',
  'Free dryer – In unit',
  'Essentials',
  'Hangers',
  'Bed linens',
  'Extra pillows and blankets',
  'Room-darkening shades',
  'Clothing storage: closet'],
 'entertainment_amenities': ['TV'],
 'family_amenities': ['Pack ’n play/Travel crib – available upon request',
  'Standalone high chair – available upon request',
  'Children’s dinnerware'],
 'heating_and_cooling_am

In [13]:
# Keep only the entries of parsed_result whose key ends with '_house_rule'.
expected_house_rules = {
    name: values
    for name, values in parsed_result.items()
    if name.endswith('_house_rule')
}
expected_house_rules

{'checking_in_and_out_house_rule': ['Check-in after 3:00 p.m.',
  'Checkout before 11:00 a.m.',
  'Self check-in with keypad'],
 'during_your_stay_house_rule': ['4 guests maximum',
  'No pets',
  'Quiet hours: 11:00 p.m.–6:00 a.m.',
  'No parties or events',
  'No commercial photography',
  'No smoking',
  'Additional rules',
  'Treat the place like your own.']}

In [14]:
# Keep only the entries of parsed_result whose key ends with '_amenities'.
expected_amenities = {
    name: values
    for name, values in parsed_result.items()
    if name.endswith('_amenities')
}
expected_amenities

{'scenic_views_amenities': ['Garden view'],
 'bathroom_amenities': ['Hair dryer',
  'Cleaning products',
  'Shampoo',
  'Conditioner',
  'Body soap',
  'Bidet',
  'Hot water'],
 'bedroom_and_laundry_amenities': ['Washer',
  'Free dryer – In unit',
  'Essentials',
  'Hangers',
  'Bed linens',
  'Extra pillows and blankets',
  'Room-darkening shades',
  'Clothing storage: closet'],
 'entertainment_amenities': ['TV'],
 'family_amenities': ['Pack ’n play/Travel crib – available upon request',
  'Standalone high chair – available upon request',
  'Children’s dinnerware'],
 'heating_and_cooling_amenities': ['Central heating', 'Radiant heating'],
 'home_safety_amenities': ['Smoke alarm',
  'Carbon monoxide alarm',
  'Fire extinguisher'],
 'internet_and_office_amenities': ['Wifi', 'Dedicated workspace'],
 'kitchen_and_dining_amenities': ['Kitchen',
  'Refrigerator',
  'Microwave',
  'Cooking basics',
  'Dishes and silverware',
  'Freezer',
  'Dishwasher',
  'Stainless steel electric stove',
  

In [15]:
# Keep only the entries of parsed_result whose key ends with '_description'.
expected_description = {
    name: text
    for name, text in parsed_result.items()
    if name.endswith('_description')
}
expected_description

{'place_description': 'Use our newly renovated place as your basecamp for your North Vancouver adventures. Located in Blueridge at the foot of Mt Seymour, close to world class mtb/hiking trails,  Deep Cove, and 25min to Vancouver, our 1BR garden level suite is ideal for nature lovers, professionals, and young families.  Fully equipped kitchen, office nook, queen bed and queen size pull-out couch, washer & dryer, outside patio overlooking the "evergreen" backyard. Bolt-in bike rack on covered patio to store bikes.',
 'the_space_description': "You'll find your own private entrance at the back of the house, with keyless entry and a patio for you to use.<br /><br />The suite includes a full-size kitchen with new stainless steel fridge, oven, dishwasher, microwave, and Nespresso machine (a French press is also available).  You'll find a toaster, water boiler, and all the cooking and dining utensils available to prepare and enjoy your meals. <br /><br />The cozy main living space comes with 

For a broader overview of the data, see:

https://github.com/CodeForBc/airbnb-ai/blob/main/notebooks/experiments/1.0_ab_read_all_data_and_some_eda.ipynb