# Content with notebooks

You can also create content with Jupyter Notebooks. This means that you can include
code blocks and their outputs in your book.

## Markdown + notebooks

As it is markdown, you can embed images, HTML, etc into your posts!

![](https://myst-parser.readthedocs.io/en/latest/_static/logo-wide.svg)

You can also $add_{math}$ and

$$
math^{blocks}
$$

or

$$
\begin{aligned}
\mbox{mean} la_{tex} \\ \\
math blocks
\end{aligned}
$$

But make sure you \$Escape \$your \$dollar signs \$you want to keep!

## MyST markdown

MyST markdown works in Jupyter Notebooks as well. For more information about MyST markdown, check
out [the MyST guide in Jupyter Book](https://jupyterbook.org/content/myst.html),
or see [the MyST markdown documentation](https://myst-parser.readthedocs.io/en/latest/).

## Code blocks and outputs

Jupyter Book will also embed your code blocks and output in your book.
For example, here's some sample Matplotlib code:

In [1]:
%pip install requests bs4 pandas geopy

from bs4 import BeautifulSoup
import requests
import pandas as pd
from geopy.geocoders import Nominatim

Note: you may need to restart the kernel to use updated packages.


In [20]:
import json
import re


def scrape_airbnb(check_in, check_out):
    url = f"https://www.airbnb.com/s/Madrid/homes?place_id=ChIJgTwKgJcpQg0RaSKMYcHeNsQ&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=april&flexible_trip_dates%5B%5D=august&flexible_trip_dates%5B%5D=february&flexible_trip_dates%5B%5D=july&flexible_trip_dates%5B%5D=june&flexible_trip_dates%5B%5D=march&flexible_trip_dates%5B%5D=may&flexible_trip_dates%5B%5D=october&flexible_trip_dates%5B%5D=september&date_picker_type=calendar&search_type=filter_change&tab_id=home_tab&query=Madrid&monthly_start_date=2023-12-01&monthly_length=3&price_filter_input_type=0&price_filter_num_nights=1&channel=EXPLORE&flexible_trip_lengths%5B%5D=weekend_trip&source=structured_search_input_header&checkin={check_in}&checkout={check_out}&adults=1"
    response = requests.get(url).text

    soup = BeautifulSoup(response, "html.parser")

    print(soup.prettify())
    script_tag = soup.find('script', text=re.compile('niobeMinimalClientData'))
    if script_tag:
        json_data = json.loads(script_tag.string)
        return json_data
    else:
        return "Data not found"


def extract_data(json_data, check_in, check_out):
    result = []
    length = len(
        json_data["niobeMinimalClientData"][1][1]["data"]["presentation"][
            "staysSearch"
        ]["results"]["searchResults"]
    )

    for i in range(length - 1):
        # LOCATION
        location = json_data["niobeMinimalClientData"][1][1]["data"]["presentation"][
            "staysSearch"
        ]["results"]["searchResults"][i]["listing"]["coordinate"]
        latitude = location["latitude"]
        longitude = location["longitude"]

        # PRICE
        price = json_data["niobeMinimalClientData"][1][1]["data"]["presentation"][
            "staysSearch"
        ]["results"]["searchResults"][i]["pricingQuote"]["rate"]["amount"]

        room_type = json_data["niobeMinimalClientData"][1][1]["data"]["presentation"][
            "staysSearch"
        ]["results"]["searchResults"][i]["listing"]["roomTypeCategory"]

        result.append(
            {
                "check_in": check_in,
                "check_out": check_out,
                "latitude": latitude,
                "longitude": longitude,
                "price": price,
                "room_type": room_type,
            }
        )

    return result


# Load existing data from CSV, if it exists
try:
    existing_data = pd.read_csv("airbnb_data.csv")
except FileNotFoundError:
    existing_data = pd.DataFrame(
        columns=["check_in", "check_out", "latitude", "longitude", "price", "room_type"]
    )

# Scrape new data
airbnb_raw_data = []
year = 2024
for month in range(1, 13):
    for day in range(1, 28):  # Adjusted for February
        check_in = f"{year}-{month}-{day}"
        check_out = f"{year}-{month}-{day + 1}"
        json_data = scrape_airbnb(check_in, check_out)
        airbnb_raw_data.extend(extract_data(json_data, check_in, check_out))

# Convert new data into a DataFrame
new_data = pd.DataFrame(
    airbnb_raw_data,
    columns=["check_in", "check_out", "latitude", "longitude", "price", "room_type"],
)
new_data["check_in"] = pd.to_datetime(new_data["check_in"])
new_data["check_out"] = pd.to_datetime(new_data["check_out"])

# Append new data to existing data
airbnb_data = pd.concat([existing_data, new_data], ignore_index=True)

# Remove potential duplicates
airbnb_data.drop_duplicates(inplace=True)

# Save combined data back to CSV
airbnb_data.to_csv("airbnb_data.csv", index=False)

<!DOCTYPE html>
<html class="__TODO_ENABLE_REM_RESIZE__" data-hyperloop-version="2" data-is-hyperloop="true" dir="ltr" lang="en">
 <meta charset="utf-8"/>
 <meta content="en" name="locale"/>
 <meta content="notranslate" name="google"/>
 <meta content="authenticity_token" id="csrf-param-meta-tag" name="csrf-param"/>
 <meta content="" id="csrf-token-meta-tag" name="csrf-token"/>
 <meta content="" id="english-canonical-url"/>
 <meta content="on" name="twitter:widgets:csp"/>
 <meta content="yes" name="mobile-web-app-capable"/>
 <meta content="yes" name="apple-mobile-web-app-capable"/>
 <meta content="Airbnb" name="application-name"/>
 <meta content="Airbnb" name="apple-mobile-web-app-title"/>
 <meta content="#ffffff" name="theme-color"/>
 <meta content="#ffffff" name="msapplication-navbutton-color"/>
 <meta content="black-translucent" name="apple-mobile-web-app-status-bar-style"/>
 <meta content="/?utm_source=homescreen" name="msapplication-starturl"/>
 <link crossorigin="anonymous" href="

  script_tag = soup.find('script', text=re.compile('niobeMinimalClientData'))


KeyError: 'niobeMinimalClientData'

In [None]:
print(json_data["root > core-guest-spa"])

[['ExperimentsDataToken', {'china_web_revamp': {'subject': 'visitor', 'buckets': 100, 'percent_exposed': 100, 'treatments': [{'name': 'control', 'buckets': 50}, {'name': 'treatment', 'buckets': 50}], 'hashing_key': 'china_web_revamp', 'sitar_overrides': {}, 'trebuchets': []}, 'installed_pwa': {'subject': 'visitor', 'buckets': 2, 'percent_exposed': 10, 'treatments': [{'name': 'control', 'buckets': 1}, {'name': 'treatment', 'buckets': 1}], 'hashing_key': 'installed_pwa', 'sitar_overrides': {}, 'trebuchets': []}, 'installed_pwa_parallel': {'subject': 'visitor', 'buckets': 2, 'percent_exposed': 10, 'treatments': [{'name': 'control', 'buckets': 1}, {'name': 'treatment', 'buckets': 1}], 'hashing_key': 'installed_pwa_parallel', 'sitar_overrides': {}, 'trebuchets': []}, 'contact_host_sections_preload_query_v4': {'subject': 'user', 'buckets': 100, 'percent_exposed': 100, 'treatments': [{'name': 'control', 'buckets': 50}, {'name': 'treatment', 'buckets': 50}], 'hashing_key': 'contact_host_sectio

In [None]:
airbnb_data['month'] = airbnb_data['check_in'].dt.month

# Prepare data for graph
# Extract month from check_in date


# Group by month and calculate average price
airbnb_graph_data = airbnb_data.groupby('month')['price'].mean().reset_index()

airbnb_graph_data.to_csv("airbnb_graph_data.csv", index=False)
airbnb_graph_data.head()

In [2]:
airbnb_data = pd.read_csv("airbnb_data.csv")
airbnb_data["check_in"] = pd.to_datetime(airbnb_data["check_in"])
airbnb_data["check_out"] = pd.to_datetime(airbnb_data["check_out"])

airbnb_data['month'] = airbnb_data['check_in'].dt.month

# Prepare data for map

airbnb_map_data = airbnb_data.groupby(['latitude', 'longitude', 'month'])['price'].mean().reset_index()
airbnb_map_data.to_csv("airbnb_map_data.csv", index=False)

airbnb_map_data.head()

Unnamed: 0,latitude,longitude,month,price
0,40.3441,-3.691984,1,20.0
1,40.3441,-3.691984,2,21.0
2,40.3441,-3.691984,3,21.0
3,40.372076,-3.693983,7,67.0
4,40.378957,-3.670362,2,98.0


In [3]:
from geopy.geocoders import Nominatim

def get_district(latitude, longitude):
    # Initialize Nominatim API
    geolocator = Nominatim(user_agent="map_app_airbnb")

    # Get location with reverse geocode
    location = geolocator.reverse((latitude, longitude), exactly_one=True)

    if location:
        address = location.raw['address']
        district = address.get('suburb')
        return district
    else:
        return "District not found"

# Example usage
latitude = 40.748817
longitude = -73.985428
print(get_district(40.3789571457818,-3.6703623401582193))


airbnb_map_data['region'] = airbnb_map_data.apply(lambda x: get_district(x['latitude'], x['longitude']), axis=1)
print(airbnb_map_data.head())
airbnb_map_data.to_csv("airbnb_map_data.csv", index=False)

Puente de Vallecas


GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /reverse?lat=40.41876446260283&lon=-3.7045130377501576&format=json&addressdetails=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))