# Libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
import os
from pymongo import MongoClient

# San Francisco rental real estate: offices and coworking spaces

## Offices

In [2]:
# Search-result pages 1 to 4 for San Francisco offices that seat between 87
# and 120 people (about 100 listings in total, per the original note).
urls = [
    "https://www.squarefoot.com/office-space/m/ca/san_francisco/"
    "1aee3919-edd5-4fad-b301-76f335aae568"
    "?minOccupancy=87&maxOccupancy=120"
    "&page=" + str(page_number) +
    "&activeSizeFilter=SEATS&groupByBuilding=false"
    for page_number in range(1, 5)
]

In [3]:
# Test URL: the same filtered search without a page number, used to explore
# the structure of a single response.
url = (
    "https://www.squarefoot.com/office-space/m/ca/san_francisco/"
    "1aee3919-edd5-4fad-b301-76f335aae568"
    "?minOccupancy=87&maxOccupancy=120&&activeSizeFilter=SEATS&groupByBuilding=false"
)

In [4]:
# Download the test search page and parse its HTML.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it show up
# later as a confusing parsing failure.
res = requests.get(url, timeout=30)
res.raise_for_status()
html = res.content
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Imported under an alias because this cell rebinds the name `json` below;
# with the alias the cell can be re-run without failing on `json.loads`.
import json as json_module

# The page data is embedded as JSON inside <script id="__NEXT_DATA__">.
body = soup.find_all("script", {"id": "__NEXT_DATA__"})
# The first argument of get_text() is the separator placed between text
# fragments, not a content type, so it is called without one.
info = body[0].get_text()
# NOTE: the parsed data is stored under the name `json` because the following
# cells index `json[...]`; this hides the standard-library module of that name.
json = json_module.loads(info)

In [6]:
# Monthly price of every listing on the test page. The printed result is all
# zeros: none of these listings for 87 or more employees publishes a price.
prices = [
    listing["accurateMonthlyPrice"]
    for listing in json["props"]["pageProps"]["queryManager"]["values"]["listings"]
]
print(prices)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [7]:
# Estimated maximum occupancy (number of employees) of every listing on the test page.
number_employees = [
    listing["estimatedMaxOccupancy"]
    for listing in json["props"]["pageProps"]["queryManager"]["values"]["listings"]
]
print(number_employees)

[93, 87, 103, 114, 92, 99, 119, 93, 119, 119, 115, 119, 104, 100, 107, 105, 107, 91, 99, 117, 108, 104, 94, 117]


In [8]:
# Latitude and longitude of the building that hosts each listing on the test page.
lat = [
    listing["building"]["lat"]
    for listing in json["props"]["pageProps"]["queryManager"]["values"]["listings"]
]
lon = [
    listing["building"]["lng"]
    for listing in json["props"]["pageProps"]["queryManager"]["values"]["listings"]
]

In [3]:
#Functions
def getJson(url):
    """Download a search page and return the page data embedded in it.

    The data is read from the page's ``<script id="__NEXT_DATA__">`` tag and
    parsed as JSON.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    The parsed JSON content of the script tag (callers index it like a dict).

    Raises
    ------
    requests.RequestException
        If the request times out or the server answers with an error status.
    IndexError
        If the page contains no ``__NEXT_DATA__`` script tag.
    """
    # Imported locally under an alias: other cells of this notebook rebind the
    # global name `json` to parsed data, which would break a global lookup here.
    import json as json_module

    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')
    body = soup.find_all("script", {"id": "__NEXT_DATA__"})
    if not body:
        # Same exception type as indexing the empty result, with a clearer message.
        raise IndexError(f"no __NEXT_DATA__ script tag found at {url}")
    # get_text()'s first argument is a separator between text fragments, not a
    # content type, so none is passed.
    info = body[0].get_text()
    return json_module.loads(info)

def getEmployees(js):
    """Return the ``estimatedMaxOccupancy`` of every listing in the page data ``js``."""
    listings = js["props"]["pageProps"]["queryManager"]["values"]["listings"]
    occupancies = []
    for listing in listings:
        occupancies.append(listing["estimatedMaxOccupancy"])
    return occupancies
def getLat(js):
    """Return the building latitude of every listing in the page data ``js``."""
    listings = js["props"]["pageProps"]["queryManager"]["values"]["listings"]
    return [listing["building"]["lat"] for listing in listings]
def getLon(js):
    """Return the building longitude (key ``lng``) of every listing in the page data ``js``."""
    listings = js["props"]["pageProps"]["queryManager"]["values"]["listings"]
    longitudes = []
    for listing in listings:
        building = listing["building"]
        longitudes.append(building["lng"])
    return longitudes

In [4]:
def getDict(urls):
    """Collect the listing data of several search pages into one dict of lists.

    Each URL is downloaded with ``getJson``; the occupancy, latitude and
    longitude of its listings are appended, in page order, to the lists stored
    under the keys ``"employees"``, ``"lat"`` and ``"lon"``.
    """
    columns = {"employees": [], "lat": [], "lon": []}
    for page_url in urls:
        page_data = getJson(page_url)
        columns["employees"] += getEmployees(page_data)
        columns["lat"] += getLat(page_data)
        columns["lon"] += getLon(page_data)
    return columns

In [5]:
# Download every result page and gather the listing columns into one dict of lists.
dict_rents = getDict(urls=urls)

In [6]:
# Table of the offices of interest, ordered from fewest to most employees;
# the lat/lon columns hold each office's coordinates.
rents = pd.DataFrame(dict_rents).sort_values(by="employees", ascending=True)

In [7]:
# Display the sorted table of office listings.
rents

Unnamed: 0,employees,lat,lon
1,87,37.784463,-122.242628
85,88,37.766039,-122.393089
60,90,37.723888,-122.236083
34,91,37.793076,-122.399608
36,91,37.793076,-122.399608
...,...,...,...
84,119,37.790913,-122.396728
83,119,37.790913,-122.396728
9,119,37.795014,-122.399435
94,119,37.790913,-122.396728


## Neighborhoods

In [None]:
# Imported under an alias because the name `json` is rebound to parsed data in
# this notebook; with the alias this cell works no matter what `json` currently is.
import json as json_module

# Download the filtered search page again and parse the data embedded in its
# <script id="__NEXT_DATA__"> tag.
url = "https://www.squarefoot.com/office-space/m/ca/san_francisco/1aee3919-edd5-4fad-b301-76f335aae568?minOccupancy=87&maxOccupancy=120&&activeSizeFilter=SEATS&groupByBuilding=false"
# Timeout and status check: fail here, with a clear error, rather than hang or
# break later while parsing.
res = requests.get(url, timeout=30)
res.raise_for_status()
html = res.content
soup = BeautifulSoup(html, 'html.parser')
body = soup.find_all("script", {"id": "__NEXT_DATA__"})
# get_text()'s first argument is a separator between text fragments, not a
# content type, so none is passed.
info = body[0].get_text()
# NOTE: stored under the name `json` because the next cell reads `json[...]`;
# this hides the standard-library module of that name.
json = json_module.loads(info)

In [9]:
# Neighborhood records of the first submarket in the page data.
# Some of them are not in San Francisco itself but elsewhere in the Bay Area.
neighborhoods = (
    json["props"]["pageProps"]["queryManager"]["values"]
    ["submarkets"][0]["neighborhoods"]
)

In [10]:
# Split every neighborhood record into its name and its GeoJSON geometry,
# then preview both columns as a table.
name = [hood["name"] for hood in neighborhoods]
geojson = [hood["geojson"] for hood in neighborhoods]
dict_neigh = dict(name=name, geojson=geojson)
pd.DataFrame(dict_neigh)

Unnamed: 0,name,geojson
0,28 Palms,"{'type': 'Polygon', 'coordinates': [[[-121.987..."
1,Acorn-Acorn Industrial,"{'type': 'Polygon', 'coordinates': [[[-122.342..."
2,Adams Point,"{'type': 'Polygon', 'coordinates': [[[-122.262..."
3,Alameda,"{'type': 'Polygon', 'coordinates': [[[-122.237..."
4,Allendale,"{'type': 'Polygon', 'coordinates': [[[-122.210..."
...,...,...
272,West San Jose,"{'type': 'Polygon', 'coordinates': [[[-122.003..."
273,Whitman-Mocine,"{'type': 'Polygon', 'coordinates': [[[-122.066..."
274,Willow Glen,"{'type': 'Polygon', 'coordinates': [[[-121.887..."
275,Woodland,"{'type': 'Polygon', 'coordinates': [[[-122.194..."


In [11]:
# Prepare the polygons for MongoDB: one {"name", "geojson"} dictionary per neighborhood.
dict_neigh_rows = [
    {"name": hood_name, "geojson": hood_geometry}
    for hood_name, hood_geometry in zip(dict_neigh["name"], dict_neigh["geojson"])
]

## Where are most of the offices located

In [12]:
# The neighborhood geometries are the values of dict_neigh["geojson"].
# Connect to the local MongoDB server and select the "Ironhack" database.
client = MongoClient(host="localhost:27017")
db = client.get_database(name="Ironhack")

In [13]:
# Collection that holds one document per neighborhood.
neighborhoodsSF = db.get_collection(name="neighborhoodsSF")

In [14]:
# One-time load of the neighborhood documents into the collection.
# The call is kept commented out; the output saved with this cell (an
# InsertManyResult) suggests it was run once and then disabled, presumably so
# that re-running the notebook does not insert duplicates. TODO confirm.
# On an empty database, uncomment it and run it once so the geo queries
# further down have data to search.
# neighborhoodsSF.insert_many(dict_neigh_rows)

<pymongo.results.InsertManyResult at 0x1b39a536110>

In [15]:
# Office coordinates live in rents["lon"] and rents["lat"]; pair them per row
# as [lon, lat] (longitude first, the order used by the GeoJSON points queried
# further down) and show the table.
rents["positions"] = rents.apply(
    lambda office: [office["lon"], office["lat"]],
    axis="columns",
)
rents

Unnamed: 0,employees,lat,lon,positions
1,87,37.784463,-122.242628,"[-122.242628, 37.784463]"
85,88,37.766039,-122.393089,"[-122.3930894, 37.7660393]"
60,90,37.723888,-122.236083,"[-122.236083, 37.723888]"
34,91,37.793076,-122.399608,"[-122.399608, 37.793076]"
36,91,37.793076,-122.399608,"[-122.399608, 37.793076]"
...,...,...,...,...
84,119,37.790913,-122.396728,"[-122.3967284, 37.7909128]"
83,119,37.790913,-122.396728,"[-122.3967284, 37.7909128]"
9,119,37.795014,-122.399435,"[-122.399435, 37.795014]"
94,119,37.790913,-122.396728,"[-122.3967284, 37.7909128]"


In [16]:
# Flat list of every office's [lon, lat] pair, in table order, to loop over later.
locations = [office["positions"] for _, office in rents.iterrows()]
print(locations)

[[-122.242628, 37.784463], [-122.3930894, 37.7660393], [-122.236083, 37.723888], [-122.399608, 37.793076], [-122.399608, 37.793076], [-122.399608, 37.793076], [-121.921571, 37.368335], [-122.396108, 37.795251], [-122.28784, 37.83779], [-122.124104, 38.070633], [-122.3979675, 37.7927653], [-122.293231, 37.853225], [-122.401807, 37.793174], [-122.3946185, 37.7926608], [-122.391276, 37.7815], [-122.392064, 37.776501], [-122.401807, 37.793174], [-122.401807, 37.793174], [-122.3946185, 37.7926608], [-122.403342, 37.794353], [-122.399808, 37.78945], [-122.403342, 37.794353], [-122.403643, 37.795292], [-122.401807, 37.793174], [-122.403342, 37.794353], [-122.398641, 37.788446], [-122.401807, 37.793174], [-122.403342, 37.794353], [-122.405783, 37.785435], [-122.405349, 37.785765], [-122.4004474, 37.7870868], [-122.403342, 37.794353], [-122.398641, 37.788446], [-122.403517, 37.793857], [-122.403517, 37.793857], [-122.287257, 37.847661], [-122.396108, 37.795251], [-122.417193, 37.77737], [-122.4

In [17]:
# Wrap every coordinate pair as a GeoJSON-style Point dictionary.
positions = [dict(type="Point", coordinates=lon_lat) for lon_lat in locations]

In [18]:
# Sanity check with the coordinates of the first office in the table:
# name of a stored neighborhood whose geometry intersects that point.
neighborhoodsSF.find_one(
    {
        "geojson": {
            "$geoIntersects": {
                "$geometry": {"type": "Point", "coordinates": [-122.242628, 37.784463]}
            }
        }
    }
)["name"]

'East Peralta'

In [19]:
from pymongo.errors import PyMongoError

# Look up the neighborhood of every office.
# find_one() returns None when no stored neighborhood geometry intersects the
# point (some offices fall outside every polygon in the collection). Indexing
# that None with ["name"] is what used to raise inside the old bare `except`.
# It is now handled explicitly, and only database errors are caught.
list_neigh = []
for point in positions:
    try:
        match = neighborhoodsSF.find_one(
            {"geojson": {"$geoIntersects": {"$geometry": point}}}
        )
    except PyMongoError:
        # Best effort, as before: a failed query counts as "no neighborhood".
        match = None
    list_neigh.append(
        {"neighborhoods": match.get("name") if match is not None else None}
    )

In [42]:
# Number of offices per neighborhood, showing the first five rows of the count table.
(
    pd.DataFrame(list_neigh)
    .value_counts()
    .head()
)

neighborhoods     
Financial District    67
South Of Market        4
Mission Bay            3
North San Jose         3
Alameda                1
Name: count, dtype: int64

# 4 final venues

In [41]:
from pymongo.errors import PyMongoError

# Source: https://www.viajarsanfrancisco.com/salir-de-fiesta-ocio-nocturno.php
# The best neighborhood for partying is South of Market, so keep only the
# offices located there.
list_offices = []
for point in positions:
    try:
        match = neighborhoodsSF.find_one(
            {"geojson": {"$geoIntersects": {"$geometry": point}}}
        )
    except PyMongoError:
        # Best effort, as before: skip an office whose lookup fails.
        continue
    # find_one() returns None when no stored neighborhood contains the point.
    if match is not None and match.get("name") == "South Of Market":
        list_offices.append(point)
list_offices

[{'type': 'Point', 'coordinates': [-122.391276, 37.7815]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.408109, 37.783521]},
 {'type': 'Point', 'coordinates': [-122.39586, 37.785766]}]