# Libraries

In [1]:
import json
import json as json_lib  # stable alias: cells below rebind the bare name `json` to parsed page data
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

# San Francisco rental real estate: offices and coworking spaces

## Offices

In [2]:
# Search-result pages 1 through 8 for San Francisco offices that seat
# at least 87 and at most 120 people (originally noted as about 100 places).
urls = [
    f"https://www.squarefoot.com/office-space/m/ca/san_francisco/1aee3919-edd5-4fad-b301-76f335aae568?minOccupancy=87&maxOccupancy=120&page={page}&activeSizeFilter=SEATS&groupByBuilding=false"
    for page in range(1, 9)
]

In [29]:
# Test URL: the same search as above, without the `page` parameter
url = "https://www.squarefoot.com/office-space/m/ca/san_francisco/1aee3919-edd5-4fad-b301-76f335aae568?minOccupancy=87&maxOccupancy=120&&activeSizeFilter=SEATS&groupByBuilding=false"

In [30]:
# Download the test page and parse its HTML.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# reports an HTTP error here rather than as a confusing parsing failure later.
res = requests.get(url, timeout=30)
res.raise_for_status()
html = res.content
soup = BeautifulSoup(html, 'html.parser')

In [31]:
# The page data is embedded as JSON text inside <script id="__NEXT_DATA__">.
body = soup.find_all("script", {"id": "__NEXT_DATA__"})
if not body:
    raise ValueError("No __NEXT_DATA__ script tag found in the downloaded page")
# The first positional argument of get_text() is a *separator* placed between
# text fragments, not a content type, so it is called without arguments.
info = body[0].get_text()
# NOTE: the cells below read the parsed data through the name `json`, which hides
# the json module from this point on. Decoding goes through the `json_lib` alias
# so that this cell still works when it is run again after the rebinding.
json = json_lib.loads(info)

In [40]:
# Street address of the building of the first listing on the page
json["props"]["pageProps"]["queryManager"]["values"]["listings"][0]["building"]["address"]

'3985 Teal Court'

In [32]:
# No listing for 87 or more employees shows a public price
# (in the recorded run every monthly price is 0).
listings = json["props"]["pageProps"]["queryManager"]["values"]["listings"]
prices = [listing["accurateMonthlyPrice"] for listing in listings]
print(prices)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Estimated maximum occupancy (number of employees) of each listing
page_values = json["props"]["pageProps"]["queryManager"]["values"]
number_employees = [listing["estimatedMaxOccupancy"] for listing in page_values["listings"]]
print(number_employees)

In [8]:
# Latitude and longitude of the building behind each listing
buildings = [
    listing["building"]
    for listing in json["props"]["pageProps"]["queryManager"]["values"]["listings"]
]
lat = [building["lat"] for building in buildings]
lon = [building["lng"] for building in buildings]

In [3]:
# Functions
def getJson(url):
    """Download a page and return the JSON embedded in its ``__NEXT_DATA__`` script tag.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    The object decoded from the text of ``<script id="__NEXT_DATA__">``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page has no ``__NEXT_DATA__`` script tag, or its text is not valid JSON.
    """
    # The timeout prevents a stalled connection from blocking the notebook forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')
    script = soup.find("script", {"id": "__NEXT_DATA__"})
    if script is None:
        raise ValueError(f"No __NEXT_DATA__ script tag found at {url}")
    # get_text() is called without arguments: its first parameter is a separator,
    # not a content type. Decoding uses the `json_lib` alias because other cells
    # rebind the bare name `json` to a dict, which would break `json.loads` here.
    return json_lib.loads(script.get_text())

def getEmployees(js):
    """Return the ``estimatedMaxOccupancy`` of every listing in the page payload ``js``."""
    occupancies = []
    for listing in js["props"]["pageProps"]["queryManager"]["values"]["listings"]:
        occupancies.append(listing["estimatedMaxOccupancy"])
    return occupancies
def getLat(js):
    """Return the building latitude (``building.lat``) of every listing in ``js``."""
    listings = js["props"]["pageProps"]["queryManager"]["values"]["listings"]
    return [listing["building"]["lat"] for listing in listings]
def getLon(js):
    """Return the building longitude (``building.lng``) of every listing in ``js``."""
    values = js["props"]["pageProps"]["queryManager"]["values"]
    longitudes = []
    for listing in values["listings"]:
        building = listing["building"]
        longitudes.append(building["lng"])
    return longitudes
def getAddress(js):
    """Return the building street address (``building.address``) of every listing in ``js``."""
    page_props = js["props"]["pageProps"]
    listings = page_props["queryManager"]["values"]["listings"]
    return list(map(lambda listing: listing["building"]["address"], listings))

In [6]:
def getDict(urls):
    """Collect the listing fields of several result pages into one dict of columns.

    Each URL in ``urls`` is downloaded with ``getJson``; the values extracted by
    ``getEmployees``, ``getLat``, ``getLon`` and ``getAddress`` are appended, in
    page order, to the lists stored under the keys ``"employees"``, ``"lat"``,
    ``"lon"`` and ``"address"``.

    Returns
    -------
    dict
        The four keys above, each mapped to a list; all lists are empty when
        ``urls`` is empty.
    """
    columns = {"employees": [], "lat": [], "lon": [], "address": []}
    for page_url in urls:
        page = getJson(page_url)
        columns["employees"] += getEmployees(page)
        columns["lat"] += getLat(page)
        columns["lon"] += getLon(page)
        columns["address"] += getAddress(page)
    return columns

In [7]:
# Scrape every result page in `urls` (one HTTP request per page) into a dict of columns
dict_rents = getDict(urls)

In [8]:
# Coordinates of every office of interest, ordered from the smallest to the
# largest capacity (number of employees)
rents = pd.DataFrame(dict_rents).sort_values(by="employees", ascending=True)

In [9]:
# Show the offices table sorted by capacity
rents

Unnamed: 0,employees,lat,lon,address
114,87,37.791582,-122.394000,135 Main Street
163,87,37.788891,-122.398100,535 Mission Street
147,87,37.794792,-122.403684,601 Montgomery Street
151,87,37.791582,-122.394000,135 Main Street
1,87,37.784463,-122.242628,1830 Embarcadero
...,...,...,...,...
121,119,37.790913,-122.396728,350 Mission Street
120,119,37.790913,-122.396728,350 Mission Street
9,119,37.795014,-122.399435,1 Embarcadero Center
181,119,37.790913,-122.396728,350 Mission Street


## Neighborhoods

In [10]:
# Download the unpaginated search page again and decode its embedded JSON;
# the next cell reads the neighborhoods from this payload.
url = "https://www.squarefoot.com/office-space/m/ca/san_francisco/1aee3919-edd5-4fad-b301-76f335aae568?minOccupancy=87&maxOccupancy=120&&activeSizeFilter=SEATS&groupByBuilding=false"
# The timeout avoids hanging forever; raise_for_status() reports HTTP errors here.
res = requests.get(url, timeout=30)
res.raise_for_status()
html = res.content
soup = BeautifulSoup(html, 'html.parser')
body = soup.find_all("script", {"id": "__NEXT_DATA__"})
if not body:
    raise ValueError("No __NEXT_DATA__ script tag found in the downloaded page")
# get_text() is called without arguments: its first parameter is a separator,
# not a content type.
info = body[0].get_text()
# NOTE: the following cells read the payload through the name `json`, which hides
# the json module. An earlier cell may already have rebound `json` to a dict, so
# decoding goes through the `json_lib` alias instead of `json.loads`.
json = json_lib.loads(info)

In [11]:
# Neighborhoods listed under the first submarket of the page payload
neighborhoods = json["props"]["pageProps"]["queryManager"]["values"]["submarkets"][0]["neighborhoods"]
# Some of these neighborhoods are not in San Francisco itself but elsewhere in the Bay Area

In [12]:
# Name and GeoJSON geometry of every neighborhood, arranged as two columns
name = [neighborhood["name"] for neighborhood in neighborhoods]
geojson = [neighborhood["geojson"] for neighborhood in neighborhoods]
dict_neigh = dict(name=name, geojson=geojson)
pd.DataFrame(dict_neigh)

Unnamed: 0,name,geojson
0,28 Palms,"{'type': 'Polygon', 'coordinates': [[[-121.987..."
1,Acorn-Acorn Industrial,"{'type': 'Polygon', 'coordinates': [[[-122.342..."
2,Adams Point,"{'type': 'Polygon', 'coordinates': [[[-122.262..."
3,Alameda,"{'type': 'Polygon', 'coordinates': [[[-122.237..."
4,Allendale,"{'type': 'Polygon', 'coordinates': [[[-122.210..."
...,...,...
272,West San Jose,"{'type': 'Polygon', 'coordinates': [[[-122.003..."
273,Whitman-Mocine,"{'type': 'Polygon', 'coordinates': [[[-122.066..."
274,Willow Glen,"{'type': 'Polygon', 'coordinates': [[[-121.887..."
275,Woodland,"{'type': 'Polygon', 'coordinates': [[[-122.194..."


In [13]:
# Save polygones in mongoDB -> create a list of dictionaries
dict_neigh_rows = []
for index, row in pd.DataFrame(dict_neigh).iterrows():
    dict_neigh_rows.append({"name":row["name"], "geojson":row["geojson"]})

## Where are most of the offices located?

In [14]:
# Connect to the MongoDB server at localhost:27017 and select the "Ironhack" database.
# (The neighborhood geometries to be queried are the ones in dict_neigh["geojson"].)
client = MongoClient("localhost:27017")
db = client.get_database("Ironhack")

In [15]:
# Collection queried below for the neighborhood whose `geojson` contains a point
neighborhoodsSF = db.get_collection("neighborhoodsSF")

In [16]:
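# One-time load of the neighborhood documents into MongoDB (presumably left
# commented out so that re-running the notebook does not insert duplicates):
# neighborhoodsSF.insert_many(dict_neigh_rows)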

In [17]:
# Office coordinates come from rents["lon"] and rents["lat"]
def _lon_lat_pair(row):
    """Return the coordinates of one offices-table row as a ``[lon, lat]`` list."""
    return [row["lon"], row["lat"]]


rents["positions"] = rents.apply(_lon_lat_pair, axis=1)
rents

Unnamed: 0,employees,lat,lon,address,positions
114,87,37.791582,-122.394000,135 Main Street,"[-122.394, 37.791582]"
163,87,37.788891,-122.398100,535 Mission Street,"[-122.3981003, 37.7888907]"
147,87,37.794792,-122.403684,601 Montgomery Street,"[-122.403684, 37.794792]"
151,87,37.791582,-122.394000,135 Main Street,"[-122.394, 37.791582]"
1,87,37.784463,-122.242628,1830 Embarcadero,"[-122.242628, 37.784463]"
...,...,...,...,...,...
121,119,37.790913,-122.396728,350 Mission Street,"[-122.3967284, 37.7909128]"
120,119,37.790913,-122.396728,350 Mission Street,"[-122.3967284, 37.7909128]"
9,119,37.795014,-122.399435,1 Embarcadero Center,"[-122.399435, 37.795014]"
181,119,37.790913,-122.396728,350 Mission Street,"[-122.3967284, 37.7909128]"


In [27]:
# Look for the office located at one specific [lon, lat] position.
# The `positions` cells are lists, so comparing the whole column with
# `== str([...])` can never match. Each cell is compared with the list itself,
# and also with its text form in case the table was read back from a CSV file.
target_position = [-122.406468, 37.776344]
rents[rents["positions"].map(
    lambda cell: cell == target_position or cell == str(target_position)
)]

Unnamed: 0,employees,lat,lon,address,positions


In [26]:
# Save the offices table to disk; index=False leaves the row index out of the file
rents.to_csv("../data/officesSF.csv", index=False)

In [18]:
# List of every office's [lon, lat] pair, in table order, to loop over below
locations = rents["positions"].tolist()
print(locations)

[[-122.394, 37.791582], [-122.3981003, 37.7888907], [-122.403684, 37.794792], [-122.394, 37.791582], [-122.242628, 37.784463], [-122.402553, 37.789813], [-122.3930894, 37.7660393], [-122.066477, 37.909636], [-122.3981003, 37.7888907], [-122.398992, 37.792624], [-122.3981003, 37.7888907], [-122.3946185, 37.7926608], [-121.9160655, 37.3653324], [-122.236083, 37.723888], [-121.921571, 37.368335], [-122.404104, 37.786795], [-122.399608, 37.793076], [-122.404104, 37.786795], [-122.399608, 37.793076], [-122.399608, 37.793076], [-122.4008102, 37.7942355], [-122.28784, 37.83779], [-122.396108, 37.795251], [-122.398992, 37.792624], [-122.4001359, 37.7890841], [-122.4001359, 37.7890841], [-122.3979675, 37.7927653], [-122.401807, 37.793174], [-122.399808, 37.78945], [-122.124104, 38.070633], [-122.293231, 37.853225], [-122.401807, 37.793174], [-122.4033442, 37.7886378], [-122.401807, 37.793174], [-122.401807, 37.793174], [-122.3946185, 37.7926608], [-122.391276, 37.7815], [-122.401807, 37.793174]

In [19]:
# Wrap each [lon, lat] pair in a GeoJSON Point for the geospatial queries below
positions = [dict(type="Point", coordinates=coordinates) for coordinates in locations]

In [20]:
# Try the lookup on a single office: name of a stored neighborhood whose
# `geojson` geometry intersects this point
neighborhoodsSF.find_one(
        {"geojson":
         {"$geoIntersects":
          {"$geometry": {'type': 'Point', 'coordinates': [-122.242628, 37.784463]}}}
        })["name"]

'East Peralta'

In [21]:
# Neighborhood of every office, in the same order as `positions`
list_neigh = []
for point in positions:
    match = neighborhoodsSF.find_one(
        {"geojson": {"$geoIntersects": {"$geometry": point}}}
    )
    # find_one returns None when no stored neighborhood contains the point (some
    # offices lie outside the neighborhoods that were collected). That case is
    # recorded as None explicitly; a bare `except` would also have hidden real
    # database errors and turned them into silent None entries.
    list_neigh.append(
        {"neighborhoods": match.get("name") if match is not None else None}
    )

In [22]:
# First five rows of the per-neighborhood office counts
pd.DataFrame(list_neigh).value_counts().head()

neighborhoods     
Financial District    137
South Of Market        13
Mission Bay             7
Downtown                4
North San Jose          4
Name: count, dtype: int64

## Where is the only design startup in San Francisco?

In [23]:
c = db.get_collection("companies")
# Filters: fewer than 100 employees, a tag containing "design"/"Design", at least
# one office in San Francisco, and money raised expressed in millions (…M) or billions (…B)
condition_1 = {"number_of_employees":{"$lt": 100}}
condition_2 = {"tag_list": {"$regex": "[Dd]esign"}}
condition_3 = {"offices.city": "San Francisco"}
condition_4 = {"total_money_raised": {"$regex":".+M$"}}
condition_5 = {"total_money_raised": {"$regex":".+B$"}}
# The city is projected as well so that the San Francisco office can be told
# apart from the company's other offices.
projection = {"offices.city": 1, "offices.latitude": 1, "offices.longitude": 1, "_id": 0}
design_companies = list(c.find({"$and": [condition_1, condition_2, condition_3, {"$or": [condition_4, condition_5]}]}, projection))
if not design_companies:
    raise ValueError("No company matches the design-startup filters")
# There was only one company in the recorded run; the first match is used.
# condition_3 matches when *any* office of the company is in San Francisco, so
# that office is selected explicitly instead of assuming it is offices[0].
sf_office = next(
    office
    for office in design_companies[0]["offices"]
    if office.get("city") == "San Francisco"
)
position = [sf_office["longitude"], sf_office["latitude"]]
position

[-122.394523, 37.764726]

In [24]:
# Neighborhood of the design startup found above.
# $geometry expects a GeoJSON object, so the [lon, lat] pair in `position` is
# wrapped in a Point instead of being passed as a bare list.
design_match = neighborhoodsSF.find_one(
    {"geojson":
     {"$geoIntersects":
      {"$geometry": {"type": "Point", "coordinates": position}}
     }
    })
# find_one returns None when no stored neighborhood contains the point; a document
# without a "name" field is treated the same way. Database errors are no longer
# swallowed by a bare `except`.
if design_match is not None and "name" in design_match:
    neighDesign = {"neighborhoods": design_match["name"]}
else:
    neighDesign = None

print(neighDesign) # Not in the other list

{'neighborhoods': 'Potrero Hill'}


# 13 final venues

In [25]:
# https://www.viajarsanfrancisco.com/salir-de-fiesta-ocio-nocturno.php:
# Best neighborhood for partying is South of Market
# `list_neigh` already holds the neighborhood of every entry of `positions`, in the
# same order, so the offices are filtered from it instead of sending one more
# query per office and hiding any failure behind `except: pass`.
if len(list_neigh) != len(positions):
    raise ValueError("list_neigh is out of sync with positions; re-run the lookup cell")
list_offices = [
    point
    for point, lookup in zip(positions, list_neigh)
    if lookup["neighborhoods"] == "South Of Market"
]
list_offices

[{'type': 'Point', 'coordinates': [-122.391276, 37.7815]},
 {'type': 'Point', 'coordinates': [-122.3921923, 37.781381]},
 {'type': 'Point', 'coordinates': [-122.4097127, 37.78205]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.405783, 37.785435]},
 {'type': 'Point', 'coordinates': [-122.408109, 37.783521]},
 {'type': 'Point', 'coordinates': [-122.3920816, 37.7828482]},
 {'type': 'Point', 'coordinates': [-122.3920816, 37.7828482]},
 {'type': 'Point', 'coordinates': [-122.39586, 37.785766]}]