# Choose city

We have chosen four criteria for our company

- Designers like to go to design talks and share knowledge. There must be some nearby companies that also do design.

- 30% of the company staff have at least 1 child.

- Developers like to be near successful tech startups that have raised at least 1 Million dollars.

- Executives like Starbucks A LOT. Ensure there's a starbucks not too far.

In [1]:
from IPython.display import display, HTML
from pymongo import MongoClient
import json
import pandas as pd
import geopandas as gpd
import requests
from dotenv import load_dotenv
import os
from functools import reduce
import operator
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
from src.utils import *
#import libraries

In [2]:
client = MongoClient()#conect to Mongo

In [3]:
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [4]:
companies = client.Dataironhack.LabMongo
offices = client.Dataironhack.offices
#create collections

In [5]:
comp_info = list(companies.find(
    {"$or":
     [{"$and":
       [{"tag_list":{"$regex":"design"}},
        {"deadpooled_year":{"$eq":None}}]},
      {"$and":
       [{"category_code":"software"},
        {"total_money_raised":{"$regex":"[\$]([1-9])[\.\d]?[M]"}},
        {"deadpooled_year":{"$eq":None}}]}]},
    {"_id":0, "name":1, "offices":1, "category_code":1, "total_money_raised":1}
    
))
#look for companies, on the one hand, that do design tasks, 
#and on the other hand, that are technology companies that have earned more than 1M dollars.
#Also, consider that the companies are still active.

Select from the database the companies that perform design tasks and are still active. In addition to tech companies that have earned more than $1M since their inception, and are still active.
For this, we selected software companies.

In [6]:
offices.insert_many(comp_info)#insert in the collection

<pymongo.results.InsertManyResult at 0x19166d93400>

In [7]:
office = offices.aggregate([
    {"$unwind":"$offices"}, 
    {"$match": {"offices.city":{"$ne":None}, "offices.latitude":{"$ne":None}, "offices.longitude":{"$ne":None}}},
    {"$set":
     {"geojson":
      {"type":"Point",
       "coordinates":["$offices.longitude","$offices.latitude"]}}},
    {"$project":{"_id":0}}])
#separate offices, eliminate nulls, and create coordinates

In [8]:
offices.drop()#drop the last collection
offices.insert_many(office)#and recreate with new info

<pymongo.results.InsertManyResult at 0x19166dbb640>

In [9]:
offices_info = list(offices.find({},
                            {"_id":0, "name": 1, 
                             "category_code":1, 
                             "offices.city":1, 
                             "offices.latitude":1, 
                             "offices.longitude":1}))


In [10]:
df = pd.DataFrame(offices_info)#create dataframe to check the city that has more companies

In [11]:
df.head()#check

Unnamed: 0,name,category_code,offices
0,Digg,news,"{'city': 'San Francisco', 'latitude': 37.76472..."
1,Splunk,software,"{'city': 'San Francisco', 'latitude': 37.78274..."
2,99designs,design,"{'city': 'San Francisco', 'latitude': 37.79553..."
3,Cutcaster,web,"{'city': 'San Francisco', 'latitude': 37.79851..."
4,Globant,software,"{'city': 'San Francisco', 'latitude': 37.78192..."


In [12]:
df["city"] = df["offices"].apply(extract_city)

In [13]:
df["latitude"] = df["offices"].apply(extract_latitude)

In [14]:
df["longitude"] = df["offices"].apply(extract_longitude)
#separate the dictionary into three columns

In [15]:
df.head()

Unnamed: 0,name,category_code,offices,city,latitude,longitude
0,Digg,news,"{'city': 'San Francisco', 'latitude': 37.76472...",San Francisco,37.764726,-122.394523
1,Splunk,software,"{'city': 'San Francisco', 'latitude': 37.78274...",San Francisco,37.78274,-122.390945
2,99designs,design,"{'city': 'San Francisco', 'latitude': 37.79553...",San Francisco,37.795531,-122.400598
3,Cutcaster,web,"{'city': 'San Francisco', 'latitude': 37.79851...",San Francisco,37.79851,-122.457454
4,Globant,software,"{'city': 'San Francisco', 'latitude': 37.78192...",San Francisco,37.781929,-122.404176


In [16]:
df = df.drop(columns = ["offices"])#drop the column

In [17]:
df["city"].value_counts()#check the cities

San Francisco         78
New York              24
London                20
San Diego             11
Los Angeles           11
                      ..
Washington             1
Hastings-on-Hudson     1
everett                1
Resistencia            1
Phoenix                1
Name: city, Length: 352, dtype: int64

Create a dataframe of the companies to get their values, and compare which of the cities present has the highest number of selected companies.
As you can see, San Francisco is the city with the most companies, so we selected it for our company.

In [18]:
city_sf = df[df["city"] == "San Francisco"]

In [19]:
office_sanf = offices.aggregate([
    {"$match":
     {"offices.city":"San Francisco"}},
    {"$project":{"_id":0}}])
#choose San Francisco

In [20]:
offices.drop()
offices.insert_many(office_sanf)

<pymongo.results.InsertManyResult at 0x19166f61440>

In [21]:
offices.create_index([("geojson", "2dsphere")])#create index

'geojson_2dsphere'

In [22]:
map_sf = Map(location=[37.764726, -122.394523],zoom_start=13)#create a map

In [23]:
for i,row in city_sf.iterrows():
    company = {
        "location" : [row["latitude"],row["longitude"]],
        "tooltip" : row["category_code"]}
    if row["category_code"] == "software":
        icon = Icon(color = "blue",
                    prefix = "fa",
                    icon = "laptop",
                    icon_color = "black")
    else:
        icon = Icon(color = "red",
                    prefix = "fa",
                    icon = "briefcase",
                    icon_color = "black")
        
    Marker(**company,icon = icon).add_to(map_sf)

map_sf.save("map_companies.html")
map_sf
#create markers and icons

As can be seen in the map, most of the software companies are together in the same area, while the rest of the companies are more dispersed over a larger area.

# Check the Starbucks

In [30]:
load_dotenv()

True

In [31]:
coord_sf = requests.get(f"https://geocode.xyz/San%20Francisco,US?json=1").json()#take San Francisco coords

In [32]:
coord_sf

{'standard': {'addresst': {},
  'city': 'San Francisco',
  'prov': 'US',
  'countryname': 'United States of America',
  'postal': {},
  'confidence': '0.90'},
 'longt': '-122.40443',
 'alt': {},
 'elevation': {},
 'latt': '37.66881'}

In [33]:
sf_coordinates = get_coordinates(coord_sf)
sf_coordinates#take coordinates

{'type': 'Point', 'coordinates': [-122.40443, 37.66881]}

In [34]:
sanf_coordinates = f"{sf_coordinates.get('coordinates')[1]},{sf_coordinates.get('coordinates')[0]}"

In [35]:
sanf_coordinates

'37.66881,-122.40443'

Get the coordinates of San Francisco to find the Starbucks and schools in the city.

In [36]:
starbucks_list = get_location_from_foursquare_in_SanFrancisco("starbucks")

Calling the Api
Getting the coordinates
Finished


In [37]:
starbucks_list[0]

{'name': 'Starbucks', 'latitude': 37.65511627, 'longitude': -122.4109091}

In [38]:
starbucks = client.Dataironhack.starbucks #create collection

In [39]:
starbucks.insert_many(starbucks_list)

<pymongo.results.InsertManyResult at 0x191670f6d40>

In [40]:
star = starbucks.aggregate([
    {"$set": {"geojson" : {"type": "Point", "coordinates" : ["$longitude", "$latitude"]}}},
    {"$match": {"latitude":{"$ne":None}, "longitude":{"$ne":None}}},
    {"$project":{"_id":0}}])
#get geojson

In [41]:
starbucks.drop()
starbucks.insert_many(star)

<pymongo.results.InsertManyResult at 0x1916571ca00>

In [42]:
check_starbucks = list(starbucks.find().limit(1))
check_starbucks
#check

[{'_id': ObjectId('602a548c437b2f008a93b66b'),
  'name': 'Starbucks',
  'latitude': 37.65511627,
  'longitude': -122.4109091,
  'geojson': {'type': 'Point', 'coordinates': [-122.4109091, 37.65511627]}}]

In [43]:
starbucks.create_index([("geojson", "2dsphere")])# get index

'geojson_2dsphere'

In [44]:
map_st = Map(location=[37.764726, -122.394523],zoom_start=12)#create map

In [45]:
st_list = pd.DataFrame(starbucks_list)#create dataframe

In [46]:
st_group = folium.FeatureGroup(name = "Starbucks")

In [47]:
HeatMap(data=st_list[["latitude","longitude"]],radius=16).add_to(st_group)#create heatmap

<folium.plugins.heat_map.HeatMap at 0x191670f4c40>

In [48]:
st_group.add_to(map_st)
map_st.save("firstheatmapstarbucks.html")
map_st

We obtain the coordinates of all the sturbucks in the city, and through the heat map we see that they are spread throughout the city but there is a higher concentration of these establishments near where the companies we have previously selected are located.

# Check the Schools

In [49]:
schools_list = get_location_from_foursquare_in_SanFrancisco_second_mod("school")

Calling the Api
Getting the coordinates


In [50]:
schools_list[0]

{'name': 'All Souls Catholic School',
 'latitude': 37.657142639160156,
 'longitude': -122.4152603149414}

In [51]:
schools = client.Dataironhack.schools

In [52]:
schools.insert_many(schools_list)

<pymongo.results.InsertManyResult at 0x19165710ec0>

In [53]:
sch = schools.aggregate([
    {"$set": {"geojson" : {"type": "Point", "coordinates" : ["$longitude", "$latitude"]}}},
    {"$match": {"latitude":{"$ne":None}, "longitude":{"$ne":None}}},
    {"$project":{"_id":0}}])

In [54]:
schools.drop()
schools.insert_many(sch)

<pymongo.results.InsertManyResult at 0x19166e37fc0>

In [55]:
check_school = list(schools.find().limit(1))
check_school

[{'_id': ObjectId('602a5493437b2f008a93b786'),
  'name': 'All Souls Catholic School',
  'latitude': 37.657142639160156,
  'longitude': -122.4152603149414,
  'geojson': {'type': 'Point',
   'coordinates': [-122.4152603149414, 37.657142639160156]}}]

In [56]:
schools.create_index([("geojson", "2dsphere")])

'geojson_2dsphere'

In [57]:
sc_list = pd.DataFrame(schools_list)

In [58]:
sc_group = folium.FeatureGroup(name = "School")

In [59]:
HeatMap(data=sc_list[["latitude","longitude"]],radius=16).add_to(sc_group)

<folium.plugins.heat_map.HeatMap at 0x19167027df0>

In [60]:
sc_group.add_to(map_st)

<folium.map.FeatureGroup at 0x19167027eb0>

In [61]:
folium.LayerControl(collapsed=False).add_to(map_st)
map_st.save("secondheatmap_starbucks_scools.html")
map_st

See the map and verify that the concentration of schools is farther away from the business zone than the Starbucks.

# Points

In [62]:
city_sf["category_code"].value_counts()#check the category companies

software       42
advertising    12
cleantech       6
mobile          6
news            3
design          3
ecommerce       3
web             3
Name: category_code, dtype: int64

With all this, let's choose a software company site, but which one?
To facilitate the choice, rate the different proximities to selected establishments 

In [63]:
city_sf_software = city_sf[city_sf["category_code"] == "software"]

In [64]:
software_office = list(offices.find({"category_code":"software"}, {"_id":0}))
software_office.pop(4)
#find the software companies

{'name': 'Telespree',
 'category_code': 'software',
 'total_money_raised': '$1M',
 'offices': {'description': 'Headquarters',
  'address1': '185 Berry Street',
  'address2': '',
  'zip_code': '',
  'city': 'San Francisco',
  'state_code': 'CA',
  'country_code': 'USA',
  'latitude': 37.777092,
  'longitude': -122.391011},
 'geojson': {'type': 'Point', 'coordinates': [-122.391011, 37.777092]}}

In [65]:
designers = client.Dataironhack.designers

In [66]:
design_list=list(offices.find({"category_code":{"$ne":"software"}}))

In [67]:
designers.insert_many(design_list)

<pymongo.results.InsertManyResult at 0x19167109700>

In [68]:
design_geo = designers.aggregate([
    {"$set":
     {"geojson":
      {"type":"Point",
       "coordinates":["$offices.longitude","$offices.latitude"]}}},
    {"$project":{"_id":0}}])
#create design collect

In [69]:
designers.drop()
designers.insert_many(design_list)

<pymongo.results.InsertManyResult at 0x191656b68c0>

In [70]:
designers.create_index([("geojson", "2dsphere")])

'geojson_2dsphere'

In [71]:
n_final_list = []
for soft in software_office:
    n_final_dic = {}
    names = soft.get("name")
    i = soft.get("geojson")
    starb = list(starbucks.find({"geojson":{"$near":i, "$maxDistance":500}}))
    design = list(designers.find({"geojson":{"$near":i, "$maxDistance":2000}}))
    software = list(offices.find({"$and":[{"geojson":{"$near":i, "$maxDistance":2000}}, {"category_code": "software"}]}))
    sch = list(schools.find({"geojson":{"$near":i, "$maxDistance":5000}}))
    n_final_dic["name"] = names
    n_final_dic["number_of_starbacks_500m"] = len(starb)
    n_final_dic["number_of_design_companies_2000"] = len(design)
    n_final_dic["number_of_software_companies_2000"] = len(software)
    n_final_dic["number_of_schools_5000"] = len(sch)
    n_final_list.append(n_final_dic)
#get all the near locations

In [72]:
df_software=pd.DataFrame(n_final_list)#create dataframe to points

In [73]:
df_software["points1"]=df_software["number_of_starbacks_500m"].apply(get_points_starbucks)

In [74]:
df_software["points2"]=df_software["number_of_design_companies_2000"].apply(get_points_companies)

In [75]:
df_software["points3"]=df_software["number_of_software_companies_2000"].apply(get_points_companies)

In [76]:
df_software["points4"]=df_software["number_of_schools_5000"].apply(get_points_schools)

In [77]:
df_software.head()

Unnamed: 0,name,number_of_starbacks_500m,number_of_design_companies_2000,number_of_software_companies_2000,number_of_schools_5000,points1,points2,points3,points4
0,Splunk,0,12,33,10,0,6,6,7
1,Globant,3,27,36,13,3,6,6,7
2,Razz,6,18,12,44,3,6,6,7
3,Keibi Technologies,0,27,33,10,0,6,6,7
4,Telespree,3,9,33,16,3,6,6,7


In [78]:
df_software["points"] = df_software["points1"] + df_software["points2"] + df_software["points3"] + df_software["points4"]
#apply the functions and add the points

In [79]:
df_software.drop(columns = ["points1", "points2", "points3", "points4"]).head()

Unnamed: 0,name,number_of_starbacks_500m,number_of_design_companies_2000,number_of_software_companies_2000,number_of_schools_5000,points
0,Splunk,0,12,33,10,19
1,Globant,3,27,36,13,22
2,Razz,6,18,12,44,22
3,Keibi Technologies,0,27,33,10,19
4,Telespree,3,9,33,16,22


The final scores are in. And the company that meets the most requirements is Globant. So, we already have the point where we are going to install the company. We are going to build our company on top of yours!!!!

# The final Choice

In [80]:
glob = list(offices.find({"name":"Globant"}, {"_id":0, "offices.longitude":1, "offices.latitude":1})) #get Globant

In [81]:
glob

[{'offices': {'latitude': 37.7819286, 'longitude': -122.4041764}},
 {'offices': {'latitude': 37.7819286, 'longitude': -122.4041764}},
 {'offices': {'latitude': 37.7819286, 'longitude': -122.4041764}}]

In [82]:
for i in glob:
    globant = i.get("offices")

In [83]:
globant
#get coordinates

{'latitude': 37.7819286, 'longitude': -122.4041764}

In [84]:
globant_coords = [i for i in globant.values()]

In [None]:
globant_coords #clean coordinates

In [None]:
map_globant = folium.Map(location = globant_coords, zoom_start=15)#create map

In [None]:
glob_icon = Icon(color = "blue",
             prefix = "fa",
             icon = "id-card",
             icon_color = "black")

glo = Marker(location = globant_coords, icon=glob_icon, tooltip="Globant")
glo.add_to(map_globant)
map_globant

Here we go!!!!