# Q2

En esta parte, la tarea consiste en consultar MongoDB mediante geoqueries (consultas geoespaciales) y representar los datos geoespaciales con folium, mostrando en un mapa la geolocalización de la nueva oficina.

In [101]:
# Tu codigo aqui
from pymongo import MongoClient,GEOSPHERE
import folium
from bson.son import SON
import pandas as pd

In [80]:
# Connect to MongoDB using the driver's default connection settings
# (MongoClient is called without arguments).
client = MongoClient()
# Handle to the database that holds the `companies2` collection used below.
db = client["companiesDB"]

In [106]:
# Base map centred on latitude 39.157898, longitude -101.646620 at zoom level 4.
m = folium.Map(location=[39.157898, -101.646620], zoom_start=4)

In [107]:
# Draw one circle marker per company: big companies first (red), then
# start-ups (blue), so the markers are added in the same order as before.
for company_type, marker_color in (('Big company', 'red'), ('Start-up', 'blue')):
    for company in db.companies2.find({'type': company_type}):
        coords = company['loc_office']['coordinates']
        folium.CircleMarker(
            # The stored point is [longitude, latitude]; folium takes
            # [latitude, longitude], hence the swap.
            [coords[1], coords[0]],
            color=marker_color,
            radius=5,
            tooltip=(str(coords) + company['name']),
        ).add_to(m)

# Last expression of the cell: displays the map in the notebook.
m

In [108]:
# Para cada una de las empresas que encontramos podemos calcular la cantidad de empresas a su alrededor y su
# ratio "Big/Startups"

In [109]:
# Create a 2dsphere index on the office location field; the $near geoquery
# used by search_2000m runs against this field.
db["companies2"].create_index([("loc_office", GEOSPHERE)])

'loc_office_2dsphere'

In [182]:
def search_2000m(x, max_distance=2000):
    """Count the companies around a point and compute their big/start-up ratio.

    Runs a ``$near`` geoquery on ``db.companies2.loc_office`` centred on ``x``.

    Args:
        x: GeoJSON geometry used as the centre of the search (the callers in
            this notebook pass a company's ``loc_office`` value).
        max_distance: Search radius passed to ``$maxDistance``. Defaults to
            2000, i.e. the 2 km radius this function is named after.

    Returns:
        A tuple ``(ratio, bigs, starts)`` where ``starts`` is the number of
        neighbours whose ``type`` is ``'Start-up'``, ``bigs`` is the number of
        all other neighbours, and ``ratio`` is ``bigs / starts``. When there
        are no start-ups in range (including the case of no neighbours at
        all), ``ratio`` is the sentinel ``-1``.
    """
    query = {"loc_office": {"$near": {"$geometry": x, "$maxDistance": max_distance}}}
    # Skip the first (closest) hit, which is presumably the company located at
    # `x` itself. NOTE(review): when several companies share the exact same
    # coordinates, the skipped document is an arbitrary one of them.
    nearby = db.companies2.find(query).skip(1)

    # Tally the neighbours in a single pass over the cursor.
    starts = 0
    bigs = 0
    for empr in nearby:
        if empr['type'] == 'Start-up':
            starts += 1
        else:
            bigs += 1

    # Avoid dividing by zero when no start-up is in range.
    ratio = bigs / starts if starts != 0 else -1
    return (ratio, bigs, starts)

In [183]:
# Store the information in a DataFrame. The cursor is materialised into a list
# of documents and handed to the DataFrame constructor: DataFrame.from_dict is
# documented for dict input and only accepted the cursor incidentally.
companies = pd.DataFrame(list(db.companies2.find()))
# One geoquery per row: each cell receives the (ratio, bigs, starts) tuple
# returned by search_2000m for that company's office location.
companies['ratio_Big_Start'] = companies['loc_office'].apply(search_2000m)

In [184]:
# How to read the ratio (everything is counted within a 2 km radius):
#   -1        -> no start-ups nearby (only big companies, or no neighbours)
#   above 1   -> more big companies than start-ups
#   0 to 1    -> more start-ups than big companies
# The column holds (ratio, bigs, starts) tuples, so rows are ordered by ratio
# first and ties are broken by the remaining tuple elements.
companies.sort_values(by='ratio_Big_Start', ascending=False)

Unnamed: 0,_id,category_code,founded_year,loc_office,name,number_of_employees,total_money_raised,type,ratio_Big_Start
21,5d58bb1119ab40d9eb30ed03,web,2005,"{'type': 'Point', 'coordinates': [-122.301534,...",Zvents,55,$31.2M,Big company,"(3.0, 3, 1)"
1,5d58bb1119ab40d9eb30ecef,web,1995,"{'type': 'Point', 'coordinates': [-121.930035,...",eBay,15000,$6.7M,Big company,"(3.0, 3, 1)"
31,5d58bb1119ab40d9eb30ed0d,network_hosting,2004,"{'type': 'Point', 'coordinates': [-121.935688,...",Hightail,200,$82.7M,Big company,"(3.0, 3, 1)"
125,5d58bb1119ab40d9eb30ed6b,web,2004,"{'type': 'Point', 'coordinates': [-122.307887,...",SugarSync,60,$61M,Big company,"(3.0, 3, 1)"
184,5d58bb1119ab40d9eb30eda6,web,2007,"{'type': 'Point', 'coordinates': [-122.227765,...",GreenNote,65,$4.2M,Start-up,"(3.0, 3, 1)"
99,5d58bb1119ab40d9eb30ed51,network_hosting,2005,"{'type': 'Point', 'coordinates': [-122.300246,...",fabrik,175,$51.3M,Big company,"(3.0, 3, 1)"
92,5d58bb1119ab40d9eb30ed4a,mobile,2006,"{'type': 'Point', 'coordinates': [-122.230116,...",Qik,60,$14.8M,Start-up,"(3.0, 3, 1)"
319,5d58bb1119ab40d9eb30ee2d,mobile,1999,"{'type': 'Point', 'coordinates': [-122.301534,...",Sonim Technologies,200,$42.6M,Big company,"(3.0, 3, 1)"
323,5d58bb1119ab40d9eb30ee31,web,2003,"{'type': 'Point', 'coordinates': [-121.9303616...",Naseeb Networks,100,$2M,Big company,"(3.0, 3, 1)"
362,5d58bb1119ab40d9eb30ee58,web,2008,"{'type': 'Point', 'coordinates': [-121.928393,...",Smallaa,5,$3M,Start-up,"(3.0, 3, 1)"


In [185]:
def ext_rat(x):
    """Return the first element of the tuple: the big/start-up ratio."""
    return x[0]


def ext_bigs(x):
    """Return the second element of the tuple: the big-company count."""
    return x[1]


def ext_starts(x):
    """Return the third element of the tuple: the start-up count."""
    return x[2]


# Split the (ratio, bigs, starts) tuples into one column per component.
for column_name, extractor in (
    ('ratioBS', ext_rat),
    ('Bigs', ext_bigs),
    ('Startup', ext_starts),
):
    companies[column_name] = companies['ratio_Big_Start'].apply(extractor)

In [190]:
# We want a positive ratio strictly between 0.8 and 1.5 (a ratio above 0.8 is
# necessarily non-zero, so no separate zero check is needed).
ratio_in_range = (companies['ratioBS'] > 0.8) & (companies['ratioBS'] < 1.5)
best_options = (
    companies.loc[ratio_in_range]
    # The tuple column is no longer needed once it has been split up.
    .drop(columns='ratio_Big_Start')
    # Final criterion: as many start-ups nearby as possible.
    .sort_values('Startup', ascending=False)
    # Keep the 5 best locations.
    .head(5)
)
best_options

Unnamed: 0,_id,category_code,founded_year,loc_office,name,number_of_employees,total_money_raised,type,ratioBS,Bigs,Startup
378,5d58bb1119ab40d9eb30ee68,web,2009,"{'type': 'Point', 'coordinates': [-73.986951, ...",Udorse,3,$500k,Start-up,0.818182,9,11
347,5d58bb1119ab40d9eb30ee49,web,2008,"{'type': 'Point', 'coordinates': [-73.986951, ...",Wee Web,5,$270k,Start-up,0.818182,9,11
304,5d58bb1119ab40d9eb30ee1e,web,2008,"{'type': 'Point', 'coordinates': [-73.986951, ...",People Capital,10,$2.5M,Start-up,0.818182,9,11
273,5d58bb1119ab40d9eb30edff,mobile,2007,"{'type': 'Point', 'coordinates': [-73.991334, ...",Buzzd,9,$4M,Start-up,0.9,9,10
149,5d58bb1119ab40d9eb30ed83,ecommerce,2007,"{'type': 'Point', 'coordinates': [-73.9954585,...",Bonobos,150,$74.7M,Start-up,1.111111,10,9


In [194]:
# Second map, same centre and zoom as the first one, with the candidate
# locations highlighted on top of all companies.
m2 = folium.Map(location=[39.157898, -101.646620], zoom_start=4)

# Big companies first (red), then start-ups (blue), as circle markers.
for company_type, marker_color in (('Big company', 'red'), ('Start-up', 'blue')):
    for company in db.companies2.find({'type': company_type}):
        coords = company['loc_office']['coordinates']
        folium.CircleMarker(
            # Stored as [longitude, latitude]; folium takes [latitude, longitude].
            [coords[1], coords[0]],
            color=marker_color,
            radius=5,
            tooltip=(str(coords) + company['name']),
        ).add_to(m2)

# Standard markers for the selected candidates, labelled with their ratio.
for _, candidate in best_options.iterrows():
    print(candidate)
    office = candidate['loc_office']['coordinates']
    folium.Marker([office[1], office[0]], tooltip=candidate['ratioBS']).add_to(m2)
m2

_id                                             5d58bb1119ab40d9eb30ee68
category_code                                                        web
founded_year                                                        2009
loc_office             {'type': 'Point', 'coordinates': [-73.986951, ...
name                                                              Udorse
number_of_employees                                                    3
total_money_raised                                                 $500k
type                                                            Start-up
ratioBS                                                         0.818182
Bigs                                                                   9
Startup                                                               11
Name: 378, dtype: object
_id                                             5d58bb1119ab40d9eb30ee49
category_code                                                        web
founded_year              

In [None]:
# Finalmente, encontramos que la ciudad de Nueva York es una muy buena opción según las condiciones de clasificación
# seleccionadas.