# 5.4- Office finder 

#### Encontramos tu oficina

In [None]:
import pymongo

import pandas as pd

import re

import folium

In [None]:
# Connect to MongoDB; no host/port is passed, so pymongo's default connection settings apply

cliente=pymongo.MongoClient()

# Handle to the `companies` database; the functions and cells below reach MongoDB through `db`
db=cliente.companies

In [None]:
def data():
    """Query MongoDB for candidate companies and return one row per office.

    The filter keeps companies that
      * belong to one of the tech-related categories listed below,
      * were founded in 1999 or later,
      * have a non-empty ``offices`` list whose latitude/longitude values
        are not null,
      * have ``deadpooled_year`` null or missing, and
      * have non-null ``number_of_employees`` and ``total_money_raised``.

    Returns:
        pandas.DataFrame: one row per office with the office ``latitude``
        and ``longitude``, a ``geo_loc`` column holding ``[longitude,
        latitude]`` (inserted at column position 2), and the company-level
        columns ``category_code``, ``name``, ``number_of_employees``,
        ``founded_year`` and ``total_money_raised``. If no company matches,
        the (empty) frame is returned with an empty ``geo_loc`` column.
    """
    categories = ['games_video', 'software', 'web', 'mobile',
                  'photo_video', 'ecommerce', 'search', 'network_hosting']

    filtro = {'$and': [
        # `$in` is equivalent to an `$or` of equality tests on the same field
        {'category_code': {'$in': categories}},
        {'founded_year': {'$gte': 1999}},
        {'offices': {'$ne': []}},
        {'offices.latitude': {'$ne': []}},
        {'offices.longitude': {'$ne': []}},
        {'offices.latitude': {'$ne': None}},
        {'offices.longitude': {'$ne': None}},
        {'deadpooled_year': None},
        {'number_of_employees': {'$ne': None}},
        {'total_money_raised': {'$ne': None}},
    ]}

    campos = {'name': 1, '_id': 0, 'offices.latitude': 1, 'offices.longitude': 1,
              'founded_year': 1, 'category_code': 1, 'number_of_employees': 1,
              'total_money_raised': 1}

    query = db.companies.find(filtro, campos)

    # Flatten the `offices` array: each office becomes a row that carries
    # the company-level fields listed in `meta`.
    df = pd.json_normalize(data=query, record_path='offices',
                           meta=['category_code', 'name', 'number_of_employees',
                                 'founded_year', 'total_money_raised'])

    if df.empty:
        # Nothing matched, so there are no coordinate columns to combine.
        df['geo_loc'] = pd.Series(dtype=object)
        return df

    # Coordinates are stored as [longitude, latitude]. Build the final values
    # once instead of inserting a placeholder column and overwriting it with a
    # row-wise apply.
    geo_loc = pd.Series(
        [[lon, lat] for lon, lat in zip(df['longitude'], df['latitude'])],
        index=df.index, dtype=object)
    df.insert(2, 'geo_loc', geo_loc)

    return df

In [None]:
# Preview the first rows returned by the query (this runs the query once just for display)
data().head()

In [None]:
def exchange(df):
    """Convert money strings such as ``'$1.5M'`` or ``'€300k'`` to integers.

    Args:
        df: iterable of strings (e.g. the ``total_money_raised`` column).
            The last character may be a magnitude suffix (``k``, ``M``,
            ``B``) and the first character may be a currency symbol.

    Returns:
        list[int]: one amount per input item. Amounts starting with ``£``
        are multiplied by 1.3 and those starting with ``€`` by 1.12; any
        other prefix is left unconverted (factor 1). Items that are empty
        or contain no digits yield 0.
    """
    multipliers = {'k': 1e3, 'M': 1e6, 'B': 1e9}
    rates = {'£': 1.3, '€': 1.12}

    res = []

    for e in df:
        match = re.search(r'[0-9.]+', e) if e else None

        if match is None:
            # Empty string / no numeric part: nothing to convert.
            res.append(0)
            continue

        # Look the factors up per item. Keeping them in variables that are
        # only set when a suffix/symbol is present would let the values of a
        # previous item leak into the following ones.
        number = multipliers.get(e[-1], 1)
        symbol = rates.get(e[0], 1)

        res.append(int(float(match.group())*number*symbol))

    return res

In [None]:
def classifying(date, employees):
    """Label each company as ``'StartUp'`` or ``'Big'``.

    A company is a ``'StartUp'`` when it was founded in 2008 or later and
    has fewer than 100 employees; everything else is ``'Big'``.

    Args:
        date: iterable of founding years.
        employees: iterable of employee counts, aligned with ``date`` by
            position.

    Returns:
        list[str]: one label per (year, employee count) pair.
    """
    # Pair the values by position. Indexing with date[i] is a label lookup on
    # a pandas Series and fails when the index is not 0..n-1.
    return ['StartUp' if year >= 2008 and staff < 100 else 'Big'
            for year, staff in zip(date, employees)]

In [None]:
# Run the query and keep the per-office table for the rest of the notebook
df=data()

# Number of office rows returned
len(df)

In [None]:
# Numeric version of the money-raised strings
df['n_total_money']=exchange(df.total_money_raised)

# 'StartUp' / 'Big' label derived from founding year and head count
df['company_class']=classifying(df.founded_year, df.number_of_employees)

df.head()

In [None]:
# Store every row of df as a document in the `geoloc` collection.
# NOTE(review): re-running this cell inserts the same rows again, which would
# inflate the counts computed by geoquery — presumably the collection should be
# emptied first; confirm.
db.geoloc.insert_many(df.to_dict('records'))

In [None]:
# Create a 2dsphere index on geo_loc; geoquery runs `$near` queries against this field
db.geoloc.create_index([('geo_loc', '2dsphere')])

In [None]:
# geolocation

def geoquery(df, max_distance=1000):
    """Add neighbourhood statistics for every row of ``df``.

    For each row, the ``geoloc`` collection is searched with a ``$near``
    query centred on the row's (longitude, latitude), and three columns are
    derived from the documents found:

      * ``around_companies``: number of documents found,
      * ``money_density``: mean ``n_total_money`` of those documents,
      * ``sb_ratio``: number of ``'StartUp'`` documents divided by the
        number of documents found.

    Args:
        df: DataFrame with ``longitude`` and ``latitude`` columns.
        max_distance: value passed as ``$maxDistance`` to the query
            (metres for a GeoJSON point per MongoDB's documentation).

    Returns:
        pandas.DataFrame: the same ``df`` object, modified in place with the
        three columns above. Rows with no document in range get 0 for all
        three values.
    """
    list_money_mean = []
    sb_ratio = []
    n_companies = []

    # Only these two fields are read from the matched documents.
    campos = {'_id': 0, 'n_total_money': 1, 'company_class': 1}

    # Iterate by position; indexing with df.longitude[i] is a label lookup
    # that fails when the index is not 0..n-1.
    for lon, lat in zip(df['longitude'], df['latitude']):

        cercanas = list(
            db.geoloc.find(
                {'geo_loc': {'$near': {'$geometry': {
                    'type': 'Point',
                    'coordinates': [lon, lat]
                },
                                       '$maxDistance': max_distance}}},
                campos
            )
        )

        total = len(cercanas)
        n_companies.append(total)

        if total == 0:
            # No document in range: avoid dividing by zero.
            list_money_mean.append(0.0)
            sb_ratio.append(0.0)
            continue

        list_money_mean.append(sum(doc['n_total_money'] for doc in cercanas)/total)

        start = sum(doc['company_class'] == 'StartUp' for doc in cercanas)

        # NOTE(review): the previous expression counted every document as one
        # "Big" (a 'Big' counts 1, and a 'StartUp' fell back to 1), so the
        # denominator was always the number of documents found. That value is
        # kept so existing sb_ratio thresholds still mean the same thing; if a
        # true StartUp/Big ratio is wanted, divide by the 'Big' count instead.
        sb_ratio.append(start/total)

    df['around_companies'] = n_companies

    df['money_density'] = list_money_mean

    df['sb_ratio'] = sb_ratio

    return df

In [None]:
def geomap(df, zoom_start=1.5, output_path='images/geomap.html'):
    """Draw one circle marker per row of ``df`` on a world map and save it.

    Args:
        df: DataFrame with ``latitude``, ``longitude`` and ``name`` columns.
        zoom_start: initial zoom level passed to ``folium.Map``.
        output_path: HTML file the map is written to; its parent directory
            is created when missing.

    Returns:
        folium.Map: the map, centred on [0, 0], with every marker added.
    """
    import os

    mapa = folium.Map(location=[0, 0],
                      tiles='openstreetmap', zoom_start=zoom_start)

    # Iterate over the rows by position: every row is plotted (starting an
    # index loop at 1 would skip the first one) and the caller's index is
    # left untouched.
    for lat, lon, nombre in zip(df['latitude'], df['longitude'], df['name']):

        # `icon` is not among the documented CircleMarker parameters, so it is not passed.
        folium.CircleMarker([float(lat), float(lon)], popup=str(nombre),
                            radius=1).add_to(mapa)

    carpeta = os.path.dirname(output_path)
    if carpeta:
        os.makedirs(carpeta, exist_ok=True)

    mapa.save(output_path)

    return mapa

In [None]:
# Add around_companies, money_density and sb_ratio to every row (one MongoDB query per row)
df=geoquery(df)

df.head()

In [None]:
# Summary statistics of the numeric columns, useful for choosing the filter thresholds below
df.describe()

In [None]:
# Map a sample of the offices: `.loc[:1000]` slices by label and includes the end label
mapa=geomap(df.loc[:1000])

# Show the map as the cell output
mapa

In [None]:
print('Compañias alrededor>=10, sb_ratio>0.45, densidad monetaria>1.000.000$')

# Keep offices with at least 10 companies around, sb_ratio above 0.45 and
# money density above 1e6. The result is not stored in a variable called
# `data`, because that would rebind the name of the data() query function
# and break any later call to it.
seleccion=df[(df.around_companies>=10) & (df.sb_ratio>0.45) & (df.money_density>1e6)]

mapa2=geomap(seleccion)

mapa2

In [None]:
print('Compañias alrededor>=70, sb_ratio>0.25, densidad monetaria>10.000.000$')

# Stricter filter: at least 70 companies around, sb_ratio above 0.25 and
# money density above 1e7. The result is not stored in a variable called
# `data`, because that would rebind the name of the data() query function
# and break any later call to it.
seleccion=df[(df.around_companies>=70) & (df.sb_ratio>0.25) & (df.money_density>1e7)]

mapa3=geomap(seleccion)

mapa3