# Searching for potential locations for the new office

In this project we have to find a new location for our company.

The location should satisfy several conditions as well as possible.


We will use the `Companies` collection in MongoDB to find similar companies.

Our company belongs to the gaming industry.

The new office has to be located in Madrid, Spain.

In [1]:
from pymongo import MongoClient
from src import api_functions as af
import pandas as pd
import json

In [2]:
# Create a client for the MongoDB server at localhost:27017, then pick the
# "Ironhack" database and its "Companies" collection.
client = MongoClient("localhost:27017")
db = client["Ironhack"]
c = db["Companies"]

In [3]:
# Projection: return only the fields used below and leave out Mongo's "_id".
target_competitors = {
    "_id": 0,
    "name": 1,
    "category_code": 1,
    "offices": 1,
}

# The empty filter selects every document of the collection.
cursor = c.find(filter={}, projection=target_competitors)
competitors = list(cursor)
len(competitors)

18801

In [4]:
competitors_df = pd.DataFrame(competitors)

# Fields copied from each company's first office into a column of their own.
OFFICE_FIELDS = ["country_code", "address1", "address2", "city",
                 "zip_code", "latitude", "longitude"]


def first_office_field(offices, field):
    """Return `field` from the first entry of `offices`, or "" when the list is empty."""
    return offices[0][field] if len(offices) > 0 else ""


# How many offices every company has.
competitors_df["qt_offices"] = competitors_df["offices"].map(len)

# One pass per field over the `offices` column, instead of one copy-pasted
# row-wise apply per field.
for field in OFFICE_FIELDS:
    competitors_df[field] = competitors_df["offices"].map(
        lambda offices: first_office_field(offices, field)
    )

# Build the `punto` value of every company from its latitude/longitude pair.
# NOTE(review): af.type_point lives in src/api_functions and is not visible here;
# it receives "" for companies without offices - confirm it copes with that.
competitors_df["punto"] = competitors_df.apply(
    lambda fila: af.type_point([fila["latitude"], fila["longitude"]]), axis=1
)

print(competitors_df.shape)

(18801, 12)


In [5]:
# Keep only the companies whose `city` column is exactly 'Madrid',
# then renumber the rows from 0.
is_madrid = competitors_df["city"] == 'Madrid'
competitors_df = competitors_df[is_madrid].reset_index(drop=True)

print(competitors_df.shape)

(58, 12)


In [6]:
# Distinct category codes among the Madrid companies.
competitors_df["category_code"].unique()

array(['mobile', 'public_relations', 'web', 'games_video', 'enterprise',
       'software', 'ecommerce', 'consulting', 'social', 'advertising',
       'finance', 'other', 'cleantech'], dtype=object)

In [7]:
# Keep only the "games_video" companies: these are our competitors.
# .copy() yields an independent frame, so the manual fix below does not write
# into a slice of the previous DataFrame.
is_games_video = competitors_df["category_code"] == 'games_video'
competitors_df = competitors_df[is_games_video].copy().reset_index(drop=True)
print(competitors_df.shape)

# Replace one address that contains odd characters, to avoid problems later
# with the geocode API. The row is selected by company name instead of by the
# positional label 1, which depends on the order MongoDB returns the documents.
competitors_df.loc[competitors_df["name"] == "VivoCom", "address1"] = "calle Princesa, 31"

(5, 12)


## Using the Geocode API to fill in the latitude and longitude values that are None


In [8]:
# Statement order is kept as-is: `punto` is recomputed first, then latitude and
# longitude are recomputed from the updated rows.
# NOTE(review): the af.* helpers live in src/api_functions; presumably
# corregir_lat_long calls the geocode API when a coordinate is None and
# sacar_lat / sacar_long read the result back from `punto` - confirm there.
competitors_df["punto"] = competitors_df.apply(af.corregir_lat_long, axis=1)
competitors_df["latitude"] = competitors_df.apply(af.sacar_lat, axis=1)
competitors_df["longitude"] = competitors_df.apply(af.sacar_long, axis=1)

# Drop the columns that are not needed any more.
unneeded_columns = [
    'category_code',
    'offices',
    "qt_offices",
    'city',
    'country_code',
    'address1',
    'address2',
    'zip_code',
]
competitors_df = competitors_df.drop(columns=unneeded_columns)

competitors_df


Latitud : 40.43094 , Longitud: -3.6952896 correctos.
     Latitud o Longitud es none. Llamamos a geocode con la direccion.
Latitud : 40.42792 , Longitud: -3.7141 CORREGIDOS.
Latitud : 40.4380872 , Longitud: -3.6760635 correctos.
Latitud : 40.4273082 , Longitud: -3.6895115 correctos.
Latitud : 40.6367104 , Longitud: -3.9980485 correctos.


Unnamed: 0,name,latitude,longitude,punto
0,eRepublik,40.43094,-3.69529,"{'type': 'Point', 'coordinates': [40.43094, -3..."
1,VivoCom,40.42792,-3.7141,"{'type': 'Point', 'coordinates': [40.42792, -3..."
2,Entropy Computational Services,40.438087,-3.676064,"{'type': 'Point', 'coordinates': [40.4380872, ..."
3,Virgin Play,40.427308,-3.689512,"{'type': 'Point', 'coordinates': [40.4273082, ..."
4,Undead Code Studios,40.63671,-3.998048,"{'type': 'Point', 'coordinates': [40.6367104, ..."


## The list of competitor locations, including latitude and longitude, is ready. Export it to CSV and JSON.


In [19]:
# Export the competitors table as CSV and as JSON.
competitors_df.to_csv("data/competitors.csv", index=False)

# to_json() already returns serialized JSON text, so it is written as-is.
# Passing that text through json.dump would encode it a second time and the
# file would hold one quoted, escaped string instead of a list of records.
competitors_js = competitors_df.to_json(orient='records')
with open("data/competitors.json", "w") as f:
    f.write(competitors_js)


## Plotting a map with the competitors' locations

In [10]:
import folium
from folium import Choropleth, Circle, Marker, Icon, Map

In [11]:
# Centre the map on the mean latitude/longitude of the competitors.
map_center = [competitors_df["latitude"].mean(), competitors_df["longitude"].mean()]
latitud_central, longitud_central = map_center

# NOTE: `map` shadows Python's builtin of the same name; the name is kept
# because the cells that add the markers and save the file refer to it.
map = folium.Map(location=map_center, zoom_start=10)

In [12]:
# One marker per competitor; the company name is shown as the tooltip.
for _, company in competitors_df.iterrows():
    # A fresh Icon is built for every marker, exactly as before.
    gamepad_icon = Icon(
        color="green",
        opacity=0.3,
        prefix="fa",
        icon="gamepad",
        icon_color="black",
    )
    Marker(
        location=[company["latitude"], company["longitude"]],
        tooltip=company["name"],
        icon=gamepad_icon,
    ).add_to(map)

# Last expression of the cell: displays the map.
map

In [13]:
# Save the map with the competitor markers as an HTML file in output/.
map.save("output/01_Potential_locations_identification_(competitors_in_Madrid).html")