In [7]:
from SRC.func_geoquery import *
from SRC.func_for_api import *
import requests
import pandas as pd
from pymongo import MongoClient
# Connection settings for the local MongoDB instance that holds the "companies" database.
dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"

# Short timeouts (2000 ms) so an unreachable server fails fast instead of hanging the cell.
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
# With no argument, get_database() returns the database named in the connection URL.
db = client.get_database()
# NOTE: the former second `client = MongoClient()` was removed; it replaced the
# timeout-configured client above with an unused default one.


## First steps 

I want to find the best location for a gaming company. To do so, I first investigate other successful companies that work in related areas. 

Therefore, I run a fairly broad query that keeps all the information I need and excludes the data that will not help me find a good place for my company.

In the following query I chose:
- Companies that were founded in 2005 or later
- Whose acquisition price is expressed in USD
- Whose acquisition price is greater than 10,000,000
- And that work in software, design, games/video or web (areas that I consider important for my gaming company)

In [8]:
# Categories considered relevant for a gaming company.
target_categories = ["software", "design", "games_video", "web"]

# Founded in 2005 or later, acquired in USD for more than 10,000,000,
# and belonging to one of the target categories.
query = {
    "$and": [
        {"founded_year": {"$gte": 2005}},
        {"acquisition.price_currency_code": {"$eq": "USD"}},
        {"acquisition.price_amount": {"$gt": 10000000}},
        {"$or": [{"category_code": {"$eq": category}} for category in target_categories]},
    ]
}

# Only fetch the fields used in the location analysis.
projection = {
    "offices": 1,
    "name": 1,
    "founded_year": 1,
    "category_code": 1,
    "acquisition.price_amount": 1,
}
companies = list(db["companies"].find(query, projection))
len(companies)

42

From this query I obtain 42 companies (with 58 offices in total, as shown below). I am going to find their offices, study their locations and try to figure out a good place for my office.


In [9]:
# One row per matched company. `companies` is already a list, so no extra copy is
# needed, and pandas is already imported in the first cell.
df = pd.DataFrame(companies)

# (rows, columns) of the company-level frame.
df.shape

(42, 6)

I explode the `offices` column so that each element of a company's office list becomes its own row. With this method 
I obtain information about the latitude, longitude, and also about the address of the offices of these companies.

In [10]:

# Step 1: one row per office (the company's row label is repeated for each of its offices).
offices_exploded = df.explode("offices")
# Step 2: expand each office record into its own columns (address, city, latitude, ...).
offices = offices_exploded.apply(lambda row: row.offices, axis=1, result_type="expand")

# (rows, columns) of the office-level frame.
offices.shape

(58, 9)

And now I concat my former dataframe with the df obtained with all the information from the different offices


In [11]:
# Attach the company-level fields to every office row. `offices` carries the company's
# row label from `df`, repeated once per office, so an index join repeats the company
# fields for each of its offices. join() is used instead of pd.concat(axis=1) because
# aligning on a non-unique index with concat is fragile across pandas versions.
company_fields = df[["name", "_id", "category_code", "acquisition", "founded_year"]]
clean_offices = company_fields.join(offices)

clean_offices.head()


Unnamed: 0,name,_id,category_code,acquisition,founded_year,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431
1,YouTube,52cdef7c4bab8bd675297dbd,games_video,{'price_amount': 1650000000},2005,Corporate Headquarters,901 Cherry Ave,,94066,San Bruno,CA,USA,37.627971,-122.426804
2,Revision3,52cdef7c4bab8bd675297dcb,games_video,{'price_amount': 30000000},2005,HQ,2415 3rd St,,94107,San Francisco,CA,USA,37.757758,-122.388243
3,Meebo,52cdef7c4bab8bd675297e09,web,{'price_amount': 100000000},2005,Headquarters,215 Castro Street,3rd Floor,94041,Mountain View,CA,USA,37.393679,-122.079017


#### Geopoints values latitude and longitude

In [12]:
# Row-wise conversion with the project helper; judging by the output below it yields a
# {'type': 'Point', 'coordinates': [...]} dict per office (empty when coordinates are missing).
geopoints = clean_offices.apply(transformToGeoPoint, axis=1)
clean_offices["geopoint"] = geopoints

In [13]:
# The frame now has 15 columns (58 rows): the 14 original ones plus "geopoint".
clean_offices.head()

Unnamed: 0,name,_id,category_code,acquisition,founded_year,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,..."
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,..."
1,YouTube,52cdef7c4bab8bd675297dbd,games_video,{'price_amount': 1650000000},2005,Corporate Headquarters,901 Cherry Ave,,94066,San Bruno,CA,USA,37.627971,-122.426804,"{'type': 'Point', 'coordinates': [-122.426804,..."
2,Revision3,52cdef7c4bab8bd675297dcb,games_video,{'price_amount': 30000000},2005,HQ,2415 3rd St,,94107,San Francisco,CA,USA,37.757758,-122.388243,"{'type': 'Point', 'coordinates': [-122.388243,..."
3,Meebo,52cdef7c4bab8bd675297e09,web,{'price_amount': 100000000},2005,Headquarters,215 Castro Street,3rd Floor,94041,Mountain View,CA,USA,37.393679,-122.079017,"{'type': 'Point', 'coordinates': [-122.079017,..."


To keep the id of each company, I rename the `_id` column to `company_id` and then convert it to `str` so that it can be serialised to JSON

In [14]:
# "_id" becomes "company_id" so every office row stays traceable to its company.
id_renames = {"_id": "company_id"}
clean_offices = clean_offices.rename(columns=id_renames)
clean_offices.shape

(58, 15)

In [15]:
# Store the ids as plain strings so the frame can later be converted to JSON.
clean_offices["company_id"] = clean_offices["company_id"].map(str)


### Choosing my city

I already filtered for companies whose acquisition price was greater than 10,000,000 USD. I consider those companies successful enough, so only they are used in this approach. Once filtered, I no longer need that information, so I can drop that column from my dataset. 
I also chose "modern" companies, trying to select those that have conducted fairly recent market research. As this dataset is not up to date and I needed at least some companies for this study, I chose 2005 as the lower limit; now I am going to look more closely at the founding year to find the most recent ones.

In [16]:
# Preview the office-level frame (one row per office) before choosing a city.
clean_offices.head()

Unnamed: 0,name,company_id,category_code,acquisition,founded_year,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,..."
0,Wetpaint,52cdef7c4bab8bd675297d8a,web,{'price_amount': 30000000},2005,,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,..."
1,YouTube,52cdef7c4bab8bd675297dbd,games_video,{'price_amount': 1650000000},2005,Corporate Headquarters,901 Cherry Ave,,94066,San Bruno,CA,USA,37.627971,-122.426804,"{'type': 'Point', 'coordinates': [-122.426804,..."
2,Revision3,52cdef7c4bab8bd675297dcb,games_video,{'price_amount': 30000000},2005,HQ,2415 3rd St,,94107,San Francisco,CA,USA,37.757758,-122.388243,"{'type': 'Point', 'coordinates': [-122.388243,..."
3,Meebo,52cdef7c4bab8bd675297e09,web,{'price_amount': 100000000},2005,Headquarters,215 Castro Street,3rd Floor,94041,Mountain View,CA,USA,37.393679,-122.079017,"{'type': 'Point', 'coordinates': [-122.079017,..."


Most of the offices of these successful companies are located in San Francisco, New York and London

In [17]:
# Number of office rows per city, most frequent first; show the top five.
offices_per_city = clean_offices["city"].value_counts()
offices_per_city.head()

San Francisco    9
New York         5
London           4
Palo Alto        3
Mountain View    3
Name: city, dtype: int64

Seven of these offices belong to companies founded in 2008, the most recent founding year in the selection

In [18]:
# Same ranking as relative frequencies, expressed in percent and rounded to whole numbers.
city_share_pct = clean_offices["city"].value_counts(normalize=True).mul(100).round()
city_share_pct.head()


San Francisco    16.0
New York          9.0
London            7.0
Palo Alto         5.0
Mountain View     5.0
Name: city, dtype: float64

When I check in which cities the companies founded in 2008 have their offices, I can see that one office is in San Francisco (plus two in nearby Palo Alto) and one is in London; remember that most offices of the successful companies were located in SF, NY and London. 
Now I just have to choose between SF and London, as both places meet the conditions I think are best for undertaking this project. 
My investors are European, so I am pretty sure they will prefer London over San Francisco. 
Let's find out whether they can be comfortable in London. They have some requirements to fulfill.

In [19]:
# Each row of clean_offices is one office, so these are office counts per founding
# year of the owning company, not counts of distinct companies.
clean_offices["founded_year"].value_counts()

2005    19
2006    17
2007    15
2008     7
Name: founded_year, dtype: int64

In [20]:
# Cities of the offices whose company was founded in 2008.
founded_in_2008 = clean_offices["founded_year"] == 2008
clean_offices.loc[founded_in_2008, "city"].value_counts()


Palo Alto        2
Sunnyvale        1
San Francisco    1
Solana Beach     1
Netanya          1
London           1
Name: city, dtype: int64

## London, a successful and close place to break ground!

As I read about London, I can already see that it meets some of the features we want: it has airports, it is successful, it is well known all over the world, and it is also known as the greenest city in Europe. It is a comfortable place with many cultures mixed together, leisure activities, and great economic activity and growth.  

Let's investigate London a little bit (source:wikipedia):

London is the capital and largest city of England and the United Kingdom. Standing on the River Thames in the south-east of England, at the head of its 50-mile (80 km) estuary leading to the North Sea, London has been a major settlement for two millennia. 

London is considered to be one of the world's most important global cities and has been called the **world's most powerful, most desirable, most influential, most visited, most expensive, sustainable, most investment-friendly,and most-popular-for-work city.** 


It exerts a considerable impact upon the arts, commerce, education, entertainment, fashion, finance, healthcare, media, professional services, **research and development**, tourism and transportation. London ranks 26th out of 300 major cities for economic performance. **It is one of the largest financial centres and has either the fifth- or the sixth-largest metropolitan area GDP.**

It is the most-visited city as measured by international arrivals and has the **busiest city airport system as measured by passenger traffic**.

London has a diverse range of people and cultures, and more than 300 languages are spoken in the region.

**The London Underground is the oldest underground railway network in the world.**

A 2013 report by the City of London Corporation said that London is the **"greenest city" in Europe with 35,000 acres of public parks, woodlands and gardens.**  The largest parks in the central area of London are three of the eight Royal Parks, namely Hyde Park and its neighbour Kensington Gardens in the west, and Regent's Park to the north. Hyde Park in particular is popular for sports and sometimes hosts open-air concerts. Regent's Park contains London Zoo, the world's oldest scientific zoo, and is near Madame Tussauds Wax Museum. Primrose Hill, immediately to the north of Regent's Park, at 256 feet (78 m) is a popular spot from which to view the city skyline.

## Companies in London

I am going to investigate and keep information regarding the companies in London

In [21]:
# Keep only the London offices and the columns needed for the geo analysis.
london_columns = ["name", "category_code", "founded_year", "latitude", "longitude", "geopoint"]
is_london = clean_offices["city"] == "London"
inlondon = clean_offices.loc[is_london, london_columns]
inlondon

Unnamed: 0,name,category_code,founded_year,latitude,longitude,geopoint
12,amiando,web,2006,51.52486,-0.083631,"{'type': 'Point', 'coordinates': [-0.0836311, ..."
16,Playfish,games_video,2007,51.499109,-0.19848,"{'type': 'Point', 'coordinates': [-0.1984802, ..."
29,TweetDeck,software,2008,51.500152,-0.126236,"{'type': 'Point', 'coordinates': [-0.1262362, ..."
30,Saffron Digital,games_video,2006,,,


In [22]:
# reset_index() moves the original row labels into a new "index" column and
# renumbers the rows from 0.
inlondon = inlondon.reset_index()
inlondon

Unnamed: 0,index,name,category_code,founded_year,latitude,longitude,geopoint
0,12,amiando,web,2006,51.52486,-0.083631,"{'type': 'Point', 'coordinates': [-0.0836311, ..."
1,16,Playfish,games_video,2007,51.499109,-0.19848,"{'type': 'Point', 'coordinates': [-0.1984802, ..."
2,29,TweetDeck,software,2008,51.500152,-0.126236,"{'type': 'Point', 'coordinates': [-0.1262362, ..."
3,30,Saffron Digital,games_video,2006,,,


In [23]:
# Saffron Digital has no latitude/longitude/geopoint, so it cannot be used in geo queries.
# Drop rows by missing values instead of by a hard-coded row position: the result is the
# same for this data, but the cell no longer fails when re-run or when the rows change.
inlondon = inlondon.dropna(subset=["latitude", "longitude", "geopoint"])
inlondon

Unnamed: 0,index,name,category_code,founded_year,latitude,longitude,geopoint
0,12,amiando,web,2006,51.52486,-0.083631,"{'type': 'Point', 'coordinates': [-0.0836311, ..."
1,16,Playfish,games_video,2007,51.499109,-0.19848,"{'type': 'Point', 'coordinates': [-0.1984802, ..."
2,29,TweetDeck,software,2008,51.500152,-0.126236,"{'type': 'Point', 'coordinates': [-0.1262362, ..."


I save my final dataframe with the three offices in London