In [270]:
from pymongo import MongoClient
import pandas as pd
import folium
from folium import Choropleth, Circle, Marker, Icon, Map

In [2]:
# We connect with our Mongo client to get the name of our current databases.
conn = MongoClient("localhost:27017")
conn.list_database_names()

['admin', 'config', 'ironhack', 'local']

In [3]:
# We get access to our database and print our collections 
db = conn.get_database("ironhack")
db.list_collection_names()

['companies', 'restaurants', 'countries_small', 'books']

In [115]:
# And finally we access to the collections we are going to work with!
collection = db.get_collection("companies")
collection.find_one({})

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'name': 'Wetpaint',
 'permalink': 'abc2',
 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
 'homepage_url': 'http://wetpaint-inc.com',
 'blog_url': 'http://digitalquarters.net/',
 'blog_feed_url': 'http://digitalquarters.net/feed/',
 'twitter_username': 'BachelrWetpaint',
 'category_code': 'web',
 'number_of_employees': 47,
 'founded_year': 2005,
 'founded_month': 10,
 'founded_day': 17,
 'deadpooled_year': 1,
 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
 'alias_list': '',
 'email_address': 'info@wetpaint.com',
 'phone_number': '206.859.6300',
 'description': 'Technology Platform Company',
 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for di

In [116]:
# We print only the offices value, because it's our main interest from this database.
collection.find_one({}, {"offices": 1})

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'offices': [{'description': '',
   'address1': '710 - 2nd Avenue',
   'address2': 'Suite 1100',
   'zip_code': '98104',
   'city': 'Seattle',
   'state_code': 'WA',
   'country_code': 'USA',
   'latitude': 47.603122,
   'longitude': -122.333253},
  {'description': '',
   'address1': '270 Lafayette Street',
   'address2': 'Suite 505',
   'zip_code': '10012',
   'city': 'New York',
   'state_code': 'NY',
   'country_code': 'USA',
   'latitude': 40.7237306,
   'longitude': -73.9964312}]}

#### Now we need to contemplate the criteria that we want to fullfill in order to make sure we localize our company in the right place.
### The places that we want to make sure we are close to are:

- 30% of the company staff have at least 1 child. (schools close)
- Near successful tech startups that have raised at least 1 Million dollars.
- An Airport
- Vegan restaurant

#### We chose the 4 things that we think are the best that we can try to accomplish.

In [98]:
# Let's see how many companies are ir our collection:
collection.count_documents({})

18801

In [76]:
# Now, we start to try to find all the spots that have to match our criteria:
collection.find({"name": {"$regex": "^Starbucks$"}}, {"name": 1, "offices": 1})

<pymongo.cursor.Cursor at 0x10acdbb80>

In [77]:
list(_)
# As we can see there's just one Starbucks in our database, but we don't have any useful info about its location.

[{'_id': ObjectId('52cdef7c4bab8bd67529879e'),
  'name': 'Starbucks',
  'offices': []}]

In [104]:
# We'll try to find now some design companies:
collection.find({"name": {"$regex": "^Design$"}}, {"name": 1, "offices": 1})

<pymongo.cursor.Cursor at 0x10acccaf0>

In [105]:
list(_)
# There's not any company also which has the word "Design" in its name, with or without first capital letter.

[]

In [171]:
# One of the things we can obtain from this database is all the companies that reside in San José (Silicon Valley)
# which is where we want to place our gaming company.
companies = list(collection.find({"offices.city": "San Jose", "total_money_raised": {"$regex": ".*\M$"}}, {"name": 1, "offices.city": 1, "total_money_raised": 1, "offices.latitude": 1, "offices.longitude": 1, "_id": 0}))

In [247]:
companies

[{'name': 'eBay',
  'total_money_raised': '$6.7M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.295005,
    'longitude': -121.930035}]},
 {'name': 'Cisco',
  'total_money_raised': '$2.5M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.408802,
    'longitude': -121.95377}]},
 {'name': 'PayPal',
  'total_money_raised': '$197M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.294465,
    'longitude': -121.927696}]},
 {'name': 'BlueLithium',
  'total_money_raised': '$11.5M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.32052,
    'longitude': -121.948399}]},
 {'name': 'Baynote',
  'total_money_raised': '$27.8M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.332271,
    'longitude': -122.006539}]},
 {'name': 'BlackArrow',
  'total_money_raised': '$59.8M',
  'offices': [{'city': 'San Jose',
    'latitude': 37.549475,
    'longitude': -122.316982},
   {'city': 'New York', 'latitude': None, 'longitude': None}]},
 {'name': 'CDNetworks',
  'total_money_rai

In [248]:
# We print the dataframes to see how's it looking rigth now:
d = pd.DataFrame(collection.find({"offices.city": "San Jose", "total_money_raised": {"$regex": ".*\M$"}}, {"name": 1, "total_money_raised": 1, "offices.latitude": 1, "_id": 0}))
d

Unnamed: 0,name,total_money_raised,offices
0,eBay,$6.7M,[{'latitude': 37.295005}]
1,Cisco,$2.5M,[{'latitude': 37.408802}]
2,PayPal,$197M,[{'latitude': 37.294465}]
3,BlueLithium,$11.5M,[{'latitude': 37.32052}]
4,Baynote,$27.8M,[{'latitude': 37.332271}]
...,...,...,...
73,Force10 Networks,$116M,[{'latitude': 37.418636}]
74,SVTC Technologies,$34M,[{'latitude': None}]
75,D2S,$21.7M,[{'latitude': None}]
76,PaymentOne,$7M,[{'latitude': 37.248043}]


In [301]:
# And the second dataframe:
d1 = pd.DataFrame(collection.find({"offices.city": "San Jose", "total_money_raised": {"$regex": ".*\M$"}}, {"name": 1, "total_money_raised": 1, "offices.longitude": 1, "_id": 0}))
d1

Unnamed: 0,name,total_money_raised,offices
0,eBay,$6.7M,[{'longitude': -121.930035}]
1,Cisco,$2.5M,[{'longitude': -121.95377}]
2,PayPal,$197M,[{'longitude': -121.927696}]
3,BlueLithium,$11.5M,[{'longitude': -121.948399}]
4,Baynote,$27.8M,[{'longitude': -122.006539}]
...,...,...,...
73,Force10 Networks,$116M,[{'longitude': -121.944828}]
74,SVTC Technologies,$34M,[{'longitude': None}]
75,D2S,$21.7M,[{'longitude': None}]
76,PaymentOne,$7M,[{'longitude': -121.776935}]


In [282]:
!ls

Geo.ipynb        Google API.ipynb


In [286]:
# Let's import our function to clean this dataframe:
import sys
sys.path.append("../")
from main import mapa

In [298]:
# Now, we apply a function in order to remove the list from the column "offices":
mapa(df)
df

Unnamed: 0,name,total_money_raised,offices
0,eBay,$6.7M,{'latitude': 37.295005}
1,Cisco,$2.5M,{'latitude': 37.408802}
2,PayPal,$197M,{'latitude': 37.294465}
3,BlueLithium,$11.5M,{'latitude': 37.32052}
4,Baynote,$27.8M,{'latitude': 37.332271}
...,...,...,...
73,Force10 Networks,$116M,{'latitude': 37.418636}
74,SVTC Technologies,$34M,{'latitude': None}
75,D2S,$21.7M,{'latitude': None}
76,PaymentOne,$7M,{'latitude': 37.248043}


In [291]:
mapa(df)
df1

Unnamed: 0,name,total_money_raised,offices
0,eBay,$6.7M,{'longitude': -121.930035}
1,Cisco,$2.5M,{'longitude': -121.95377}
2,PayPal,$197M,{'longitude': -121.927696}
3,BlueLithium,$11.5M,{'longitude': -121.948399}
4,Baynote,$27.8M,{'longitude': -122.006539}
...,...,...,...
73,Force10 Networks,$116M,{'longitude': -121.944828}
74,SVTC Technologies,$34M,{'longitude': None}
75,D2S,$21.7M,{'longitude': None}
76,PaymentOne,$7M,{'longitude': -121.776935}


In [254]:
# Now again, we apply another function to get the value of latitude and create a new column out of it:
df_new = df.join(pd.DataFrame(df["offices"].to_dict()).T)
df_new

Unnamed: 0,name,total_money_raised,offices,latitude
0,eBay,$6.7M,{'latitude': 37.295005},37.295
1,Cisco,$2.5M,{'latitude': 37.408802},37.4088
2,PayPal,$197M,{'latitude': 37.294465},37.2945
3,BlueLithium,$11.5M,{'latitude': 37.32052},37.3205
4,Baynote,$27.8M,{'latitude': 37.332271},37.3323
...,...,...,...,...
73,Force10 Networks,$116M,{'latitude': 37.418636},37.4186
74,SVTC Technologies,$34M,{'latitude': None},
75,D2S,$21.7M,{'latitude': None},
76,PaymentOne,$7M,{'latitude': 37.248043},37.248


In [255]:
df1_new = df1.join(pd.DataFrame(df1["offices"].to_dict()).T)
df1_new

Unnamed: 0,name,total_money_raised,offices,longitude
0,eBay,$6.7M,{'longitude': -121.930035},-121.93
1,Cisco,$2.5M,{'longitude': -121.95377},-121.954
2,PayPal,$197M,{'longitude': -121.927696},-121.928
3,BlueLithium,$11.5M,{'longitude': -121.948399},-121.948
4,Baynote,$27.8M,{'longitude': -122.006539},-122.007
...,...,...,...,...
73,Force10 Networks,$116M,{'longitude': -121.944828},-121.945
74,SVTC Technologies,$34M,{'longitude': None},
75,D2S,$21.7M,{'longitude': None},
76,PaymentOne,$7M,{'longitude': -121.776935},-121.777


In [256]:
# And now finally we decide to drop the unnecessary column of our dataframe:
df_new.drop(["offices"], axis=1, inplace=True)
df_new

Unnamed: 0,name,total_money_raised,latitude
0,eBay,$6.7M,37.295
1,Cisco,$2.5M,37.4088
2,PayPal,$197M,37.2945
3,BlueLithium,$11.5M,37.3205
4,Baynote,$27.8M,37.3323
...,...,...,...
73,Force10 Networks,$116M,37.4186
74,SVTC Technologies,$34M,
75,D2S,$21.7M,
76,PaymentOne,$7M,37.248


In [257]:
df1_new.drop(["offices"], axis=1, inplace=True)
df1_new

Unnamed: 0,name,total_money_raised,longitude
0,eBay,$6.7M,-121.93
1,Cisco,$2.5M,-121.954
2,PayPal,$197M,-121.928
3,BlueLithium,$11.5M,-121.948
4,Baynote,$27.8M,-122.007
...,...,...,...
73,Force10 Networks,$116M,-121.945
74,SVTC Technologies,$34M,
75,D2S,$21.7M,
76,PaymentOne,$7M,-121.777


In [303]:
!ls

Geo.ipynb        Google API.ipynb


In [304]:
# We join the two dataframes and we export the saved file to our output folder! We got what we need already, so let's
# create a beautiful map!
result = df_new.join(df1_new["longitude"])
result.to_csv('../output/CompaniesCA.csv')
result.head()

Unnamed: 0,name,total_money_raised,latitude,longitude
0,eBay,$6.7M,37.295,-121.93
1,Cisco,$2.5M,37.4088,-121.954
2,PayPal,$197M,37.2945,-121.928
3,BlueLithium,$11.5M,37.3205,-121.948
4,Baynote,$27.8M,37.3323,-122.007


In [271]:
map_1 = folium.Map(location = [37.370036,-121.963527], zoom_start = 15)
icon = Icon(color = "blue",
             prefix = "fa",
             icon = "building",
             icon_color = "black",
             tooltip = "Our Company"
)
company = Marker(location=[37.370036,-121.963527], icon=icon)
company.add_to(map_1)
map_1

# First, we localize our company in the coordinates that we chose, a well communicated street in San José, very 
# handful so our clients and workers can access the building easily.

# And also, good news... There's a parking lot!! Everyone can park the car in front of the building!

In [281]:
# Now, we create a for loop to iterate through the dataframe head, to get the location of our 5 first companies that 
# have already raised a million dollars or more, and that will be our neighbours:

for i,row in result.head().iterrows():
    #popup distrito
    distrito = {
        "location" : [row["latitude"], row["longitude"]],
        "tooltip" : row["name"]
    }
    
    if row["name"] == "eBay":
        icon = Icon( color = "green",
                    prefix = "fa",
                    icon = "briefcase",
                    icon_color = "black"
        )
    elif row["name"] == "PayPal":
        icon = Icon( color = "blue",
                    prefix = "fa",
                    icon = "briefcase",
                    icon_color = "black"
        )
    elif row["name"] == "Baynote":
        icon = Icon( color = "cadetblue",
                    prefix = "fa",
                    icon = "briefcase",
                    icon_color = "black"
        )
    else:
        icon = Icon( color = "red",
                    prefix = "fa",
                    icon = "briefcase",
                    icon_color = "black"
        )
    Marker (**distrito,icon = icon).add_to(map_1)

map_1

# And finally, we output the map with our company and the rest of the companies in it!

In [150]:
# Our other database called "restaurants" may have more info about restaurants (vegans and stabucks), but that 
# database doesn't come with its location in latitude and longitude, so we may better use a Google API!