# Close to Tech Startups that raised a million or more & Design Companies

In [1]:
import json, requests
from bson import ObjectId

In [2]:
from pymongo import MongoClient
# Connect with MongoClient's default connection settings and select the
# project database.
client = MongoClient()
db = client['companies_project']

Let's check, from the original collection, how many different categories the companies have, and select those that match our needs (in this case, Tech and Design companies).

In [3]:
# Fetch only the category field of every document, then collect the distinct
# category codes.
usa_offices_all = list(
    db.companies_usa.find(filter={}, projection={'category_code': 1})
)
categories = {doc['category_code'] for doc in usa_offices_all}
print(categories)

{'sports', 'search', 'consulting', 'transportation', 'games_video', 'real_estate', 'messaging', 'biotech', 'hospitality', 'software', 'advertising', 'travel', 'enterprise', 'news', 'design', 'social', 'cleantech', 'nonprofit', 'mobile', 'education', 'public_relations', 'automotive', 'photo_video', 'web', 'ecommerce', 'finance', 'music', 'fashion', 'hardware', 'legal', 'network_hosting', 'analytics', None, 'manufacturing', 'nanotech', 'other', 'health', 'medical', 'security', 'semiconductor'}


In [4]:
# Category codes treated as "tech" for the startup filter.
# The 'design' category is not part of this list; it is queried separately.
tech_design = [
    'software',
    'nanotech',
    'games_video',
    'cleantech',
    'network_hosting',
    'web',
    'mobile',
    'biotech',
]

Let's now check the range of years in which the companies were founded.

In [5]:
# Fetch the founding year of every company whose `founded_year` is not null,
# then list the distinct years in ascending order.
usa_offices_all = list(
    db.companies_usa.find(
        filter={'founded_year': {'$ne': None}},
        projection={'founded_year': 1},
    )
)
years = sorted({doc['founded_year'] for doc in usa_offices_all})
print(years)

[1836, 1840, 1843, 1846, 1857, 1863, 1879, 1887, 1888, 1889, 1890, 1894, 1896, 1898, 1900, 1901, 1902, 1906, 1912, 1920, 1922, 1923, 1928, 1930, 1936, 1938, 1939, 1943, 1945, 1946, 1947, 1952, 1954, 1955, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1966, 1967, 1968, 1969, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013]


As the list shows, the youngest companies were founded in 2013, so I will consider as startups those companies whose founding year is 2008 or later.

In [6]:
# Tech startups: category in `tech_design`, founded in 2008 or later, and a
# `total_money_raised` value matching the regex below.
# The pattern is unanchored, so it matches any value containing an
# uppercase "M".
# NOTE(review): presumably amounts are strings such as "$1.5M"; if billions
# are written with a "B" suffix they are not matched here -- confirm.
tech_comp = list(
    db.companies_usa.find(
        {
            'category_code': {'$in': tech_design},
            'founded_year': {'$gte': 2008},
            'total_money_raised': {'$regex': '[M].*'},
        }
    )
)

In [7]:
# Design companies: every document in the 'design' category, with no
# founding-year or funding filter.
design_comp = list(
    db.companies_usa.find(filter={'category_code': 'design'})
)

In [8]:
# Combined selection: tech startups first, followed by the design companies.
tech_design_comp = [*tech_comp, *design_comp]

So, there are 124 companies that meet the requirements (122 tech startups that raised a million or more, and 2 design companies). Let's check how many of those are within a 15-kilometer radius of each of our 89 candidates. To do that, I will first need to save the tech and design companies as a new collection in the database, and then store their coordinates as a standard GeoJSON point, and do the same for the 89 candidates.

In [35]:
# Persist the combined tech + design selection as its own collection.
# NOTE(review): the documents come from `find()` without excluding `_id`, so
# they presumably carry their original `_id`; re-running this cell against a
# collection that already holds them would then fail with duplicate-key
# errors -- confirm before re-executing.
db.companies_usa_tech_desg.insert_many(tech_design_comp)

<pymongo.results.InsertManyResult at 0x7fabb5b987c0>

In [9]:
# Tech and design companies
# Read back the stored tech/design companies, projecting only `offices`
# (`_id` is returned as well, since it is not excluded).
tech_des = list(
    db.companies_usa_tech_desg.find(filter={}, projection={'offices': 1})
)

In [43]:
# Attach a GeoJSON Point under `coord` to every tech/design company, built
# from the longitude/latitude stored in its `offices` sub-document.
for comp in tech_des:
    office = comp["offices"]
    # GeoJSON positions are ordered [longitude, latitude].
    coord = {
        "type": "Point",
        "coordinates": [office["longitude"], office["latitude"]],
    }
    # Named `doc_filter` rather than `id` so the builtin `id` is not shadowed.
    doc_filter = {"_id": comp["_id"]}
    db.companies_usa_tech_desg.update_one(doc_filter, {"$set": {"coord": coord}})

In [10]:
# Candidates
# Read the candidate offices, projecting only `offices` (`_id` is returned
# as well, since it is not excluded).
candidates = list(
    db.companies_usa_filt3_c.find(filter={}, projection={'offices': 1})
)

In [12]:
# Attach a GeoJSON Point under `coord` to every candidate, built from the
# longitude/latitude stored in its `offices` sub-document.
for comp in candidates:
    office = comp["offices"]
    # GeoJSON positions are ordered [longitude, latitude].
    coord = {
        "type": "Point",
        "coordinates": [office["longitude"], office["latitude"]],
    }
    # Named `doc_filter` rather than `id` so the builtin `id` is not shadowed.
    doc_filter = {"_id": comp["_id"]}
    db.companies_usa_filt3_c.update_one(doc_filter, {"$set": {"coord": coord}})

Now let's check how many of these companies each candidate has nearby.

In [13]:
# Re-read both collections, keeping only `_id` and the GeoJSON `coord` field.
candidates_n = list(
    db.companies_usa_filt3_c.find(filter={}, projection={'coord': 1})
)
tech_des_n = list(
    db.companies_usa_tech_desg.find(filter={}, projection={'coord': 1})
)

In [14]:
# For every candidate, find the tech/design companies within 15 km and store
# them, together with their count, under `offices.off_nearby`.

# `$near` with `$geometry` requires a 2dsphere index on the queried field.
# Creating it here keeps the cell runnable on a fresh database; the call is a
# no-op when the same index already exists.
db.companies_usa_tech_desg.create_index([("coord", "2dsphere")])

for cand in candidates_n:
    query = {
        "coord": {
            "$near": {
                "$geometry": cand["coord"],
                "$maxDistance": 15_000,  # metres for a GeoJSON point, i.e. 15 km
            }
        }
    }
    nearby = list(db.companies_usa_tech_desg.find(query, {"coord": 1}))

    # Keys "0", "1", ... hold the matched companies in the order returned by
    # the query; "many" holds how many there are.
    data_format = {str(pos): comp for pos, comp in enumerate(nearby)}
    data_format["many"] = len(nearby)

    # Filter on the `_id` exactly as it was read back: no `ObjectId` re-wrap
    # and no shadowing of the builtin `id`.
    db.companies_usa_filt3_c.update_one(
        {"_id": cand["_id"]},
        {"$set": {"offices.off_nearby": data_format}},
    )

In [15]:
# Read back just the nearby-company count stored on each candidate.
cand_with_comp = list(
    db.companies_usa_filt3_c.find(
        filter={},
        projection={'offices.off_nearby.many': 1},
    )
)

In [16]:
# Distinct nearby-company counts across all candidates.
{comp['offices']['off_nearby']['many'] for comp in cand_with_comp}

{0, 1, 2, 3, 6, 7, 9, 10, 11, 16, 22}

As the set above shows, there is at least one office with 22 tech startups/design companies around it, some others with 10 or more, and the rest with fewer. Let's select those with 10 or more companies around them.

In [17]:
# Keep the candidates that have at least 10 tech/design companies within
# range, then show how many remain.
offices_4 = list(
    db.companies_usa_filt3_c.find(
        filter={'offices.off_nearby.many': {'$gte': 10}}
    )
)
len(offices_4)

32

There are 32 offices with 10 or more tech startups/design companies around. Let's save them into the database as a new collection and move onto the last filter.

In [18]:
# Persist the candidates that passed this filter as a new collection.
# NOTE(review): `offices_4` comes from `find()` without excluding `_id`, so the
# documents presumably keep their original `_id`; re-running this cell against
# an already populated collection would then fail with duplicate-key errors
# -- confirm before re-executing.
db.companies_usa_filt4_c.insert_many(offices_4)

<pymongo.results.InsertManyResult at 0x7f81fa554a80>