# Querying through Mongo DB

In [1]:
from pymongo import MongoClient
import pandas as pd
import seaborn as sns

### Calling the service MongoDB

In [2]:
client = MongoClient("localhost:27017")

In [3]:
db = client["Ironhack"]

In [4]:
comp = db.get_collection("companies")

### Regex for names, categories, tags, description

- Conditions

In [5]:
companies_name = {'name':
                        {"$regex": 
                        '^(design|ux|ui|design|frontend|backend|gaming|game|gamer|web|develop|software)',
                        "$options" :'i'}}

In [6]:
companies_cat = {'category_code':
                        {"$regex": 
                        '^(design|ux|ui|design|frontend|backend|gaming|game|gamer|web|develop|software)',
                        "$options" :'i'}}

In [7]:
companies_tag = {'tag_list':
                        {"$regex": 
                        '^(design|ux|ui|design|frontend|backend|gaming|game|gamer|web|develop|software)',
                        "$options" :'i'}}

In [8]:
companies_desc = {'description':
                         {"$regex": 
                        '^(design|ux|ui|design|frontend|backend|gaming|game|gamer|web|develop|software)',
                        "$options" :'i'}}

In [9]:
companies_overview ={'overview':
                        {"$regex": 
                        '^(design|ux|ui|design|frontend|backend|gaming|game|gamer|web|develop|software)',
                        "$options" :'i'}}

-  Filter

In [10]:
filter_ = {
            "$or": [companies_name, companies_cat, companies_tag, companies_desc, companies_overview]
            }

- get the names of the companies filtered

In [11]:
projection = {'name': 1,
              'offices.city':1,
              'offices.country_code':1,
              'offices.latitude':1, 
              'offices.longitude':1,
              'acquisition.price_amount':1,              
              '_id': 0}

In [12]:
filt_list = list(comp.find(filter_, projection))
filt_list

[{'name': 'Wetpaint',
  'acquisition': {'price_amount': 30000000},
  'offices': [{'city': 'Seattle',
    'country_code': 'USA',
    'latitude': 47.603122,
    'longitude': -122.333253},
   {'city': 'New York',
    'country_code': 'USA',
    'latitude': 40.7237306,
    'longitude': -73.9964312}]},
 {'name': 'Zoho',
  'offices': [{'city': 'Pleasanton',
    'country_code': 'USA',
    'latitude': 37.692934,
    'longitude': -121.904945}]},
 {'name': 'Postini',
  'acquisition': {'price_amount': 625000000},
  'offices': [{'city': 'San Carlos',
    'country_code': 'USA',
    'latitude': 37.506885,
    'longitude': -122.247573}]},
 {'name': 'Geni',
  'acquisition': {'price_amount': None},
  'offices': [{'city': 'West Hollywood',
    'country_code': 'USA',
    'latitude': 34.090368,
    'longitude': -118.393064}]},
 {'name': 'Flektor',
  'acquisition': {'price_amount': 20000000},
  'offices': [{'city': 'Culver City',
    'country_code': 'USA',
    'latitude': 34.025958,
    'longitude': -118.37

In [13]:
city=[i['offices'] for i in filt_list]
city

[[{'city': 'Seattle',
   'country_code': 'USA',
   'latitude': 47.603122,
   'longitude': -122.333253},
  {'city': 'New York',
   'country_code': 'USA',
   'latitude': 40.7237306,
   'longitude': -73.9964312}],
 [{'city': 'Pleasanton',
   'country_code': 'USA',
   'latitude': 37.692934,
   'longitude': -121.904945}],
 [{'city': 'San Carlos',
   'country_code': 'USA',
   'latitude': 37.506885,
   'longitude': -122.247573}],
 [{'city': 'West Hollywood',
   'country_code': 'USA',
   'latitude': 34.090368,
   'longitude': -118.393064}],
 [{'city': 'Culver City',
   'country_code': 'USA',
   'latitude': 34.025958,
   'longitude': -118.379768}],
 [{'city': 'Beverly Hills',
   'country_code': 'USA',
   'latitude': 34.076179,
   'longitude': -118.39417}],
 [{'city': 'San Francisco',
   'country_code': 'USA',
   'latitude': 37.775196,
   'longitude': -122.419204},
  {'city': 'New York City',
   'country_code': 'USA',
   'latitude': None,
   'longitude': None}],
 [{'city': 'Menlo Park',
   'coun

In [14]:
city[0][0]

{'city': 'Seattle',
 'country_code': 'USA',
 'latitude': 47.603122,
 'longitude': -122.333253}

In [15]:
list_cities=[j for i in city for j in i]
list_cities

[{'city': 'Seattle',
  'country_code': 'USA',
  'latitude': 47.603122,
  'longitude': -122.333253},
 {'city': 'New York',
  'country_code': 'USA',
  'latitude': 40.7237306,
  'longitude': -73.9964312},
 {'city': 'Pleasanton',
  'country_code': 'USA',
  'latitude': 37.692934,
  'longitude': -121.904945},
 {'city': 'San Carlos',
  'country_code': 'USA',
  'latitude': 37.506885,
  'longitude': -122.247573},
 {'city': 'West Hollywood',
  'country_code': 'USA',
  'latitude': 34.090368,
  'longitude': -118.393064},
 {'city': 'Culver City',
  'country_code': 'USA',
  'latitude': 34.025958,
  'longitude': -118.379768},
 {'city': 'Beverly Hills',
  'country_code': 'USA',
  'latitude': 34.076179,
  'longitude': -118.39417},
 {'city': 'San Francisco',
  'country_code': 'USA',
  'latitude': 37.775196,
  'longitude': -122.419204},
 {'city': 'New York City',
  'country_code': 'USA',
  'latitude': None,
  'longitude': None},
 {'city': 'Menlo Park',
  'country_code': 'USA',
  'latitude': 37.48413,
  '

In [16]:
df= pd.DataFrame(list_cities)

In [17]:
df.city.value_counts()

San Francisco    419
New York         378
                 322
London           258
Los Angeles      118
                ... 
Harrisonburg       1
Loveland           1
Blacksburg         1
Nairobi            1
Livingston         1
Name: city, Length: 1888, dtype: int64

In [18]:
df

Unnamed: 0,city,country_code,latitude,longitude
0,Seattle,USA,47.603122,-122.333253
1,New York,USA,40.723731,-73.996431
2,Pleasanton,USA,37.692934,-121.904945
3,San Carlos,USA,37.506885,-122.247573
4,West Hollywood,USA,34.090368,-118.393064
...,...,...,...,...
7631,Bohemia,USA,40.775055,-73.088140
7632,Palm Beach,USA,26.705331,-80.041395
7633,KÃ¼ssnacht,CHE,47.088219,8.437163
7634,Livingston,USA,40.793024,-74.323554


In [19]:
df['count'] = df.groupby('city')['city'].transform('count')
df

Unnamed: 0,city,country_code,latitude,longitude,count
0,Seattle,USA,47.603122,-122.333253,111.0
1,New York,USA,40.723731,-73.996431,378.0
2,Pleasanton,USA,37.692934,-121.904945,12.0
3,San Carlos,USA,37.506885,-122.247573,7.0
4,West Hollywood,USA,34.090368,-118.393064,9.0
...,...,...,...,...,...
7631,Bohemia,USA,40.775055,-73.088140,1.0
7632,Palm Beach,USA,26.705331,-80.041395,2.0
7633,KÃ¼ssnacht,CHE,47.088219,8.437163,1.0
7634,Livingston,USA,40.793024,-74.323554,1.0


In [21]:
df.sort_values(by=['count'], ascending = False, inplace= True)
df

Unnamed: 0,city,country_code,latitude,longitude,count
1638,San Francisco,USA,37.788457,-122.399884,419.0
3907,San Francisco,USA,37.783171,-122.392901,419.0
164,San Francisco,USA,37.780883,-122.395257,419.0
165,San Francisco,USA,,,419.0
4368,San Francisco,USA,37.844803,-122.290897,419.0
...,...,...,...,...,...
754,,USA,37.090240,-95.712891,
755,,USA,37.090240,-95.712891,
763,,NLD,52.132633,5.291266,
769,,USA,37.090240,-95.712891,


In [22]:
df.drop_duplicates(subset = ['city'], inplace = True)
df

Unnamed: 0,city,country_code,latitude,longitude,count
1638,San Francisco,USA,37.788457,-122.399884,419.0
3793,New York,USA,,,378.0
1884,,ISR,31.046051,34.851612,322.0
7451,London,GBR,51.517356,-0.103774,258.0
747,Los Angeles,USA,,,118.0
...,...,...,...,...,...
4347,Fort Lee,USA,,,1.0
7144,Crystal Lake,USA,42.279634,-88.333977,1.0
7143,SetÃºbal,PRT,,,1.0
4117,Carlow,IRL,52.821340,-6.922036,1.0


In [23]:
df.rename(columns={"country_code": "country"},inplace = True)

In [25]:
df.reset_index(inplace= True)

In [32]:
df.drop('index',axis =1, inplace =True)

In [33]:
df.dropna(subset=['city'], inplace = True)

In [37]:
df.drop(2, inplace=True)

In [38]:
df

Unnamed: 0,city,country,latitude,longitude,count
0,San Francisco,USA,37.788457,-122.399884,419.0
1,New York,USA,,,378.0
3,London,GBR,51.517356,-0.103774,258.0
4,Los Angeles,USA,,,118.0
5,Seattle,USA,47.620716,-122.347533,111.0
...,...,...,...,...,...
1883,Hialeah,USA,25.866642,-80.314220,1.0
1884,Fort Lee,USA,,,1.0
1885,Crystal Lake,USA,42.279634,-88.333977,1.0
1886,SetÃºbal,PRT,,,1.0


Query about money