In [1]:
# Importing libraries
import pandas as pd
import pymongo
import getpass
import json
from geojson import Feature, FeatureCollection, Point

In [2]:
# Making client for pymongo (connection to server)
user = input("Username [%s]: " % getpass.getuser())
password = getpass.getpass(prompt='Password')
client = pymongo.MongoClient(f"mongodb+srv://{user}:{password}@cluster0.xinvi.mongodb.net/test")


Username [pasca]: Pascal
Password········


In [62]:
# Defining new database
airbnb_db = client['airbnb_clean']

# Defining new collection for airbnb_db. 
airbnb_collection = airbnb_db['airbnb_clean']

# Make dataframe from Collection
df = pd.DataFrame(list(airbnb_collection.find()))
df.head()

Unnamed: 0,_id,id,host_response_rate,host_acceptance_rate,host_picture_url,latitude,longtitude,property_type,room_type,accomodates,...,pool,hair_dryer,gym,wheelchair_accessible,iron,shampoo,hangers,first_aid_kit,elevator,comments
0,60f1a2f1bc9dc8e90a6f1e08,7694559,100,91,https://a0.muscache.com/im/users/22348222/prof...,42.3451341,-71.07513462,Apartment,Entire home/apt,2,...,0,0,0,0,0,0,0,0,0,
1,60f1a2f1bc9dc8e90a6f1df9,13215952,80,90,https://a0.muscache.com/im/users/19798741/prof...,42.30992056,-71.11072692,Apartment,Private room,2,...,0,0,0,0,0,1,1,0,0,
2,60f1a2f1bc9dc8e90a6f1e03,13711334,100,91,https://a2.muscache.com/im/users/32532791/prof...,42.35150365,-71.08710415,Apartment,Entire home/apt,5,...,0,1,0,0,1,0,1,0,0,Spacious well equipped apartment in good area ...
3,60f1a2f1bc9dc8e90a6f1e0d,9723217,100,33,https://a2.muscache.com/im/pictures/e49e4396-0...,42.35540779,-71.13875387,House,Private room,1,...,0,1,0,0,1,1,1,0,0,"Everything was great, maybe watch out for the ..."
4,60f1a2f1bc9dc8e90a6f1e13,14672735,0,0,https://a2.muscache.com/im/pictures/20b165e7-a...,42.31978117,-71.10467022,Apartment,Private room,1,...,0,0,0,0,0,0,1,0,0,


In [63]:
# drop columns not needed for mapping
df2 = df[["id", "latitude", "longtitude", "property_type", "room_type",
          "accomodates", "bathrooms", "bedrooms", "price", "security_deposit", "cleaning_fee", "review_scores_rating"]]
df2.isnull().sum()

id                      0
latitude                0
longtitude              0
property_type           3
room_type               0
accomodates             0
bathrooms               0
bedrooms                0
price                   0
security_deposit        0
cleaning_fee            0
review_scores_rating    0
dtype: int64

In [64]:
# Drop NaN 
df3 = df2.dropna()
df3.isnull().sum()

id                      0
latitude                0
longtitude              0
property_type           0
room_type               0
accomodates             0
bathrooms               0
bedrooms                0
price                   0
security_deposit        0
cleaning_fee            0
review_scores_rating    0
dtype: int64

In [65]:
df3.dtypes

id                      object
latitude                object
longtitude              object
property_type           object
room_type               object
accomodates             object
bathrooms               object
bedrooms                object
price                   object
security_deposit        object
cleaning_fee            object
review_scores_rating    object
dtype: object

In [66]:
# group by best score rating
sort_df = df3.sort_values(by=['review_scores_rating'], ascending=False)
top200_df = sort_df.iloc[:200]
top200_df.head(5)

Unnamed: 0,id,latitude,longtitude,property_type,room_type,accomodates,bathrooms,bedrooms,price,security_deposit,cleaning_fee,review_scores_rating
2093,4530670,42.27174515,-71.16148454,House,Private room,2,1.0,1,80,0,0,99
1216,2167993,42.37638503,-71.05136255,Apartment,Entire home/apt,2,1.0,1,149,0,100,99
2209,7809904,42.3700513,-71.04169711,Apartment,Entire home/apt,3,1.5,1,130,0,0,99
2591,7611271,42.33181559,-71.03398667,House,Entire home/apt,7,2.5,4,391,500,200,99
3013,7804358,42.35132218,-71.08259079,Apartment,Entire home/apt,2,1.0,1,225,400,65,99


In [67]:
# export the data to json file
top200_df.to_json('static/data/listings.json', orient='table')

In [68]:
# make a geojson file
geojson = {"type": "FeatureCollection", "features": []}

for _, row in top200_df.iterrows():
    feature = {"type": "Feature", "geometry": {"type": "Point", "coordinates": [row['longtitude'], row['latitude']]}, 
               "properties": {"id": row['id'], "property_type": row['property_type'],
              "room_type": row['room_type'], "accomodates": row['accomodates'], 
              "bathrooms": row['bathrooms'], "bedrooms": row['bedrooms'], "price": row['price']}}
    geojson['features'].append(feature)


with open('static/data/listings.geojson', 'w') as fp:
    json.dump(geojson, fp)    

In [58]:
geojson

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': ['-71.16148454', '42.27174515']},
   'properties': {'id': '4530670',
    'property type': 'House',
    'room type': 'Private room',
    'accomodates': '2',
    'bathrooms': '1',
    'bedrooms': '1',
    'price': '80'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': ['-71.05136255', '42.37638503']},
   'properties': {'id': '2167993',
    'property type': 'Apartment',
    'room type': 'Entire home/apt',
    'accomodates': '2',
    'bathrooms': '1',
    'bedrooms': '1',
    'price': '149'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': ['-71.04169711', '42.3700513']},
   'properties': {'id': '7809904',
    'property type': 'Apartment',
    'room type': 'Entire home/apt',
    'accomodates': '3',
    'bathrooms': '1.5',
    'bedrooms': '1',
    'price': '130'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    