Importing the data into MongoDB

In [1]:
# Import the dependencies for MongoDB
from pymongo import MongoClient
from pprint import pprint

In [2]:
# Create an instance of the MongoClient.
# Only the port is passed, so the host is left at the driver's default.
mongo = MongoClient(port=27017)

In [3]:
# Confirm the connection by listing the database names.
# The 'flight_data' database used in the following cells should appear in this list.
print(mongo.list_database_names())

['admin', 'config', 'epa', 'flight_data', 'local', 'met', 'petsitly_marketing', 'uk_food']


Use the terminal to import the flight data using this command:

mongoimport --db flight_data --collection flights --type csv --headerline --drop --file ../Data/CSV/cleaned_flight_data.csv

In [4]:
# Keep a handle on the 'flight_data' database for the cells that follow
db = mongo.flight_data

In [5]:
# Review the collections in the database.
# The 'flights' collection queried in the following cells should appear in this list.
print(db.list_collection_names())

['flights']


In [7]:
# Peek at a single document from 'flights' to check the imported fields and values
pprint(db.flights.find_one())

{'Year': 2010,
 '_id': ObjectId('679845ccb6f7d3e6bf2ccf78'),
 'arrival_airport': 'TPA',
 'arrival_airport_id': 15304,
 'arrival_city': 'Tampa, FL (Metropolitan Area)',
 'arrival_city_id': 33195,
 'arrival_latitude': 37.8606,
 'arrival_longitude': -78.804199,
 'carrier_lg': 'Delta Air Lines Inc.',
 'carrier_low': 'US Airways Group Inc.',
 'departure_airport': 'SLC',
 'departure_airport_id': 14869,
 'departure_city': 'Salt Lake City, UT',
 'departure_city_id': 34614,
 'departure_latitude': 40.758478,
 'departure_longitude': -111.888142,
 'fare': 226.59,
 'fare_lg': 247.69,
 'fare_low': 166.99,
 'large_ms': 0.38,
 'lf_ms': 0.2,
 'nsmiles': 1887,
 'passengers': 200,
 'quarter': 1,
 'tbl': 'Table 1a',
 'tbl1apk': '201011486915304SLCTPA'}


In [11]:
# Save the data to json file
import json
from bson import ObjectId

def convert_objectid(obj):
    """Fallback encoder for ``json.dump``: render a BSON ObjectId as a string.

    Meant to be passed as the ``default=`` argument of ``json.dump`` /
    ``json.dumps``.

    Args:
        obj: The value being encoded.

    Returns:
        ``str(obj)`` when ``obj`` is an ``ObjectId``.

    Raises:
        TypeError: If ``obj`` is anything other than an ``ObjectId``.
    """
    # Guard clause: every type except ObjectId is rejected
    if not isinstance(obj, ObjectId):
        type_name = type(obj).__name__
        raise TypeError("Object of type %s is not JSON serializable" % type_name)
    return str(obj)

# Write every document of the 'flights' collection to disk as one JSON array;
# values json cannot encode natively are handed to convert_objectid.
with open('../Data/JSON/flights.json', mode='w') as json_file:
    json.dump(list(db.flights.find()), json_file, default=convert_objectid)

In [12]:
# Close the MongoDB client connection now that the export is done
mongo.close()