In [2]:
# 1. Create a database in MongoDB Compass
# 2. Use the connection string to connect to the database via VS Code
# Establish a connection to the MongoDB server with Python
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
# Load variables from a local .env file into the process environment.
load_dotenv()

# Connection settings come from the environment so that no credentials are
# stored in the notebook itself. The .env file should hold the raw (unescaped) values.
db_username = os.getenv("db_username")
db_password = os.getenv("db_password")
db_host = os.getenv("db_host")

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# which would otherwise be formatted into the URI as the literal text "None".
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("db_username", db_username),
        ("db_password", db_password),
        ("db_host", db_host),
    )
    if not setting_value
]
if missing_settings:
    raise RuntimeError(
        "Missing MongoDB connection settings: "
        + ", ".join(missing_settings)
        + ". Define them in the environment or in a .env file."
    )

# Percent-escape the credentials: reserved characters such as '@', ':', '/' or '%'
# in a username or password would otherwise corrupt the connection string.
uri = f"mongodb+srv://{quote_plus(db_username)}:{quote_plus(db_password)}@{db_host}"
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # Best-effort check: report the failure but keep the notebook running.
    print(e)


Pinged your deployment. You successfully connected to MongoDB!


In [3]:
# Show the names of every database visible to this client
database_names = client.list_database_names()
print(database_names)

['Testing', 'sample_airbnb', 'sample_analytics', 'sample_geospatial', 'sample_guides', 'sample_mflix', 'sample_restaurants', 'sample_supplies', 'sample_training', 'sample_weatherdata', 'admin', 'local']


In [4]:
import pprint as pp  # pretty printer for JSON-like documents

# Select the database, then the collection inside it
shipwreck_db = client["sample_geospatial"]
shipwrecks_collection = shipwreck_db["shipwrecks"]

# Collection names on the first line, total document count (empty filter = all) on the second
print(
    shipwreck_db.list_collection_names(),
    shipwrecks_collection.count_documents({}),
    sep="\n",
)
# Pretty-print a single document from the collection
pp.pprint(shipwrecks_collection.find_one())


['shipwrecks']
11095
{'_id': ObjectId('578f6fa2df35c7fbdbaed8ca'),
 'chart': 'US,US,reprt,L-1083/14',
 'coordinates': [-79.940556, 9.3641392],
 'depth': 0,
 'feature_type': 'Wrecks - Visible',
 'gp_quality': '',
 'history': '',
 'latdec': 9.3641392,
 'londec': -79.940556,
 'quasou': '',
 'recrd': '',
 'sounding_type': '',
 'vesslterms': '',
 'watlev': 'always dry'}


In [None]:
# How to add a document to a collection?
new_shipwreck = {
    "name": "Test Shipwreck",
    "year": 2024,
    "location": {
        "type": "Point",
        "coordinates": [-122.4194, 37.7749]  # Example coordinates (San Francisco)
    },
    "description": "This is a test shipwreck entry."
}

# Look up an existing document with the same name; only its _id is needed.
existing_shipwreck = shipwrecks_collection.find_one(
    {"name": new_shipwreck["name"]}, {"_id": 1}
)

# shipwreck_id is assigned in BOTH branches so the verification below also works
# when the insertion is skipped (previously it read insert_result, which only
# exists after an actual insert).
if existing_shipwreck is None:
    # Insert the new document (the _id is auto-generated by the driver)
    insert_result = shipwrecks_collection.insert_one(new_shipwreck)
    shipwreck_id = insert_result.inserted_id
    print(f"Inserted document with ID: {shipwreck_id}")
else:
    # If the name already exists, skip the insertion and reuse the stored document's id.
    shipwreck_id = existing_shipwreck["_id"]
    print(f"Document named {new_shipwreck['name']!r} already exists with ID: {shipwreck_id}")

# Verify: fetch the document back by its id
pp.pprint(shipwrecks_collection.find_one({"_id": shipwreck_id}))

This shit already exist
{'_id': ObjectId('69292bf091d2d0896fe7bf83'),
 'description': 'This is a test shipwreck entry.',
 'location': {'coordinates': [-122.4194, 37.7749], 'type': 'Point'},
 'name': 'Test Shipwreck',
 'year': 2024}


In [None]:
"""
# Dataset Profile (like ones found in Kaggle)
from ydata_profiling import ProfileReport
profile = ProfileReport(shipwrecks_df, title="Shipwrecks Dataset Profile", explorative=True)
# profile.to_notebook_iframe()
# profile.to_file("shipwrecks_profile.html")
"""

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 14/14 [00:00<00:00, 456.20it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Load the whole collection into a pandas DataFrame
import pandas as pd

# An unfiltered find() yields every document; each one becomes a row
shipwrecks_df = pd.DataFrame(
    [document for document in shipwrecks_collection.find()]
)
shipwrecks_df.head()