In [7]:
from pymongo import MongoClient
import logging
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Set up logging
logging.basicConfig(level=logging.INFO)

# Connection URI for the MongoDB instance
connection_uri = 'mongodb://localhost:27023/'

# Target database and the collections this cell populates
database_name = 'Motor_collisions'
collection_names = ['MC_Crashes', 'MC_People']

# Number of CSV rows read and inserted per batch (the crashes file is large)
chunk_size = 1000

# Upper bound, in seconds, on the HTTP request below; without a timeout
# requests.get() can block forever if the endpoint stops responding.
request_timeout = 30

# Connecting to MongoDB
logging.info("Connecting to MongoDB...")
with MongoClient(connection_uri) as client:
    db = client[database_name]
    # NOTE(review): no server round-trip has happened yet at this point, so this
    # message does not prove the server is reachable; failures surface on the
    # first insert below and are logged by the per-collection handlers.
    logging.info("Connected!")

    for collection_name in collection_names:
        # Accessing the collection
        collection = db[collection_name]
        logging.info(f"Accessing collection '{collection_name}'...")

        # MC_Crashes is loaded from the local CSV export
        if collection_name == 'MC_Crashes':
            # CSV file path
            csv_path = "C:/Users/Naveen/Documents/Projects/DAP/Motor_Vehicle_Collisions_-_Crashes.csv"
            logging.info(f"Loading data from {csv_path}...")
            try:
                # Stream the CSV in fixed-size chunks so the whole file is never held in memory
                for chunk in pd.read_csv(csv_path, chunksize=chunk_size, low_memory=False):
                    # One dictionary per row
                    data = chunk.to_dict(orient='records')
                    # Unordered insert: a failing document does not stop the rest of the batch
                    result = collection.insert_many(data, ordered=False, bypass_document_validation=True)
                    logging.info(f"{len(result.inserted_ids)} documents inserted into collection '{collection_name}'.")
            except Exception as e:
                # Covers both CSV read errors and insert errors; the remaining
                # chunks are skipped once an exception escapes the loop.
                logging.error(f"Error inserting documents into collection '{collection_name}': {str(e)}")

        # MC_People is loaded from the JSON endpoint
        elif collection_name == 'MC_People':
            # JSON endpoint URL
            json_url = 'https://data.cityofnewyork.us/api/views/f55k-p6yu/columns.json'

            logging.info(f"Loading data from {json_url}...")
            try:
                # Sending the HTTP GET request to the JSON endpoint (bounded by request_timeout)
                response = requests.get(json_url, timeout=request_timeout)
                # Turn HTTP 4xx/5xx responses into exceptions handled below
                response.raise_for_status()

                # Extracting the retrieved JSON data
                json_data = response.json()

                # Logging the JSON data to understand its structure
                logging.info("JSON Data:")
                logging.info(json_data)

                # The whole payload is stored as a single document under the 'data' key.
                # Each run of this cell adds another such document.
                result = collection.insert_one({'data': json_data})
                logging.info(f"JSON data inserted into collection '{collection_name}'.")
            except Exception as e:
                logging.error(f"Error loading data from JSON endpoint '{json_url}': {str(e)}")

logging.info("All collections accessed and data loaded.")
# final success message

INFO:root:Connecting to MongoDB...
INFO:root:Connected!
INFO:root:Accessing collection 'MC_Crashes'...
INFO:root:Loading data from C:/Users/Naveen/Documents/Projects/DAP/Motor_Vehicle_Collisions_-_Crashes.csv...
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 docum

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into collection 'MC_Crashes'.
INFO:root:1000 documents inserted into c

INFO:root:JSON data inserted into collection 'MC_People'.
INFO:root:All collections accessed and data loaded.


In [10]:
from pymongo import MongoClient

# MongoDB connection parameters
mongo_host = "localhost"
mongo_port = 27023  # not MongoDB's default port (27017)
mongo_dbname = "Motor_collisions"

def check_data_uploaded(collection_names=("MC_Crashes", "MC_People")):
    """Print the number of documents in each of the given collections.

    Connects using the module-level ``mongo_host``, ``mongo_port`` and
    ``mongo_dbname`` settings and prints one line per collection.

    Args:
        collection_names: Names of the collections to count. Defaults to the
            two collections checked by the original version of this cell.

    Returns:
        None. Results are printed. Any exception raised while connecting or
        counting is caught and reported with a printed message instead of
        propagating.
    """
    try:
        # The context manager closes the client even when a count fails.
        with MongoClient(host=mongo_host, port=mongo_port) as client:
            db = client[mongo_dbname]
            for name in collection_names:
                # An empty filter counts every document in the collection
                count = db[name].count_documents({})
                print(f"Total documents in {name} collection: {count}")
    except Exception as e:
        # The message says "connecting", but any failure inside the block lands here.
        print(f"Error connecting to MongoDB: {e}")

# Call the function to check data upload status
check_data_uploaded()


Total documents in MC_Crashes collection: 2026647
Total documents in MC_People collection: 5


In [11]:
import pymongo

# Open a client against the local MongoDB instance and select the collection to inspect
client = pymongo.MongoClient("mongodb://localhost:27023")
db = client["Motor_collisions"]
collection = db["MC_People"]

# Fetch one document so its top-level structure can be examined
document = collection.find_one()

if not document:
    # Nothing usable came back from the query
    print("No document found in the collection.")
else:
    # Report each top-level field together with the Python type of its value
    for key, value in document.items():
        print(f"{key}: {type(value)}")

_id: <class 'bson.objectid.ObjectId'>
data: <class 'list'>
