In [1]:
import json
import os
import pprint

import pandas as pd
import pymongo
import requests
from pymongo import MongoClient

# import api_key
from api_keys import geoapify_key
# Geoapify forward-geocoding search endpoint; get_lat_lon() sends its requests here
GEOCODE_URL = 'https://api.geoapify.com/v1/geocode/search'

In [2]:
# Create the MongoDB client on port 27017; no host is passed, so PyMongo's default host is used
mongo = MongoClient(port=27017)

In [3]:
# Select the 'North_Carolina' database from the client
db = mongo['North_Carolina']

In [4]:
# List the collection names in the selected database
print(db.list_collection_names())

['median_housing']


In [5]:
# Handle to the 'median_housing' collection; the functions in this notebook
# read it as a module-level global rather than taking it as an argument
collection = db['median_housing']

In [6]:
def check_if_empty():
    """Print whether the module-level ``collection`` holds any documents.

    Counts every document (empty filter) and reports either that the
    collection is empty or how many documents it contains.
    """
    total = collection.count_documents({})
    message = (
        "The collection is empty."
        if total == 0
        else f"The collection contains {total} documents."
    )
    print(message)

# Report how many documents the collection currently holds
check_if_empty()

The collection contains 204 documents.


In [7]:
def print_db_contents(limit=0):
    """Pretty-print documents from the module-level ``collection``.

    Parameters
    ----------
    limit : int, optional
        Maximum number of documents to print. The default ``0`` means
        "no limit" (PyMongo's convention), so calling the function with no
        argument still prints every document. Pass a small number to
        preview the data without flooding the notebook output.
    """
    # The limit is applied by the server, so skipped documents are never transferred
    documents = collection.find(limit=limit)

    print("Database Contents:")
    for document in documents:
        pprint.pprint(document)  # Use pprint to format the output

# Print every document currently stored in the collection
print_db_contents()

Database Contents:
{'Metro': 'Greensboro-High Point, NC',
 'MunicipalCodeFIPS': 81,
 'RegionID': 2312,
 'RegionName': 'Guilford County',
 'RegionType': 'county',
 'SizeRank': 129,
 'State': 'NC',
 'StateCodeFIPS': 37,
 'StateName': 'NC',
 '_id': ObjectId('669ee6a7359de28c403f15fb'),
 'dates': {'2000-01-31': 115654.729555625,
           '2000-02-29': 115923.420953134,
           '2000-03-31': 116097.198687813,
           '2000-04-30': 116512.782299219,
           '2000-05-31': 116829.716313852,
           '2000-06-30': 117198.009120443,
           '2000-07-31': 117532.32005631,
           '2000-08-31': 117894.088759755,
           '2000-09-30': 118192.258310508,
           '2000-10-31': 118425.532505027,
           '2000-11-30': 118619.647354871,
           '2000-12-31': 118865.188150128,
           '2001-01-31': 119251.914256667,
           '2001-02-28': 119691.295559641,
           '2001-03-31': 120108.78742126,
           '2001-04-30': 120427.637432513,
           '2001-05-31': 12067

In [8]:
def get_lat_lon(region_name, state_name, timeout=10):
    """Look up the latitude/longitude of a region with the Geoapify geocoding API.

    Parameters
    ----------
    region_name : str
        Region to search for, e.g. ``'Wake County'``.
    state_name : str
        State the region is in, e.g. ``'NC'``.
    timeout : float or tuple, optional
        Seconds ``requests`` waits for the server before giving up
        (default 10). Without a timeout a stalled connection blocks forever.

    Returns
    -------
    tuple
        ``(lat, lon)`` taken from the first feature in the response, or
        ``(None, None)`` when the request fails, the status is not 200, the
        body is not valid JSON, or no usable coordinates are present.
    """
    params = {
        'text': f'{region_name}, {state_name}, USA',
        'apiKey': geoapify_key,
        'limit': 1
    }
    print(f"Requesting lat/lon for: {region_name}, {state_name}, USA")

    try:
        response = requests.get(GEOCODE_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        # The API key travels in the query string and requests may echo the
        # full URL in its error text, so mask the key before printing.
        message = str(e)
        if geoapify_key:
            message = message.replace(str(geoapify_key), '***')
        print(f"An error occurred: {message}")
        return None, None

    # Check the status code
    if response.status_code != 200:
        print(f"API request failed with status code {response.status_code}")
        print("Response content:", response.text)  # debugging
        return None, None

    print("API request successful.")  # Debug print
    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON; the decode error raised by
        # requests is a ValueError subclass.
        print(f"An error occurred: {e}")
        return None, None
    print("API Response:", data)  #  API response

    features = data.get('features') if isinstance(data, dict) else None
    if not features:
        print("No features found in the response.")
        return None, None

    # Access the first feature in the results
    feature = features[0]
    geometry = feature.get('geometry') if isinstance(feature, dict) else None
    coordinates = geometry.get('coordinates') if isinstance(geometry, dict) else None
    # Coordinates are ordered [lon, lat]; tolerate extra trailing values
    # instead of failing on tuple unpacking.
    if not coordinates or len(coordinates) < 2:
        print("No coordinates found in the response.")
        return None, None

    lon, lat = coordinates[0], coordinates[1]
    return lat, lon

# Smoke test: geocode a single county and print the coordinates that come back
lat, lon = get_lat_lon('Wake County', 'NC')
print(f"Latitude: {lat}, Longitude: {lon}")

Requesting lat/lon for: Wake County, NC, USA
API request successful.
API Response: {'type': 'FeatureCollection', 'features': [{'type': 'Feature', 'properties': {'datasource': {'sourcename': 'openstreetmap', 'attribution': '© OpenStreetMap contributors', 'license': 'Open Database License', 'url': 'https://www.openstreetmap.org/copyright'}, 'country': 'United States', 'country_code': 'us', 'state': 'North Carolina', 'county': 'Wake', 'lon': -78.6118311, 'lat': 35.7979355, 'state_code': 'NC', 'result_type': 'county', 'formatted': 'Wake, NC, United States of America', 'address_line1': 'Wake, NC', 'address_line2': 'United States of America', 'category': 'administrative', 'timezone': {'name': 'America/New_York', 'offset_STD': '-05:00', 'offset_STD_seconds': -18000, 'offset_DST': '-04:00', 'offset_DST_seconds': -14400, 'abbreviation_STD': 'EST', 'abbreviation_DST': 'EDT'}, 'plus_code': '8773Q9XQ+57', 'rank': {'importance': 0.7302463904976988, 'popularity': 3.652500535111446, 'confidence': 0.9

In [9]:
def get_lat_lon(region_name, state_name, timeout=10):
    """Look up the latitude/longitude of a region with the Geoapify geocoding API.

    Parameters
    ----------
    region_name : str
        Region to search for, e.g. ``'Wake County'``.
    state_name : str
        State the region is in, e.g. ``'NC'``.
    timeout : float or tuple, optional
        Seconds ``requests`` waits for the server before giving up
        (default 10). Without a timeout a stalled connection blocks forever.

    Returns
    -------
    tuple
        ``(lat, lon)`` taken from the first feature in the response, or
        ``(None, None)`` when the request fails, the status is not 200, the
        body is not valid JSON, or no usable coordinates are present.
    """
    # NOTE(review): this cell re-defines get_lat_lon, which an earlier cell
    # already defines; consider keeping a single definition.
    params = {
        'text': f'{region_name}, {state_name}, USA',
        'apiKey': geoapify_key,
        'limit': 1
    }
    print(f"Requesting lat/lon for: {region_name}, {state_name}, USA")

    try:
        response = requests.get(GEOCODE_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        # The API key travels in the query string and requests may echo the
        # full URL in its error text, so mask the key before printing.
        message = str(e)
        if geoapify_key:
            message = message.replace(str(geoapify_key), '***')
        print(f"An error occurred: {message}")
        return None, None

    # Check the status code
    if response.status_code != 200:
        print(f"API request failed with status code {response.status_code}")
        print("Response content:", response.text)  # Print the response context
        return None, None

    print("API request successful.")  # Debug print
    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON; the decode error raised by
        # requests is a ValueError subclass.
        print(f"An error occurred: {e}")
        return None, None
    print("API Response:", data)

    features = data.get('features') if isinstance(data, dict) else None
    if not features:
        print("No features found in the response.")
        return None, None

    feature = features[0]
    geometry = feature.get('geometry') if isinstance(feature, dict) else None
    coordinates = geometry.get('coordinates') if isinstance(geometry, dict) else None
    # Coordinates are ordered [lon, lat]; tolerate extra trailing values
    # instead of failing on tuple unpacking.
    if not coordinates or len(coordinates) < 2:
        print("No coordinates found in the response.")
        return None, None

    lon, lat = coordinates[0], coordinates[1]
    return lat, lon

def update_db():
    """Geocode every document and store the result on the document.

    Iterates over the module-level ``collection``, calls ``get_lat_lon`` with
    each document's ``RegionName`` and ``StateName`` (one API request per
    document), and writes the coordinates back as ``latitude`` and
    ``longitude`` fields. Documents missing either name, or for which the
    lookup returns no coordinates, are reported and left unchanged.
    """
    # Only the two name fields (plus _id, returned by default) are needed, so
    # the per-document 'dates' history is not fetched.
    name_fields = {'RegionName': 1, 'StateName': 1}
    for document in collection.find({}, name_fields):
        region_name = document.get('RegionName')
        state_name = document.get('StateName')
        if not (region_name and state_name):
            print(f"Skipping document {document.get('_id')}: missing RegionName or StateName")
            continue

        lat, lon = get_lat_lon(region_name, state_name)
        # Compare against None explicitly: 0.0 is a valid coordinate and must
        # not be mistaken for a failed lookup.
        if lat is None or lon is None:
            print(f'Failed to get lat/lon for {region_name}, {state_name}')
            continue

        collection.update_one(
            {'_id': document['_id']},
            {'$set': {'latitude': lat, 'longitude': lon}}
        )
        print(f'Updated {region_name} in {state_name} with lat: {lat}, lon: {lon}')

# Geocode every document and write latitude/longitude back to MongoDB
# (issues one API request per document)
update_db()

Requesting lat/lon for: Guilford County, NC, USA
API request successful.
API Response: {'type': 'FeatureCollection', 'features': [{'type': 'Feature', 'properties': {'datasource': {'sourcename': 'openstreetmap', 'attribution': '© OpenStreetMap contributors', 'license': 'Open Database License', 'url': 'https://www.openstreetmap.org/copyright'}, 'country': 'United States', 'country_code': 'us', 'state': 'North Carolina', 'county': 'Guilford', 'lon': -79.7888515, 'lat': 36.0875688, 'state_code': 'NC', 'result_type': 'county', 'formatted': 'Guilford, NC, United States of America', 'address_line1': 'Guilford, NC', 'address_line2': 'United States of America', 'category': 'administrative', 'timezone': {'name': 'America/New_York', 'offset_STD': '-05:00', 'offset_STD_seconds': -18000, 'offset_DST': '-04:00', 'offset_DST_seconds': -14400, 'abbreviation_STD': 'EST', 'abbreviation_DST': 'EDT'}, 'plus_code': '878236Q6+2F', 'rank': {'importance': 0.7284332046035948, 'popularity': 2.8019527358063323, 

In [11]:
def print_db_contents(limit=0):
    """Pretty-print documents from the module-level ``collection``.

    Parameters
    ----------
    limit : int, optional
        Maximum number of documents to print. The default ``0`` means
        "no limit" (PyMongo's convention), so calling the function with no
        argument still prints every document. Pass a small number to
        preview the data without flooding the notebook output.
    """
    # NOTE(review): this cell re-defines print_db_contents, which an earlier
    # cell already defines; consider keeping a single definition.
    # The limit is applied by the server, so skipped documents are never transferred
    documents = collection.find(limit=limit)

    print("Database Contents:")
    for document in documents:
        pprint.pprint(document)

# Print the collection again, now that update_db() has been run above
print_db_contents()

Database Contents:
{'Metro': 'Greensboro-High Point, NC',
 'MunicipalCodeFIPS': 81,
 'RegionID': 2312,
 'RegionName': 'Guilford County',
 'RegionType': 'county',
 'SizeRank': 129,
 'State': 'NC',
 'StateCodeFIPS': 37,
 'StateName': 'NC',
 '_id': ObjectId('669ee6a7359de28c403f15fb'),
 'dates': {'2000-01-31': 115654.729555625,
           '2000-02-29': 115923.420953134,
           '2000-03-31': 116097.198687813,
           '2000-04-30': 116512.782299219,
           '2000-05-31': 116829.716313852,
           '2000-06-30': 117198.009120443,
           '2000-07-31': 117532.32005631,
           '2000-08-31': 117894.088759755,
           '2000-09-30': 118192.258310508,
           '2000-10-31': 118425.532505027,
           '2000-11-30': 118619.647354871,
           '2000-12-31': 118865.188150128,
           '2001-01-31': 119251.914256667,
           '2001-02-28': 119691.295559641,
           '2001-03-31': 120108.78742126,
           '2001-04-30': 120427.637432513,
           '2001-05-31': 12067

In [59]:
def export_db_to_json(path='Resources/housing.json'):
    """Write every document in the module-level ``collection`` to a JSON file.

    Parameters
    ----------
    path : str, optional
        Destination file. Defaults to ``'Resources/housing.json'``; the
        parent directory is created when it does not exist yet.

    Notes
    -----
    Each document's ``_id`` is converted to a string. Any other value the
    ``json`` module cannot encode natively (for example a datetime) is also
    written as ``str(value)`` instead of aborting the export.
    """
    documents = list(collection.find())
    for doc in documents:
        doc['_id'] = str(doc['_id'])  # Convert ObjectId to string for JSON serialization

    # Serialize before opening the file so a serialization error cannot
    # leave a half-written export behind.
    payload = json.dumps(documents, indent=4, default=str)

    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the file identical across platforms
    with open(path, 'w', encoding='utf-8') as file:
        file.write(payload)

    print("Database export successful.")

# Export the collection to the JSON file
export_db_to_json()

Database export successful.
