In [1]:
# Import Libraries
import os
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
from pymongo import MongoClient
from dotenv import load_dotenv
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import requests

In [2]:
# Add the 'scripts' directory to the Python path
sys.path.append(os.path.abspath(os.path.join('..', 'scripts')))

In [3]:
import extract_to_mongodb as etm
import db_utils as dbu

In [None]:
# print(dir(dbu))

In [4]:
collection_name = os.getenv('COLLECTION_NAME_FEATUREENGINEERED')
naturalearth_lowres = os.getenv('NATURALEARTH_SHAPEFILE_PATH')

In [5]:
print(f"Collection Name: {collection_name}")

Collection Name: wildfire_feature_engineered_data


Load the Data

In [6]:
# Load the cleaned data
geo_wfp = dbu.load_data_from_mongodb(collection_name)

INFO:pymongo.client:You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb


An error occurred: 'DataFrame' object has no attribute 'lon'


In [7]:
geo_wfp.head(5)

AttributeError: 'NoneType' object has no attribute 'head'

In [None]:
app_name = os.getenv('APP_NAME')
contact_email = os.getenv('CONTACT_EMAIL')

In [None]:
def reverse_geocode_nominatim(lat, lon):
    url = 'https://nominatim.openstreetmap.org/reverse'
    params = {
        'format': 'json',
        'lat': lat,
        'lon': lon,
        'zoom': 10,
        'addressdetails': 1
    }
    headers = {
        'User-Agent': f'{app_name} ({contact_email})'
    }
    response = requests.get(url, params=params, headers=headers)
    if response.status_code == 200:
        data = response.json()
        address = data.get('address', {})
        city = address.get('city', None)
        if not city:
            city = address.get('town', None)
        if not city:
            city = address.get('village', None)
        if not city:
            city = address.get('county', None)
        return city
    else:
        print(f"Error: {response.status_code}")
        return None

In [None]:
# Calculate latitude and longitude from sine and cosine values
def calculate_lat_lon(row):
    lat = np.arcsin(row.lat_sin) * 180 / np.pi
    lon = np.arctan2(row.lon_sin, row.lon_cos) * 180 / np.pi
    return lat, lon




In [None]:
# Apply reverse geocoding to the first 5 records using computed latitude and longitude
geo_wfp_first_5 = geo_wfp.iloc[:5].copy()
geo_wfp_first_5['lat'], geo_wfp_first_5['lon'] = zip(*geo_wfp_first_5.apply(calculate_lat_lon, axis=1))
geo_wfp_first_5['city'] = geo_wfp_first_5.apply(lambda row: reverse_geocode_nominatim(row.lat, row.lon), axis=1)

# Print the first 5 records with the city column
print(geo_wfp_first_5[['lat', 'lon', 'city']])

In [None]:
# # Apply reverse geocoding to the first 5 records using longitude and latitude
# geo_wfp_first_5 = geo_wfp.iloc[:5].copy()
# geo_wfp_first_5['city'] = geo_wfp_first_5.apply(lambda row: reverse_geocode_nominatim(row.lat, row.lon), axis=1)

# # Merge the city data back into the original DataFrame
# geo_wfp = geo_wfp.merge(geo_wfp_first_5[['city']], left_index=True, right_index=True, how='left')

# Apply reverse geocoding to all records using longitude and latitude
# Apply reverse geocoding to all records using computed latitude and longitude
geo_wfp['lat'], geo_wfp['lon'] = zip(*geo_wfp.apply(calculate_lat_lon, axis=1))
geo_wfp['city'] = geo_wfp.apply(lambda row: reverse_geocode_nominatim(row.lat, row.lon), axis=1)


In [None]:
#analysis only, this part can be removed
#geo_wfp.to_csv('engineered_wildfire_data.csv', index=False)

In [None]:
# Save the csv featured engineering data to mongodb
#dbu.insert_data_to_mongodb('engineered_wildfire_data.csv', os.getenv('COLLECTION_NAME_FEATUREENGINEERED'))


In [None]:
# Save the dataframe -  featured engineering data to mongodb
dbu.insert_df_only_to_mongodb(geo_wfp, os.getenv('COLLECTION_NAME_FEATUREENGINEERED_WITH_CITY'))