In [1]:
import folium

import geopandas as gpd
import pandas as pd

from go_utils import get_api_data, constants
from geopandas.tools import sjoin
from go_utils.cleanup import filter_invalid_coords

# Parameters

`start_date`: Start date of the GLOBE data to use, as a `YYYY-MM-DD` string

`end_date`: End date of the GLOBE data to use, as a `YYYY-MM-DD` string

`buffer_size`: Half the side length, in meters, of the square buffer drawn around each landcover entry (50 is recommended, as it produces the ideal 100 meter square centered on each landcover entry)

`output_file_name`: Base name of the output CSV file (the start date, end date, and `.csv` extension are appended automatically)

In [2]:
# Date range of GLOBE observations to download (YYYY-MM-DD strings); both
# values are passed to get_api_data and reused in the output file name.
start_date = "2021-07-25"
end_date = "2021-08-25"
# Distance handed to the square buffer around each land cover point. With
# square caps the buffer extends this far in every direction, so 50 gives a
# square 100 units on a side in the projected CRS.
buffer_size = 50
# Base name of the saved CSV; "_<start_date>_<end_date>.csv" is appended.
output_file_name = "MHM_LC_Joined"

In [3]:
# Download the Mosquito Habitat Mapper observations for the configured date
# range and pass them through the coordinate filter in one step.
mhm_df = filter_invalid_coords(
    get_api_data(
        constants.mosquito_protocol,
        start_date=start_date,
        end_date=end_date,
    ),
    "mhm_Latitude",
    "mhm_Longitude",
)
mhm_df

Unnamed: 0,mhm_protocol,mhm_measuredDate,mhm_createDate,mhm_updateDate,mhm_publishDate,mhm_organizationId,mhm_organizationName,mhm_siteId,mhm_siteName,mhm_countryName,...,mhm_IsGenusOfInterest,mhm_IsWaterSourceContainer,mhm_HasWaterSource,mhm_PhotoCount,mhm_RejectedCount,mhm_PendingCount,mhm_PhotoBitBinary,mhm_PhotoBitDecimal,mhm_SubCompletenessScore,mhm_CumulativeCompletenessScore
0,mosquito_habitat_mapper,2021-08-03,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,2620510,Institute for Earth Observations School,36972,18TVK964287,United States,...,0,0,1,1,0,0,100,4,0.25,0.82
1,mosquito_habitat_mapper,2021-07-28,2022-02-22 12:06:08,2022-02-22 12:06:08,2022-02-22 12:45:14,17043304,United States of America Citizen Science,40974,13SCV566084,United States,...,0,1,1,7,0,0,110,6,0.50,0.90
2,mosquito_habitat_mapper,2021-07-31,2022-02-22 12:06:08,2022-02-22 12:06:08,2022-02-22 12:45:14,17043304,United States of America Citizen Science,40974,13SCV566084,United States,...,0,1,1,2,0,0,100,4,0.25,0.82
3,mosquito_habitat_mapper,2021-08-05,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,17043304,United States of America Citizen Science,40974,13SCV566084,United States,...,0,0,1,2,0,0,100,4,0.25,0.84
4,mosquito_habitat_mapper,2021-08-10,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,277161,Brazil Secondary School,43328,20PPS775621,Trinidad & Tobago,...,0,1,1,1,0,0,100,4,0.25,0.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1028,mosquito_habitat_mapper,2021-08-25,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,17043304,United States of America Citizen Science,248072,14SMJ244199,United States,...,1,1,1,3,0,0,110,6,0.75,0.90
1029,mosquito_habitat_mapper,2021-08-22,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,163028,Princess Chulabhorn Science High School Trang,249143,47NNJ665354,Thailand,...,0,1,1,5,0,0,110,6,0.50,0.88
1030,mosquito_habitat_mapper,2021-08-21,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,17043304,United States of America Citizen Science,249718,13SCU468846,United States,...,0,0,1,2,0,0,100,4,0.25,0.84
1031,mosquito_habitat_mapper,2021-08-11,2022-02-22 12:17:18,2022-02-22 12:17:18,2022-02-22 12:45:14,17615655,Thailand Citizen Science,257301,47PPL034547,Thailand,...,0,1,1,1,0,0,100,4,0.25,0.86


In [4]:
# Download the Land Cover observations for the same date range and pass them
# through the coordinate filter in one step.
lc_df = filter_invalid_coords(
    get_api_data(
        constants.landcover_protocol,
        start_date=start_date,
        end_date=end_date,
    ),
    "lc_Latitude",
    "lc_Longitude",
)
lc_df

Unnamed: 0,lc_measuredDate,lc_createDate,lc_updateDate,lc_publishDate,lc_organizationId,lc_organizationName,lc_siteId,lc_siteName,lc_countryName,lc_countryCode,...,lc_SouthPrimary,lc_SouthSecondary,lc_WestPrimary,lc_WestSecondary,lc_PrimaryClassification,lc_SecondaryClassification,lc_PrimaryPercentage,lc_SecondaryPercentage,lc_SubCompletenessScore,lc_CumulativeCompletenessScore
0,2021-07-28,2021-07-28 14:30:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,"Urban, Residential Property",,"Urban, Residential Property",,"Urban, Residential Property",,100.0,0.0,1.0,0.98
1,2021-07-31,2021-07-31 01:10:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,"Urban, Residential Property",,"Urban, Residential Property",,"Urban, Residential Property",,100.0,0.0,1.0,0.98
2,2021-08-05,2021-08-05 15:20:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,"Urban, Residential Property","Urban, Roads and Parking","Urban, Residential Property",,"Urban, Residential Property","Urban, Roads and Parking",62.5,37.5,1.0,0.98
3,2021-08-16,2021-08-17 12:15:07,2021-08-17 12:15:07,2022-01-16 21:05:00,13063641,GPM Satellite Mission,46273,18SUJ104472,United States,USA,...,,,,,,,0.0,0.0,0.6,0.96
4,2021-07-25,2021-07-25 18:00:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,52378,16SEJ191374,United States,USA,...,"Herbaceous/Grassland, Short Grass","Trees, Loosely Spaced, Evergreen - Needle Leav...","Herbaceous/Grassland, Short Grass","Trees, Closely Spaced, Deciduous - Broad Leaved","Herbaceous/Grassland, Short Grass","Trees, Closely Spaced, Deciduous - Broad Leaved",55.0,27.5,1.0,0.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,2021-08-19,2021-08-27 19:00:03,2021-08-27 19:00:03,2022-01-16 21:05:00,32402168,"College of Earth, Ocean, Atmospheric Sciences ...",248146,10TFR055286,United States,USA,...,"Shrubs, Loosely Spaced, Short Deciduous, Herba...","Trees, Loosely Spaced, Deciduous - Needle Leav...","Barren, Dirt/Other, Shrubs, Closely Spaced, Sh...","Urban, Roads and Parking","Barren, Dirt/Other","Herbaceous/Grassland, Ferns or Flowers",25.0,12.5,1.0,0.98
1091,2021-08-19,2021-08-27 19:05:14,2021-08-27 19:05:14,2022-01-16 21:05:00,32402168,"College of Earth, Ocean, Atmospheric Sciences ...",248147,10TFR057284,United States,USA,...,"Barren, Dirt/Other, Trees, Closely Spaced, Eve...","Urban, Roads and Parking, Trees, Loosely Space...","Barren, Dirt/Other","Herbaceous/Grassland, Ferns or Flowers","Barren, Dirt/Other","Trees, Loosely Spaced, Evergreen - Needle Leaved",25.0,12.5,1.0,0.98
1092,2021-08-19,2021-09-01 19:30:03,2021-09-01 19:30:03,2022-01-16 21:05:00,32402168,"College of Earth, Ocean, Atmospheric Sciences ...",248448,10TFR058283,United States,USA,...,,,,,,,0.0,0.0,0.6,0.96
1093,2021-08-14,2021-09-04 11:05:04,2021-09-04 11:05:04,2022-01-16 21:05:00,82073738,DEMONSTRATION SCHOOL OF THAKSIN UNIVERSITY,248641,47NPJ025362,Thailand,THA,...,,,,,,,0.0,0.0,0.3,0.94


# Setting up GeoDataFrames

Converting both tables to GeoDataFrames lets us run spatial operations on them: buffering each landcover point and then spatially joining the two datasets.

In [5]:
# Mosquito observations: one point per row built from the longitude/latitude
# columns, tagged as EPSG:4326.
mhm_points = gpd.points_from_xy(mhm_df["mhm_Longitude"], mhm_df["mhm_Latitude"])
gmhm_df = gpd.GeoDataFrame(mhm_df, geometry=mhm_points, crs="EPSG:4326")

# Land cover observations: built the same way, then reprojected to EPSG:3395
# so that the buffering step works in projected units instead of degrees.
lc_points = gpd.points_from_xy(lc_df["lc_Longitude"], lc_df["lc_Latitude"])
glc_df = gpd.GeoDataFrame(lc_df, geometry=lc_points, crs="EPSG:4326")
glc_df = glc_df.to_crs(epsg=3395)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [6]:
# This cell overwrites glc_df, so running it a second time would buffer the
# already-reprojected squares by `buffer_size` *degrees*. Fail loudly instead
# of silently producing a meaningless join; re-run the GeoDataFrame setup cell
# first to get fresh points.
if not (glc_df.geom_type == "Point").all():
    raise RuntimeError(
        "glc_df has already been buffered; re-run the GeoDataFrame setup cell "
        "before running this cell again."
    )

# Grow every land cover point into a square: cap_style=3 requests square caps,
# so each side is 2 * buffer_size long in the projected CRS's units.
# NOTE(review): EPSG:3395 is a Mercator projection, so its units only equal
# ground meters near the equator; away from it the squares cover less ground
# than buffer_size suggests. Confirm this is acceptable for the analysis.
glc_df.geometry = glc_df.buffer(buffer_size, cap_style=3)

# Reproject to the mosquito frame's own CRS object (instead of repeating an
# EPSG code) so both frames carry an identical CRS and the spatial join does
# not report a CRS mismatch.
glc_df = glc_df.to_crs(crs=gmhm_df.crs)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


# Buffer Visualization

In [7]:
# Interactive map showing every buffered land cover square.
m = folium.Map(tiles='CartoDB positron')

# The buffers are plain squares, so they are drawn as-is in a single GeoJSON
# layer. The previous per-row simplify(tolerance=0.001) worked in degrees, a
# tolerance larger than the squares themselves, so it could only distort them.
folium.GeoJson(
    data=glc_df.geometry.to_json(),
    style_function=lambda feature: {'fillColor': 'orange'},
).add_to(m)

m

# Join

In [8]:
# Inner spatial join of the land cover squares (left) with the mosquito
# points (right) using sjoin's default spatial predicate. Only matching pairs
# are kept, so a land cover row appears once per mosquito point it matches.
joined_df = sjoin(left_df=glc_df, right_df=gmhm_df, how="inner")
joined_df

  "(%s != %s)" % (left_df.crs, right_df.crs)


Unnamed: 0,lc_measuredDate,lc_createDate,lc_updateDate,lc_publishDate,lc_organizationId,lc_organizationName,lc_siteId,lc_siteName,lc_countryName,lc_countryCode,...,mhm_IsGenusOfInterest,mhm_IsWaterSourceContainer,mhm_HasWaterSource,mhm_PhotoCount,mhm_RejectedCount,mhm_PendingCount,mhm_PhotoBitBinary,mhm_PhotoBitDecimal,mhm_SubCompletenessScore,mhm_CumulativeCompletenessScore
0,2021-07-28,2021-07-28 14:30:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,0,0,1,2,0,0,100,4,0.25,0.84
1,2021-07-31,2021-07-31 01:10:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,0,0,1,2,0,0,100,4,0.25,0.84
2,2021-08-05,2021-08-05 15:20:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,0,0,1,2,0,0,100,4,0.25,0.84
0,2021-07-28,2021-07-28 14:30:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,0,1,1,2,0,0,100,4,0.25,0.82
1,2021-07-31,2021-07-31 01:10:04,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,40974,13SCV566084,United States,USA,...,0,1,1,2,0,0,100,4,0.25,0.82
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
973,2021-08-18,2021-08-18 20:25:05,2021-12-05 01:55:08,2022-01-16 21:05:00,17043304,United States of America Citizen Science,247595,18SUJ343188,United States,USA,...,0,0,1,2,0,0,100,4,0.25,0.80
1062,2021-08-22,2021-08-22 20:30:05,2021-12-05 01:55:08,2022-01-16 21:05:00,2620510,Institute for Earth Observations School,247884,18SWJ384485,United States,USA,...,0,0,1,1,0,0,100,4,0.25,0.82
1081,2021-08-24,2021-08-24 10:50:05,2021-08-24 10:50:05,2022-01-16 21:05:00,163028,Princess Chulabhorn Science High School Trang,247966,47NNJ739086,Thailand,THA,...,0,1,1,10,0,0,100,4,0.25,0.86
1082,2021-08-25,2021-08-25 08:05:03,2021-08-25 08:05:03,2022-01-16 21:05:00,163028,Princess Chulabhorn Science High School Trang,248008,47NMJ262750,Thailand,THA,...,0,1,1,4,0,0,110,6,0.50,0.88


# File Save

In [9]:
# Write the joined table to "<output_file_name>_<start_date>_<end_date>.csv"
# in the current working directory.
output_path = f"{output_file_name}_{start_date}_{end_date}.csv"
joined_df.to_csv(output_path)