# GLOBE data sources

In [None]:
# package setup

import requests
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import sys
import rasterio
import rasterio.features
import alphashape

In [None]:
# Download every Mosquito Habitat Mapper observation from the GLOBE API as
# GeoJSON and flatten each feature into a plain dict (its properties plus
# latitude/longitude), collected in `parsedData`.
base_url = 'https://api.globe.gov/search/v1/measurement/protocol/'

query_params = {
  'protocols': 'mosquito_habitat_mapper',
  'geojson': "TRUE",
  'sample': "FALSE"
}

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# with a clear HTTP error instead of an obscure JSON/KeyError further down.
response = requests.get(base_url, params = query_params, timeout = 60)
response.raise_for_status()
mosquito_data_json = response.json()

parsedData = []

for obs in mosquito_data_json['features']:
    parsedObs = obs['properties']

    # The coordinate pair is read as [longitude, latitude].
    coordinates = obs['geometry']['coordinates']
    latitude = coordinates[1]
    longitude = coordinates[0]

    parsedObs['latitude'] = latitude
    parsedObs['longitude'] = longitude

    parsedData.append(parsedObs)

In [None]:
# Build a tidy table of the observations: shorten the verbose GLOBE column
# names and discard the columns that are not used later in the notebook.
df = pd.DataFrame(parsedData)
gdf = gpd.GeoDataFrame(
    parsedData, 
    geometry = gpd.points_from_xy(df.longitude, df.latitude),
    # Authority string instead of the deprecated {'init': 'epsg:4326'} dict.
    crs = "EPSG:4326"
).rename(columns = {
    "countryName": "country",
    "mosquitohabitatmapperMeasuredAt": "timestamp",
    "mosquitohabitatmapperWaterSource": "waterSource",
    "mosquitohabitatmapperWaterSourceType": "sourceType",
    "mosquitohabitatmapperLarvaeCount": "larvaeCount",
    "mosquitohabitatmapperMosquitoAdults": "adults",
    "mosquitohabitatmapperMosquitoEggs": "eggs",
    "mosquitohabitatmapperMosquitoPupae": "pupae",
    "mosquitohabitatmapperGenus": "genus",
    "mosquitohabitatmapperSpecies": "species",
    "mosquitohabitatmapperLarvaFullBodyPhotoUrls": "larvae_images",
    "mosquitohabitatmapperAbdomenCloseupPhotoUrls": "abdomen_images",
    "mosquitohabitatmapperWaterSourcePhotoUrls": "water_images",
}).drop(columns = [
    "mosquitohabitatmapperMeasurementLongitude", 
    "mosquitohabitatmapperMeasurementLatitude",
    "countryCode",
    "mosquitohabitatmapperBreedingGroundEliminated",
    "elevation",
    "mosquitohabitatmapperDataSource",
    "mosquitohabitatmapperMeasurementElevation",
    "mosquitohabitatmapperMosquitoHabitatMapperId",
    "mosquitohabitatmapperUserid",
    "organizationId",
    "organizationName",
    "protocol",
    "siteId",
    "siteName",
    "mosquitohabitatmapperLastIdentifyStage",
    "mosquitohabitatmapperComments",
    # NOTE: the point geometry is dropped as well; the position survives only
    # in the `latitude` / `longitude` columns.
    "geometry"
])

def coalesce(df, column_names):
    """Return the first non-missing value per row across `column_names`.

    Columns are consulted in the order given: the first column is the
    starting point and each later column only fills the gaps still left.
    """
    remaining = iter(column_names)
    merged = df[next(remaining)]
    for fallback in remaining:
        merged = merged.fillna(df[fallback])
    return merged

def first_image(series):
    """Return the first entry of each ';'-separated photo list.

    Args:
        series: iterable of strings such as "a.jpg;b.jpg", or missing values.

    Returns:
        A list with one string per input element: the text before the first
        ';', or "" where the input is missing (NaN, None or pd.NA).
    """
    # pd.isna also recognises None, which np.isnan rejects with a TypeError.
    return ["" if pd.isna(photos) else photos.split(";")[0] for photos in series]

# Keep a single photo per observation: take the first available photo list
# (abdomen close-up, then larva, then water source) and use its first entry.
photo_columns = ["abdomen_images", "larvae_images", "water_images"]
gdf['image_name'] = first_image(coalesce(gdf, photo_columns))

# The per-type photo columns are no longer needed once collapsed.
gdf = gdf.drop(columns = photo_columns)

gdf

In [None]:
# Convert the 'true'/'false' strings to booleans and add a `seen` column that
# is True when adults, eggs or pupae were reported for the observation.

dct = {'true': True, 'false': False}

for stage in ("adults", "eggs", "pupae"):
    gdf[stage] = gdf[stage].map(dct)

gdf['seen'] = gdf[["adults", "eggs", "pupae"]].any(axis=1)

def coalesce(df, column_names):
    """Merge several columns into one, taking the first non-missing value.

    The first named column provides the initial values; every following
    column is used, in order, only to fill entries that are still missing.
    """
    names = iter(column_names)
    result = df[next(names)]
    for name in names:
        result = result.fillna(df[name])
    return result

def first_image_only(series):
    """Return the first entry of each ';'-separated photo list.

    Args:
        series: iterable of strings such as "a.jpg;b.jpg", or missing values.

    Returns:
        A list with one string per input element: the first listed image, or
        "" where the input is missing (NaN, None or pd.NA).
    """
    # pd.isna also recognises None, which np.isnan rejects with a TypeError.
    return ["" if pd.isna(photos) else photos.split(";")[0] for photos in series]

# Collapse the three photo columns into a single `image_name` column.
# An earlier cell already performs this step and drops the source columns, so
# only redo it while they are still present; otherwise this cell raises a
# KeyError when the notebook is run from top to bottom.
image_columns = ["abdomen_images", "larvae_images", "water_images"]
if set(image_columns).issubset(gdf.columns):
    gdf['image_name'] = first_image_only(coalesce(gdf, image_columns))
    gdf = gdf.drop(columns = image_columns)

# Keep only the observations where adults, eggs or pupae were reported.
gdf = gdf[gdf.seen]


## Raster map of world

In [None]:
# Base map of country outlines.
# NOTE(review): `gpd.datasets` was deprecated in GeoPandas 0.14 and removed in
# 1.0; on newer versions point read_file at a local Natural Earth file instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
ax = world.plot(figsize = (25, 15))

# `gdf` had its geometry column dropped when the table was tidied, so rebuild
# the observation points from the longitude/latitude columns before plotting.
observation_points = gpd.GeoDataFrame(
    gdf,
    geometry = gpd.points_from_xy(gdf.longitude, gdf.latitude),
    crs = "EPSG:4326",
)
observation_points.plot(ax = ax, color = "red", markersize = 3)

In [None]:
# Burn every country polygon into a 180 x 360 grid with value 1.
# The affine transform maps column c to x = c - 180 and row r to y = 90 - r,
# i.e. one cell per unit with the top-left corner at (-180, 90).
land_shapes = [(geom, 1) for geom in world["geometry"]]
grid_transform = rasterio.Affine(1, 0, -180, 0, -1, 90)

raster = rasterio.features.rasterize(
    land_shapes,
    out_shape=(180, 360),
    transform=grid_transform,
)

# One digit per cell and no delimiter: each text line is one raster row.
np.savetxt("processed/raster-world-map.txt", raster, delimiter = "", fmt = "%1u")

plt.imshow(raster)

## Event generation from malarial data

In [None]:
from flask import Flask
from flask_restful import Resource, Api
import json
import random
import numpy as np
import alphashape
from shapely.geometry import Point


# Load the pre-processed inputs used for event generation.
# The JSON files are opened with an explicit UTF-8 encoding so the result
# does not depend on the platform's default locale.
with open('processed/survey_data.json', encoding='utf-8') as json_file:
    survey_data = json.load(json_file)

with open('processed/mosquito_data.json', encoding='utf-8') as json_file:
    mosquito_data = json.load(json_file)

# geopandas opens the path itself, so no separate file handle is needed;
# the geometry of the first feature is used as the malaria polygon.
malaria_polygon = gpd.read_file("processed/malaria_polygon.json").iloc[0].geometry

# delimiter=1 reads fixed-width, single-character fields, matching a text
# file that stores one digit per raster cell with no separator.
raster_map = np.genfromtxt(
    "processed/raster-world-map.txt",
    dtype=np.uint8,
    delimiter=1
)

def random_location():
    """Draw a random (lat, long) pair as a 2-element array rounded to 5 decimals.

    `lat` is uniform on [0, 159] and `long` is uniform on [0, 359]; `inland`
    truncates both to ints and uses them as raster row/column indices.

    NOTE(review): the raster written by this notebook has 180 rows, so the
    159 bound leaves the last rows unsampled; confirm whether that is intended.
    """
    coords = np.array([random.uniform(0, 159), random.uniform(0, 359)])
    return coords.round(5)

def inland(lat, long):
    """Return whether the `raster_map` cell at (int(lat), int(long)) equals 1.

    Both arguments are truncated to integers; `lat` selects the row and
    `long` the column of the raster.
    """
    row, col = int(lat), int(long)
    return raster_map[row, col] == 1

def random_land_location():
    """Keep drawing random locations until one passes `inland`, then return it."""
    loc = random_location()
    while not inland(*loc):
        loc = random_location()
    return loc

def sample_real_event():
    """Return one randomly chosen entry of `mosquito_data`."""
    picked = random.sample(mosquito_data, 1)
    return picked[0]

In [None]:
# Probe whether the malaria polygon contains the point (x=0, y=-90);
# presumably x is longitude and y is latitude, TODO confirm.
probe_point = Point(0, -90)
malaria_polygon.contains(probe_point)