In [1]:
import os
import pandas as pd
from go_utils.photo_download import get_mhm_download_targets

In [136]:
# CSV file with the observations whose photos should be downloaded.
input_file = "Ethiopia.csv"
# Size handed to PIL's Image.resize() for every downloaded photo.
resolution = (480, 360)
# Folder the photos are saved into and later listed to build the spreadsheet.
photo_directory = "Africa Ethiopia Photos"

In [137]:
# Photo types to download. Entries set to False are forwarded to
# get_mhm_download_targets as empty-string keyword arguments
# (presumably that disables the photo type -- confirm against go_utils).
photo_types = {
    "watersource_photo" : True,
    "larvae_photo" : True,
    "abdomen_photo" : False
}

In [138]:
# Load the observations from the configured CSV file.
df = pd.read_csv(input_file)

In [139]:
# Every photo type switched off in photo_types is passed to the helper as an
# empty-string keyword argument; enabled types keep the helper's defaults.
download_kwargs = dict.fromkeys(
    (photo_type for photo_type, enabled in photo_types.items() if not enabled),
    "",
)
targets = get_mhm_download_targets(
    df,
    photo_directory,
    include_in_name=["mhm_id"],
    **download_kwargs,
)

In [140]:
from PIL import Image, ImageDraw, ImageFont
import requests
def get_img(url, retries=1, timeout=30):
    """Download an image from a URL and open it with PIL.

    A failed attempt (network error, HTTP error status, or a body that PIL
    cannot parse) is retried; failures are reported with print() rather than
    raised so that a batch of downloads can keep going.

    Arguments:
        url: str, an image URL for a GLOBE photo
        retries: int, number of additional attempts after the first failure
        timeout: seconds to wait on the server before giving up on an attempt

    Returns:
        The opened PIL image, or None if every attempt failed.
    """
    import io

    attempts = retries + 1
    for attempt in range(attempts):
        try:
            response = requests.get(url, timeout=timeout)
            # Surface HTTP errors directly instead of letting PIL choke on an
            # error page.
            response.raise_for_status()
            # Reading the whole body releases the connection and applies any
            # content decoding, unlike handing PIL the raw socket stream.
            img = Image.open(io.BytesIO(response.content))
        except Exception as e:
            if attempt + 1 < attempts:
                print(f"{url} failed, retrying...")
            else:
                print(f"{url} failed: {repr(e)}")
        else:
            if attempt > 0:
                print("retry successful")
            return img
    return None
    
def download_photo(url, directory, filename):
    """Download one photo, resize it and save it as a JPEG.

    Arguments:
        url: str, the photo URL
        directory: str, folder the photo is written to
        filename: str, file name without extension; ".jpg" is appended

    Returns:
        None. A photo that could not be downloaded is reported and skipped.
    """
    img = get_img(url)
    if img is None:
        # get_img already reported the failure; skip this photo instead of
        # crashing the whole batch on None.resize().
        print(f"Skipping {filename}: could not download {url}")
        return
    # JPEG cannot store alpha or palette images (e.g. many PNGs), so convert
    # anything that is not already a JPEG-compatible mode.
    if img.mode not in ("RGB", "L", "CMYK"):
        img = img.convert("RGB")
    img.resize(resolution).save(f"{directory}/{filename}.jpg")

In [141]:
# Make sure the output folder exists. makedirs with exist_ok avoids the
# check-then-create race and also works when photo_directory is a nested path.
os.makedirs(photo_directory, exist_ok=True)
# Each target is a (url, directory, filename) triple.
for url, directory, filename in targets:
    download_photo(url, directory, filename)

# Spreadsheet Generator

In [115]:
import xlsxwriter
from collections import defaultdict
from go_utils import constants, get_api_data

In [142]:
# File name of the Excel workbook that will be generated.
spreadsheet_name = "ethiopia_larvae_2017-05-31_2022-04-22.xlsx"

In [143]:
# Every entry currently in the photo folder (os.listdir returns them in
# arbitrary order).
photos = os.listdir(photo_directory)

In [144]:
# Group the photo file names by MHM id. File names look like
# "mhm_<mhm id>_<photo id>...", so the id is the second "_"-separated part.
picture_dict = defaultdict(list)
for photo in photos:
    name_parts = photo.split("_")
    # os.listdir also returns unrelated entries (hidden files, checkpoint
    # folders, ...). Skip anything that lacks both the MHM id and the photo id
    # instead of failing here or later when the ids are parsed.
    if len(name_parts) < 3:
        print(f"Skipping unexpected file in {photo_directory}: {photo}")
        continue
    picture_dict[name_parts[1]].append(photo)

In [145]:
# Show the MHM id -> photo file names grouping.
picture_dict

defaultdict(list,
            {'32516': ['mhm_32516_2390182.png.jpg',
              'mhm_32516_2390186.png.jpg']})

In [146]:
# Fetch the mosquito protocol data through go_utils.
mhm_df = get_api_data(constants.mosquito_protocol)

# Display the fetched table.
mhm_df

Unnamed: 0,mhm_protocol,mhm_measuredDate,mhm_createDate,mhm_updateDate,mhm_publishDate,mhm_organizationId,mhm_organizationName,mhm_siteId,mhm_siteName,mhm_countryName,...,mhm_IsGenusOfInterest,mhm_IsWaterSourceContainer,mhm_HasWaterSource,mhm_PhotoCount,mhm_RejectedCount,mhm_PendingCount,mhm_PhotoBitBinary,mhm_PhotoBitDecimal,mhm_SubCompletenessScore,mhm_CumulativeCompletenessScore
0,mosquito_habitat_mapper,2018-11-25,2022-02-22 09:16:51,2022-02-22 09:16:51,2022-02-22 12:45:14,13063641,GPM Satellite Mission,35785,18SUJ105472,United States,...,0,1,1,2,0,0,100,4,0.25,0.78
1,mosquito_habitat_mapper,2019-04-07,2022-02-22 09:38:35,2022-02-22 09:38:35,2022-02-22 12:45:14,13063641,GPM Satellite Mission,35785,18SUJ105472,United States,...,0,1,1,3,0,0,100,4,0.25,0.80
2,mosquito_habitat_mapper,2019-04-07,2022-02-22 09:38:35,2022-02-22 09:38:35,2022-02-22 12:45:14,13063641,GPM Satellite Mission,35785,18SUJ105472,United States,...,0,1,1,4,0,0,100,4,0.25,0.80
3,mosquito_habitat_mapper,2019-05-29,2022-02-22 09:46:10,2022-02-22 09:46:10,2022-02-22 12:45:14,13063641,GPM Satellite Mission,35785,18SUJ105472,United States,...,0,1,1,7,0,0,110,6,0.50,0.86
4,mosquito_habitat_mapper,2019-08-04,2022-02-22 10:08:15,2022-02-22 10:08:15,2022-02-22 12:45:14,13063641,GPM Satellite Mission,35785,18SUJ105472,United States,...,0,1,1,3,0,0,100,4,0.25,0.82
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34689,mosquito_habitat_mapper,2022-04-22,2022-04-23 07:15:01,2022-04-25 14:37:28,2022-04-27 21:25:46,17615655,Thailand Citizen Science,274394,47PPK056343,Thailand,...,0,1,1,1,0,0,100,4,0.25,0.82
34690,mosquito_habitat_mapper,2022-04-22,2022-04-23 07:25:01,2022-04-25 14:36:51,2022-04-27 21:25:46,17615655,Thailand Citizen Science,274395,47PPK056342,Thailand,...,0,1,1,1,0,0,100,4,0.25,0.88
34691,mosquito_habitat_mapper,2022-04-22,2022-04-23 08:20:04,2022-04-25 14:37:27,2022-04-27 21:25:46,17615655,Thailand Citizen Science,274401,47PNK660343,Thailand,...,0,0,1,1,0,0,100,4,0.25,0.82
34692,mosquito_habitat_mapper,2022-04-07,2022-04-25 08:40:01,2022-04-25 14:25:34,2022-04-27 21:25:46,17648539,Mauritius Citizen Science,274577,40KEC807556,Mauritius,...,1,1,1,3,0,0,100,4,0.50,0.86


In [147]:
# Create a new Excel workbook; worksheets are added to it afterwards.
workbook = xlsxwriter.Workbook(spreadsheet_name)

In [148]:
worksheet = workbook.add_worksheet("Photos")

# Header row. Columns A-G hold the observation metadata; photos start in
# column H (index 7) and continue in the columns to its right.
headers = ["MHM Id", "Classification", "Container", "Userid",
           "latitude", "longitude", "date", "Photo"]
for column, title in enumerate(headers):
    worksheet.write(0, column, title)

# Widen every column that may receive a photo. default=0 keeps this from
# raising ValueError when no photos were found.
file_constant = max((len(files) for files in picture_dict.values()), default=0)
worksheet.set_column(7, 7 + file_constant + 2, 65)

# One spreadsheet row per MHM id. The row counter only advances for ids that
# are actually written, so skipped ids leave no blank rows.
row = 0
for mhm_id, filenames in picture_dict.items():
    matches = mhm_df[mhm_df["mhm_MosquitoHabitatMapperId"] == int(mhm_id)]
    if matches.empty:
        # The photo folder may hold photos of observations that are not in the
        # fetched data; report and skip instead of failing with IndexError.
        print(f"No record found for MHM id {mhm_id}; skipping its photos")
        continue
    row += 1
    record = matches.to_dict('records')[0]

    date_str = record["mhm_measuredDate"].strftime("%Y/%m/%d")
    row_values = [
        mhm_id,
        record["mhm_Genus"],
        record["mhm_WaterSource"],
        record["mhm_Userid"],
        record["mhm_Latitude"],
        record["mhm_Longitude"],
        date_str,
    ]
    for column, value in enumerate(row_values):
        # Missing values are written as blanks; xlsxwriter rejects NaN numbers.
        worksheet.write(row, column, "" if pd.isna(value) else value)

    # Row height for the embedded photos (presumably sized to fit the resized
    # images -- confirm). It only needs to be set once per row.
    worksheet.set_row(row, 372)
    for offset, name in enumerate(filenames):
        column = 7 + offset
        # Insert the image and, in the same cell, the link to the original.
        worksheet.insert_image(row, column, f"{photo_directory}/{name}")
        # The photo id is the third "_"-separated part, without extensions.
        photo_id = name.split("_")[2].split(".")[0]
        worksheet.write(row, column, f"https://data.globe.gov/system/photos/{date_str}/{photo_id}/original.jpg")

# Closing the workbook is what writes the .xlsx file to disk.
workbook.close()