# Photo Downloading

In [1]:
import os
import requests
import pandas as pd
import xlsxwriter

from collections import defaultdict
from go_utils import constants, get_api_data
from go_utils.photo_download import download_mhm_photos

## Setup
`input_file`: The CSV containing cleaned GLOBE Observer data  
`resolution`: The resolution of the photos to be downloaded  
`photo_directory`: The folder/name of the folder to store the photos in  
`photo_types`: Set a photo type to `True` to download it, or to `False` to skip it.

In [2]:
# CSV of cleaned observations that the photo download reads from.
input_file = "Mosquito Habitat Mapper-2772.csv"

# Passed to download_mhm_photos as its `resolution` argument.
resolution = (480, 360)

# Folder the photos are downloaded into and later read back from.
photo_directory = "Africa Photos"

# One switch per photo type: True keeps the type, False turns it off.
photo_types = dict(
    watersource_photo=True,
    larvae_photo=True,
    abdomen_photo=True,
)

In [3]:
# Load the cleaned observations from `input_file`; this frame is what
# download_mhm_photos receives in the download cell.
df = pd.read_csv(input_file)

In [4]:
# Every photo type switched off in `photo_types` is forwarded to the downloader
# as a keyword argument whose value is an empty string.
# NOTE(review): presumably an empty string makes download_mhm_photos skip that
# photo type -- confirm the keyword names against its signature.
download_kwargs = {
    photo_type: ""
    for photo_type, enabled in photo_types.items()
    if not enabled
}

In [None]:
# Download the photos for the observations in `df` into `photo_directory`.
# The MHM id is requested in each filename; the spreadsheet cells below parse
# it back out of the names to group photos by observation.
targets = download_mhm_photos(
    df,
    photo_directory,
    include_in_name=["mhm_id"],
    resolution=resolution,
    **download_kwargs,
)

# Spreadsheet Generator

`spreadsheet_name`: The name of the spreadsheet file  
`step`: The number of entries per spreadsheet

In [6]:
# Base file name; each generated workbook is named "<start index><spreadsheet_name>".
spreadsheet_name = "africa-6-30-2022.xlsx"
# Maximum number of observations written to a single workbook.
step = 100

In [7]:
# os.listdir returns entries in arbitrary order. Sorting makes the grouping
# below -- and therefore which observations land in which batch spreadsheet --
# reproducible from one run (and one machine) to the next.
photos = sorted(os.listdir(photo_directory))

In [8]:
# Group the downloaded photo files by the MHM id embedded in their file name.
# Names are expected to look like "<prefix>_<mhm_id>_<photo_id>.<ext>":
# make_spreadsheet converts field 1 with int() and reads the photo id from field 2.
picture_dict = defaultdict(list)
skipped_files = []
for photo in photos:
    fields = photo.split("_")

    # Anything else in the folder (e.g. Thumbs.db, .DS_Store) would either raise
    # IndexError here or add a non-numeric key that crashes int() later on.
    if len(fields) < 3 or not fields[1].isdecimal():
        skipped_files.append(photo)
        continue

    mhm_entry = fields[1]
    picture_dict[mhm_entry].append(photo)

# Report what was ignored instead of dropping it silently.
if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that do not match the photo naming pattern: {skipped_files}")

In [9]:
# Retrieve the Mosquito Habitat Mapper records through go_utils.
# make_spreadsheet looks up each observation's metadata in this frame.
mhm_df = get_api_data(constants.mosquito_protocol)

# Bare expression: renders the frame as the cell output for a visual check.
mhm_df

Unnamed: 0,mhm_protocol,mhm_measuredDate,mhm_createDate,mhm_updateDate,mhm_publishDate,mhm_organizationId,mhm_organizationName,mhm_siteId,mhm_siteName,mhm_countryName,...,mhm_IsGenusOfInterest,mhm_IsWaterSourceContainer,mhm_HasWaterSource,mhm_PhotoCount,mhm_RejectedCount,mhm_PendingCount,mhm_PhotoBitBinary,mhm_PhotoBitDecimal,mhm_SubCompletenessScore,mhm_CumulativeCompletenessScore
0,mosquito_habitat_mapper,2022-07-02,2022-07-02 13:50:01,2022-07-02 13:50:01,2022-07-02 14:00:00,17459532,Brazil Citizen Science,282260,21JYM452828,Brazil,...,0,1,1,0,0,1,000,0,0.00,0.81
1,mosquito_habitat_mapper,2022-07-01,2022-07-01 00:35:01,2022-07-01 00:35:01,2022-07-01 00:40:00,17043304,United States of America Citizen Science,282080,16SGC613701,United States,...,0,0,1,0,0,1,000,0,0.00,0.83
2,mosquito_habitat_mapper,2022-07-01,2022-07-01 10:10:03,2022-07-01 10:45:02,2022-07-01 10:50:00,64939791,Slovak Republic Citizen Science,281293,33UXP666442,Slovak Republic,...,0,1,1,0,0,1,000,0,0.00,0.90
3,mosquito_habitat_mapper,2022-07-01,2022-07-01 10:10:03,2022-07-01 10:45:02,2022-07-01 10:50:00,64939791,Slovak Republic Citizen Science,281293,33UXP666442,Slovak Republic,...,0,1,1,0,0,1,000,0,0.00,0.85
4,mosquito_habitat_mapper,2022-07-01,2022-07-01 10:10:03,2022-07-01 10:45:02,2022-07-01 10:50:00,64939791,Slovak Republic Citizen Science,281293,33UXP666442,Slovak Republic,...,0,1,1,0,0,2,000,0,0.00,0.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36894,mosquito_habitat_mapper,2017-05-31,2022-02-22 08:37:26,2022-02-22 08:37:26,2022-06-28 16:50:17,17043304,United States of America Citizen Science,52004,10SEG858321,United States,...,0,0,1,0,0,0,000,0,0.00,0.73
36895,mosquito_habitat_mapper,2017-05-31,2022-02-22 08:37:26,2022-02-22 08:37:26,2022-06-28 16:50:17,14564129,"SciStarter Citizen Scientists, LLC GLOBE v-School",47350,16SEE200984,United States,...,0,1,1,1,0,0,100,4,0.25,0.75
36896,mosquito_habitat_mapper,2017-05-31,2022-02-22 08:37:26,2022-02-22 08:37:26,2022-06-28 16:50:17,17043304,United States of America Citizen Science,51975,11SMT087788,United States,...,0,1,1,2,0,0,100,4,0.25,0.83
36897,mosquito_habitat_mapper,2017-05-31,2022-02-22 08:37:26,2022-02-22 08:37:26,2022-06-28 16:50:17,20699469,Alder Creek Community Forest,52001,10TDN756521,United States,...,0,1,1,6,0,0,110,6,0.50,0.87


In [10]:
def _blank_if_missing(value):
    """Return an empty string for NaN/None/NaT, otherwise the value unchanged.

    XlsxWriter's ``write()`` rejects NaN numbers unless the workbook is created
    with ``nan_inf_to_errors``, so missing metadata is written as blank text.
    """
    return "" if pd.isna(value) else value


def make_spreadsheet(observation_ids, spreadsheet_name):
    """Write one Excel workbook with a metadata-and-photos row per observation.

    Columns A-G hold the MHM id, genus classification, water source, user id,
    latitude, longitude and measured date. Starting at column H there is one
    column per downloaded photo, containing the embedded image and the photo's
    data.globe.gov URL.

    Uses the notebook-level ``picture_dict`` (MHM id -> photo file names),
    ``mhm_df`` (observation records) and ``photo_directory``.

    Parameters
    ----------
    observation_ids : iterable of str
        MHM ids, as they appear in the photo file names, to include.
    spreadsheet_name : str
        Path of the ``.xlsx`` file to create.

    Raises
    ------
    IndexError
        If an id has no matching row in ``mhm_df``.
    ValueError
        If an id cannot be converted with ``int()``.
    """
    first_photo_col = 7  # column H; columns A-G hold the metadata
    headers = ["MHM Id", "Classification", "Container", "Userid",
               "latitude", "longitude", "date", "Photo"]

    # .get() instead of indexing: picture_dict is a defaultdict, so indexing an
    # unknown id would silently add an empty entry to the shared dictionary.
    target_dict = {
        observation: picture_dict.get(observation, [])
        for observation in observation_ids
    }

    # Create a new Excel file and add a worksheet.
    workbook = xlsxwriter.Workbook(spreadsheet_name)
    worksheet = workbook.add_worksheet("Photos")

    # Header row.
    for col, header in enumerate(headers):
        worksheet.write(0, col, header)

    # Widen every column that may hold a photo. default=0 keeps max() from
    # raising ValueError when no photos were found at all.
    # NOTE(review): width 65 / row height 372 are presumably sized for the
    # downloaded photo resolution -- confirm if `resolution` changes.
    file_constant = max((len(files) for files in picture_dict.values()), default=0)
    worksheet.set_column(first_photo_col, first_photo_col + file_constant + 2, 65)

    for row, (mhm_id, filenames) in enumerate(target_dict.items(), start=1):
        worksheet.write(row, 0, mhm_id)

        records = mhm_df[mhm_df["mhm_MosquitoHabitatMapperId"] == int(mhm_id)].to_dict("records")
        if not records:
            # Same exception type as the bare [0] lookup, but with a usable message.
            raise IndexError(f"MHM id {mhm_id} has photos but no matching row in mhm_df")
        record = records[0]

        # pd.to_datetime accepts Timestamp, date and string values alike, so
        # this also works when the date column was loaded as plain text.
        date_str = pd.to_datetime(record["mhm_measuredDate"]).strftime("%Y/%m/%d")

        worksheet.write(row, 1, _blank_if_missing(record["mhm_Genus"]))
        worksheet.write(row, 2, _blank_if_missing(record["mhm_WaterSource"]))
        worksheet.write(row, 3, _blank_if_missing(record["mhm_Userid"]))
        worksheet.write(row, 4, _blank_if_missing(record["mhm_Latitude"]))
        worksheet.write(row, 5, _blank_if_missing(record["mhm_Longitude"]))
        worksheet.write(row, 6, date_str)

        # The row height is the same for every photo, so set it once per row.
        if filenames:
            worksheet.set_row(row, 372)

        for offset, name in enumerate(filenames):
            col = first_photo_col + offset
            # Third underscore-separated field of the file name, minus the extension.
            photo_id = name.split("_")[2].split(".")[0]

            # The image floats over the cell; the URL is the cell's own content.
            worksheet.insert_image(row, col, f"{photo_directory}/{name}")
            worksheet.write(row, col, f"https://data.globe.gov/system/photos/{date_str}/{photo_id}/original.jpg")

    workbook.close()

In [11]:
# Every MHM id that has at least one downloaded photo, in the dictionary's
# insertion order.
observation_ids = list(picture_dict)

In [12]:
# Write the observations out in batches of `step`, one workbook per batch.
# Each file name is the batch's starting index followed by `spreadsheet_name`.
for i in range(0, len(observation_ids), step):
    batch_ids = observation_ids[i: i + step]
    batch_filename = f"{i}{spreadsheet_name}"
    make_spreadsheet(batch_ids, batch_filename)