# In [118]:
# Deployment tier to document; the setup code below accepts "ti", "uat" or "prod".
environment = "ti"
# Server names substituted into https://<domain>/<server>/rest/services.
servers = ["server"]
# Service folders whose map services are listed on each server.
folders = ["rfc"]

# In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.core.display import display, HTML, Markdown
import imgkit
import json
import os
from datetime import datetime
from arcgis.gis import GIS

# Maps each supported environment to (portal domain, service-name marker).
# The marker is the substring later used to pick out that environment's map
# services; "prod" uses an empty marker, which matches every service name.
_ENVIRONMENT_SETTINGS = {
    "ti": ("maps-testing.water.noaa.gov", "_alpha"),
    "uat": ("maps-staging.water.noaa.gov", "_beta"),
    "prod": ("maps.water.noaa.gov", ""),
}

try:
    url_domain, service_suffix = _ENVIRONMENT_SETTINGS[environment]
except (KeyError, TypeError):
    # KeyError: unknown environment name. TypeError: unhashable value.
    # ValueError is still an Exception, so existing broad handlers keep working.
    raise ValueError(
        f"{environment} is an invalid environment. Must be ti, uat, or prod"
    ) from None
    
# imgkit has to be pointed at the wkhtmltoimage executable; this location is
# specific to the machine the notebook runs on.
config = imgkit.config(
    wkhtmltoimage=r"C:\Users\arcgis\Documents\wkhtmltopdf\bin\wkhtmltoimage.exe"
)

# Sign in to the portal of the selected environment. Credentials come from the
# EGIS_USERNAME / EGIS_PASSWORD environment variables; certificate
# verification is switched off for this connection.
gis_host = "https://" + url_domain + "/portal"
gis = GIS(
    gis_host,
    os.getenv("EGIS_USERNAME"),
    os.getenv("EGIS_PASSWORD"),
    verify_cert=False,
)

# Parameters attached to every REST request made further down: the portal
# token (read from the connection's private `_con` attribute) and the
# response format.
gis_request_data = dict(token=gis._con.token, f="json")
    
def printmd(string):
    """Show ``string`` in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

# Folder that receives every generated CSV, PNG and JSON file.
_OUTPUT_DIR = "hydrovis_tracker"

# Query string that asks a layer or table for a single record
# (resultRecordCount=1) with all of its fields (outFields=*), no geometry.
# Kept verbatim so the request sent to the server is unchanged.
_SAMPLE_RECORD_QUERY = (
    "where=oid>=1&text=&objectIds=&time=&geometry="
    "&geometryType=esriGeometryEnvelope&inSR="
    "&spatialRel=esriSpatialRelIntersects&distance="
    "&units=esriSRUnit_Foot&relationParam=&outFields=*"
    "&returnGeometry=false&returnTrueCurves=false"
    "&maxAllowableOffset=&geometryPrecision=&outSR=&havingClause="
    "&returnIdsOnly=false&returnCountOnly=false&orderByFields="
    "&groupByFieldsForStatistics=&outStatistics="
    "&returnZ=false&returnM=false&gdbVersion=&historicMoment="
    "&returnDistinctValues=false&resultOffset=&resultRecordCount=1"
    "&returnExtentOnly=false&datumTransformation=&parameterValues="
    "&rangeValues=&quantizationParameters="
    "&featureEncoding=esriDefault&f=json"
)


def _fetch_field_rows(item_url):
    """Return one dict per field of the layer or table at ``item_url``.

    Each row has "Name" and "Alias" taken from the query response's
    ``fieldAliases``. When the query returns a record, each row also gets a
    "Data" entry holding that record's value for the field. When no record
    comes back, a red "MISSING DATA" heading is printed and the rows carry
    only the name and alias.
    """
    result = requests.get(
        f"{item_url}/query?{_SAMPLE_RECORD_QUERY}", params=gis_request_data
    ).json()
    aliases = result['fieldAliases']
    features = result['features']
    if not features:
        printmd("# <font color='red'>MISSING DATA</font>")
        return [{"Name": name, "Alias": alias} for name, alias in aliases.items()]
    sample = features[0]['attributes']
    return [
        {"Name": name, "Alias": alias, "Data": sample[name]}
        for name, alias in aliases.items()
    ]


def _show_and_save_fields(rows, csv_name):
    """Display ``rows`` as a table and write them to ``csv_name`` in the output folder."""
    frame = pd.DataFrame(rows)
    display(frame)
    frame.to_csv(os.path.join(_OUTPUT_DIR, csv_name), index=False)


# Make sure the output folder exists before the first file is written.
os.makedirs(_OUTPUT_DIR, exist_ok=True)

for server in servers:
    base_url = f"https://{url_domain}/{server}/rest/services"

    print(f"Getting services for the following folders {folders} on the {server} server")
    all_services = []
    for folder in folders:
        # NOTE(review): the URL asks for f=pjson while gis_request_data also
        # carries an `f` entry - confirm which one the server honours.
        res = requests.get(f"{base_url}/{folder}?f=pjson", params=gis_request_data)
        services = res.json()['services']
        # Keep only map services whose name contains this environment's marker.
        all_services.extend(
            service['name']
            for service in services
            if service['type'] == 'MapServer' and service_suffix in service['name']
        )

    print(f"Getting metadata for {len(all_services)} services")
    for service in all_services:
        # Use the last path segment of the listed name for output file names;
        # also works if a name comes back without a folder prefix.
        service_name = service.split("/")[-1]
        service_url = f"{base_url}/{service}/MapServer"
        printmd(f"# {service}")

        service_metadata = requests.get(f"{service_url}?f=pjson", params=gis_request_data).json()
        # Only the first line of the service description is reported.
        description = service_metadata['serviceDescription'].split("\n")[0]
        printmd("### Description")
        print(description)
        printmd("### Map Name")
        print(service_metadata['mapName'])

        metadata_json = {
            "Description": description,
            "Map Name": service_metadata['mapName'],
            "Metadata Updated": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
        }

        layer_info = requests.get(f"{service_url}/layers?f=pjson", params=gis_request_data).json()
        tables = layer_info['tables']
        layers = layer_info['layers']

        legends = requests.post(f"{service_url}/legend?f=html", data=gis_request_data)
        soup = BeautifulSoup(legends.content, 'html.parser')
        # NOTE(review): assumes the fourth <table> of the legend page wraps one
        # nested <table> per layer, positioned so that it can be indexed by
        # layer id - confirm against the server's HTML.
        legend_tables = soup.find_all('table')[3].find_all('table')

        for layer in layers:
            printmd(f"## {layer['name']}")
            metadata_json[f"Layer {layer['id']}"] = layer['name']

            if layer['type'] == "Raster Layer":
                # Raster layers are not queried; a placeholder row is recorded.
                fields = [{"Name": "NA", "Alias": "NA"}]
            else:
                printmd("### Fields")
                fields = _fetch_field_rows(f"{service_url}/{layer['id']}")
            _show_and_save_fields(fields, f"{service_name}_{layer['id']}_fields.csv")

            printmd("### Legend")
            legend_html = str(legend_tables[layer['id']])
            display(HTML(legend_html))
            legend_png = os.path.join(_OUTPUT_DIR, f"{service_name}_{layer['id']}_legend.png")
            imgkit.from_string(legend_html, legend_png, config=config)

        for table in tables:
            printmd(f"## {table['name']}")
            metadata_json[f"Table {table['id']}"] = table['name']

            printmd("### Fields")
            fields = _fetch_field_rows(f"{service_url}/{table['id']}")
            _show_and_save_fields(fields, f"{service_name}_{table['id']}_fields.csv")

        json_file = os.path.join(_OUTPUT_DIR, f"{service_name}_metadata.json")
        with open(json_file, "w") as outfile:
            json.dump(metadata_json, outfile)