In [None]:
! pip install python-dateutil opencage nltk ipyleaflet -q

In [None]:
import nltk

# Fetch the NLTK stop-word corpus; `stopwords.words("english")` is read from it later on.
nltk.download('stopwords')

In [None]:
import boto3
import dateutil
import json
import os
import requests


from nltk.corpus import stopwords
from transformers import pipeline, RobertaForTokenClassification, RobertaTokenizerFast, Trainer, TrainingArguments
from opencage.geocoder import OpenCageGeocode

# Name of the S3 bucket that check_files() scans for the config and model files.
BUCKET_NAME = <update>
# Base URL of the deployed inference service; infer() POSTs to f"{INFERENCE_URL}/infer".
INFERENCE_URL = <update>
# API key passed to OpenCageGeocode in geocode(); do not commit a real key with the notebook.
OPENCAGE_API_KEY = <update>
# Padding subtracted from / added to the geocoded lng and lat to build the bounding box
# (presumably in degrees, i.e. the unit of the OpenCage coordinates -- confirm).
THRESHOLD = 0.17

In [None]:
## Convert NER format to JSON format

# Tag names in label-id order: ner_to_dict maps the pipeline's "LABEL_<i>" to unique_labels[i].
unique_labels = ['O', 'DATE', 'LOCATION']  # add all your labels here
# Inverse mapping, tag name -> label id.
label_dict = {label: i for i, label in enumerate(unique_labels)}
# English stop words; ner_to_dict removes them from the extracted entity text.
STOP_WORDS = stopwords.words("english")

# Token-classification model loaded from 'finetuned_roberta_ner'.
model = RobertaForTokenClassification.from_pretrained(
    'finetuned_roberta_ner',
    num_labels=len(unique_labels) # This should match your total number of NER tags
)

# Tokenizer loaded from the same 'finetuned_roberta_ner' location as the model.
tokenizer = RobertaTokenizerFast.from_pretrained('finetuned_roberta_ner', add_prefix_space=True)

# "ner" pipeline combining the model and tokenizer; called by ner_to_dict.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

def ner_to_dict(query):
    """Extract the tagged entities from `query` as a ``{label_name: text}`` dict.

    Runs ``ner_pipeline`` on the query, concatenates the word pieces predicted for
    every entity label, turns the tokenizer's 'Ġ' markers into spaces and removes
    stop words. Each extracted entry is also printed.

    Args:
        query: Free-text request to analyse.

    Returns:
        dict mapping names from ``unique_labels`` (e.g. 'DATE', 'LOCATION') to the
        extracted text. Labels that were not predicted are absent from the dict.
    """
    pieces_by_label = {}
    for item in ner_pipeline(query):
        # Labels arrive as "LABEL_<index>". Parse the whole numeric suffix (not just
        # the last character) so label sets with ten or more entries keep working.
        prefix, _, suffix = item['entity'].rpartition('_')
        if prefix != 'LABEL' or not suffix.isdecimal():
            continue
        index = int(suffix)
        # Index 0 is the first entry of unique_labels ('O'), which is not an entity.
        if index == 0 or index >= len(unique_labels):
            continue
        pieces_by_label.setdefault(unique_labels[index], []).append(item['word'])

    stop_words = set(STOP_WORDS)
    result = {}
    for label, pieces in pieces_by_label.items():
        # split()/join already discards the leading space produced by the first 'Ġ',
        # so no character is sliced off blindly: a span whose first piece has no
        # 'Ġ' keeps its first letter.
        text = ''.join(pieces).replace('Ġ', ' ')
        result[label] = ' '.join(word for word in text.split() if word not in stop_words)
        print(label, result[label])

    return result

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding

# Load your domain-specific encoder model (replace 'model_name' with your model's name)
model_name = 'event_classifier'
# The tokenizer is loaded from its own location ('event_tokenizer'), not from model_name.
classifier_tokenizer = AutoTokenizer.from_pretrained('event_tokenizer')
classifier_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4) # 3 event types + 1 'None' class

# Text-classification pipeline called by classifier_to_dict.
classifier_pipeline = pipeline("text-classification", model=classifier_model, tokenizer=classifier_tokenizer)
# Event name per class index: classifier_to_dict maps "LABEL_<i>" to CLASSES[i];
# the None entry is the 'None' class mentioned above.
CLASSES = ['burn_scars', 'crops', 'flood', None]

def classifier_to_dict(query):
    """Classify `query` and return the predicted event type.

    The label of the first prediction ("LABEL_<i>") is converted into an index
    into ``CLASSES``.

    Returns:
        ``{'event': CLASSES[i]}``; the value is None for the 'None' class.
    """
    first_label = classifier_pipeline(query)[0]['label']
    # Whatever follows the last 'LABEL_' is the class index.
    index_text = first_label.split('LABEL_')[-1]
    return dict(event=CLASSES[int(index_text)])


In [None]:
# Identifier that prefixes the config and model file names looked up in the bucket.
identifier = <update>

# Base name of the config file.
config_filename = 'burn_scars_Prithvi_100M.py'
# Identifier-prefixed config path that check_files() searches for in the object keys.
new_config_filename = f"configs/{identifier}-{config_filename}"
# Model file name that check_files() searches for in the object keys.
MODEL_NAME = f"{identifier}-workshop.pth"

# Check files in the s3 bucket.
def check_files():
    """Locate the config and model files in the S3 bucket.

    Scans every object in ``BUCKET_NAME`` and records the S3 URI of the object whose
    key contains ``new_config_filename`` and of the one whose key contains
    ``MODEL_NAME``. If several keys match, the last one listed wins.

    Returns:
        dict with keys 'config' and 'model', each an ``s3://<bucket>/<key>`` URI.

    Raises:
        FileNotFoundError: if the config or the model is not present in the bucket.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(BUCKET_NAME)

    # Start from None so that a missing file is reported explicitly instead of
    # surfacing as an UnboundLocalError at the return statement.
    config_uri = None
    model_uri = None
    for obj in bucket.objects.all():
        string_key = str(obj.key)
        if new_config_filename in string_key:
            config_uri = f"s3://{BUCKET_NAME}/{obj.key}"
        elif MODEL_NAME in string_key:
            model_uri = f"s3://{BUCKET_NAME}/{obj.key}"

    missing = [
        name
        for name, uri in ((new_config_filename, config_uri), (MODEL_NAME, model_uri))
        if uri is None
    ]
    if missing:
        raise FileNotFoundError(
            f"Could not find {', '.join(missing)} in s3://{BUCKET_NAME}"
        )
    return {'config': config_uri, 'model': model_uri}

# Look the files up once; infer() reads uploaded_files['config'] and uploaded_files['model'].
uploaded_files = check_files()

In [None]:
from nltk.corpus import stopwords
def geocode(location: str) -> "list[float] | None":
    """Geocode a query (location, region, or landmark) into a bounding box.

    The first OpenCage result is taken as a point and padded by ``THRESHOLD`` on
    every side.

    Args:
        location: Place name to look up.

    Returns:
        ``[lng - THRESHOLD, lat - THRESHOLD, lng + THRESHOLD, lat + THRESHOLD]``,
        or ``None`` when OpenCage returns no result.
    """
    opencage_geocoder = OpenCageGeocode(OPENCAGE_API_KEY)
    response = opencage_geocoder.geocode(location, no_annotations="1")
    if not response:
        # Make the "nothing found" outcome explicit for callers.
        return None

    # "geometry" of the first result: a single lat/lng point, not an extent.
    point = response[0]["geometry"]

    # convert to bbox
    return [
        point["lng"] - THRESHOLD,
        point["lat"] - THRESHOLD,
        point["lng"] + THRESHOLD,
        point["lat"] + THRESHOLD,
    ]


def infer(query):
    """Turn a natural-language request into a call to the deployed inference service.

    Extracts the location and date with ``ner_to_dict``, the event type with
    ``classifier_to_dict``, geocodes the location into a bounding box, and POSTs
    the request to ``f"{INFERENCE_URL}/infer"``.

    Args:
        query: Free-text request, e.g. "Show me burn scars in maui from august 13, 2023".

    Returns:
        dict with keys 'predictions' (decoded JSON response), 'bbox', 'date'
        (YYYY-MM-DD), 'event' and 'location'. ``None`` (after printing a message)
        when the query lacks an event type, location or date, when the location
        cannot be geocoded, or when the date cannot be parsed.
    """
    result = ner_to_dict(query)
    # classifier_to_dict returns {'event': <name or None>}. Merge it so that
    # result['event'] is the event name itself rather than a nested dict, which
    # would always be truthy and would be sent as a dict in the payload.
    result.update(classifier_to_dict(query))

    # Every one of the three fields is required.
    if not result.get('event') or not result.get('LOCATION') or not result.get('DATE'):
        print("Please provide valid location name, date, or event type")
        return None

    bounding_box = geocode(result['LOCATION'])
    print(result)
    if bounding_box is None:
        print(f"Could not geocode location: {result['LOCATION']}")
        return None

    # Import the submodule explicitly: `import dateutil` on its own does not
    # guarantee that `dateutil.parser` has been loaded.
    from dateutil import parser as date_parser
    try:
        date_str = date_parser.parse(result['DATE']).strftime('%Y-%m-%d')
    except (ValueError, OverflowError):
        print(f"Could not parse date: {result['DATE']}")
        return None

    payload = json.dumps({
        "config_path": uploaded_files['config'],
        "model_path": uploaded_files['model'],
        "model_type": result['event'],
        "date": date_str,
        "bounding_box": bounding_box
    })
    print(payload)
    headers = {
        'Content-Type': 'application/json'
    }

    # Use deployed app to get inference on the selected date/location
    response = requests.request(
        "POST",
        f"{INFERENCE_URL}/infer",
        headers=headers,
        data=payload
    )

    return {
        'predictions': response.json(),
        'bbox': bounding_box,
        'date': date_str,
        'event': result['event'],
        'location': result['LOCATION'],
    }


In [None]:
# Example request; infer() needs an event type, a location and a date to proceed.
query = "Show me burn scars in maui from august 13, 2023"
predictions = infer(query)

In [None]:
# Display the value returned by infer().
predictions

In [None]:

from ipyleaflet import Map, TileLayer, DrawControl, GeoJSON

# Date of the prediction; the imagery tiles are requested for the same day.
datestring = predictions['date']

# URL pieces shared by both HLS tile layers. The {z}/{x}/{y} placeholders are left
# in the URL for the tile layer to substitute.
WMTS_TIME_URL = 'https://gitc-a.earthdata.nasa.gov/wmts/epsg3857/best/wmts.cgi?TIME='
WMTS_TILE_PARAMS = '&style=default&tilematrixset=GoogleMapsCompatible_Level12&Service=WMTS&Request=GetTile&Version=1.0.0&Format=image%2Fpng&TileMatrix={z}&TileCol={x}&TileRow={y}'
HLSL30_TILE_LAYER = WMTS_TIME_URL + datestring + '&layer=HLS_L30_Nadir_BRDF_Adjusted_Reflectance' + WMTS_TILE_PARAMS
HLSS30_TILE_LAYER = WMTS_TIME_URL + datestring + '&layer=HLS_S30_Nadir_BRDF_Adjusted_Reflectance' + WMTS_TILE_PARAMS

hlsl30_tile_layer = TileLayer(url=HLSL30_TILE_LAYER, name='HLSL30', attribution='NASA')
# Named after its own product (was 'HLSL30', copied from the layer above).
hlss30_tile_layer = TileLayer(url=HLSS30_TILE_LAYER, name='HLSS30', attribution='NASA')

# The service response nests the GeoJSON under its own 'predictions' key.
geojson = predictions['predictions']['predictions']

# bbox is [lng - t, lat - t, lng + t, lat + t]; centre the map on its midpoint (lat, lng).
west, south, east, north = predictions['bbox']
detection_map = Map(
        center=(
            (south + north) / 2,
            (west + east) / 2,
        ),
        zoom=11,
    )
detection_map.add(hlsl30_tile_layer)
detection_map.add(hlss30_tile_layer)
detection_map.add(GeoJSON(data=geojson))

detection_map