In [1]:
import os
import json
import requests
import pandas as pd
from PIL import Image
from io import BytesIO
from prompts import *
from models import *

In [2]:
read_dir = "2019-ridgecrest_filtered"
files = os.listdir(read_dir)
files = [f for f in files if f.endswith("_image.json")]
files = sorted(files)

dfs = []
for file in files:
    print(file)
    file_path = os.path.join(read_dir, file)
    try:
        data = pd.read_json(file_path, lines=True)
        data = pd.json_normalize(data.to_dict(orient='records'))
        dfs.append(data)
    except ValueError as e:
        print(f"Error reading JSON from file {file}: {e}")

df = pd.concat(dfs)
df = df.drop_duplicates(subset=['id', 'text'])
df.head(5)

earthquake_2019-07-04_image.json
earthquake_2019-07-05_image.json
earthquake_2019-07-06_image.json
earthquake_2019-07-07_image.json
earthquake_2019-07-08_image.json
earthquake_2019-07-09_image.json
earthquake_2019-07-10_image.json
earthquake_2019-07-11_image.json


Unnamed: 0,user,user_name,description,location,protected,verified,created,followers,friends,listed,...,place.id,place.url,place.place_type,place.name,place.full_name,place.country_code,place.country,place.contained_within,place.bounding_box.type,place.bounding_box.coordinates
0,ORIOLELM,L MILLETTE,,,False,False,Sat Jun 02 17:45:28 +0000 2012,161,2730,3,...,,,,,,,,,,
1,theterbush,Brian Terbush,Volcano Geophysicist turned Earthquake/Volcano...,"Washington, USA",False,False,Wed Nov 15 16:54:15 +0000 2017,105,106,5,...,,,,,,,,,,
2,XinzhengLu,Xinzheng Lu,"Director & Professor, Institute of Disaster Pr...","Beijing, China",False,False,Tue Jul 26 10:08:21 +0000 2011,139,129,0,...,,,,,,,,,,
3,elginskye,Elgin-Skye,MSc Student at the School of Interactive Arts ...,"Vancouver, British Columbia",False,False,Wed Jun 03 04:19:16 +0000 2009,734,1200,22,...,,,,,,,,,,
4,almondteatime,Isaree,This is my Japanese study account. I like food...,"California, USA",False,False,Wed Mar 31 21:30:34 +0000 2010,121,525,1,...,,,,,,,,,,


In [4]:
def create_text_prompt(epicenter, tweet):
    prompt = text_mmi_prompts['Task'].format(epicenter=epicenter, text=tweet) \
           + text_mmi_prompts['Output_format'] \
           + text_mmi_prompts['Instruction'].format(epicenter=epicenter)
    return prompt

def create_image_prompt(epicenter):
    prompt = image_mmi_prompts['Task'].format(epicenter=epicenter) \
           + image_mmi_prompts['Output_format'] \
           + image_mmi_prompts['Instruction'].format(epicenter=epicenter)
    return prompt

df['text_prompt'] = df.apply(lambda x: create_text_prompt(epicenter='Ridgecrest, CA', tweet=x['text']), axis=1)
df['image_prompt'] = df.apply(lambda x: create_image_prompt(epicenter='Ridgecrest, CA'), axis=1)
df['prompt'].head(5)

0    You are a seismic expert. The epicenter of thi...
1    You are a seismic expert. The epicenter of thi...
2    You are a seismic expert. The epicenter of thi...
3    You are a seismic expert. The epicenter of thi...
4    You are a seismic expert. The epicenter of thi...
Name: prompt, dtype: object

In [9]:
# This is a test for textual information

text = df['text_prompt'].iloc[2100]
print(text)
response = call_model('gemini', 'text', text)
print(response)

You are a seismic expert. The epicenter of this earthquake is located at Ridgecrest, CA. Please assess the following text posted on Twitter for earthquake-related damage based on the Modified Mercalli Intensity (MMI) Scale: #Breaking: Just in - A mobile home caught on fire this morning at 05:50AM in  #Ridgecrest, the neighbours believe that the gas had not been shut of in that mobile home last night when a 7.1 magnitude of a #earthquake shook the region in #California. https://t.co/zrljREnGgL. 
Return the result in this JSON format: {'MMI': 'your judgment', 'location': 'your identification', 'reason': 'your reasoning'}.1. If the text does not describe any Ridgecrest, CA earthquake-caused damage, return 'None' for MMI.
2. If the damage location is not mentioned in the text, return 'None' for location.
3. Provide your reasoning based on the details mentioned in the text.

{'MMI': 'VII', 'location': 'Ridgecrest, CA', 'reason': 'The text states that a mobile home caught on fire due to a ga

In [10]:
# This is a test for image information

def download_image(url, save_path):
    """Download image from URL and save to a folder."""
    response = requests.get(url)
    if response.status_code == 200:
        image = Image.open(BytesIO(response.content))
        image.save(save_path, 'PNG')
    else:
        print(f"Failed to retrieve image. Status code: {response.status_code}")

In [13]:
image_text = df['image_prompt'].iloc[2100]
image_url = df['unique_image_urls'].iloc[2100][0]

save_image_dir = "images"
if not os.path.exists(save_image_dir):
    os.makedirs(save_image_dir)
save_path = os.path.join(save_image_dir, "test.png")
download_image(image_url, save_path)

response = call_model('gemini', 'image', image_text, save_path)
print(response)

{'MMI': 'None', 'reason': 'The image shows a house fire, not damage caused by an earthquake.'} 

