In [1]:
import os
import pandas as pd 
import numpy 
import yaml
from sklearn.model_selection import train_test_split
import firebase_admin
from firebase_admin import credentials, firestore, storage
import os
from tqdm import tqdm 

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Initialize the Firebase Admin SDK and create the Firestore / Storage handles
# that upload_image() and upload_question() use.
cred = credentials.Certificate('disaster-master.json')  # Replace with your Firebase Admin SDK key

# firebase_admin.initialize_app() raises ValueError when the default app already
# exists, so the original cell failed whenever it was re-run in the same kernel.
# get_app() raises ValueError only when no default app exists yet, so initialise
# in that case and otherwise reuse the existing app.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred, {
        'storageBucket': 'disaster-master-59d6f.appspot.com'  # Replace with your storage bucket URL
    })

db = firestore.client()    # Firestore client for the default app
bucket = storage.bucket()  # bucket handle; no name passed, so the configured 'storageBucket' is used

In [3]:
# Locations under the quiz application's scripts directory: the question-bank
# CSV path and the root directory for data logs.
_scripts_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/quiz_application/scripts"
save_path = os.path.join(_scripts_dir, "question_data_bank.csv")
main_log_dir = os.path.join(_scripts_dir, "data_logs")
# "<image> <text> <gt> <pred> <pred_conf> <task> <question_options> <question_format> <question_id> "

In [4]:
# Function to upload image to Firebase Storage
def upload_image(image_path):
    """Upload a local image file to the storage bucket and return its public URL.

    The blob is named after the file's basename only, so files that share a
    name but live in different directories map to the same blob name.
    The blob is made public before its URL is returned.

    Args:
        image_path: path of the local file to upload.

    Returns:
        The ``public_url`` of the uploaded blob.
    """
    blob_name = os.path.basename(image_path)
    image_blob = bucket.blob(blob_name)
    image_blob.upload_from_filename(image_path)
    image_blob.make_public()
    public_url = image_blob.public_url
    return public_url

# Function to upload question to Firestore
def upload_question(question_data, image_path):
    """Upload the image at ``image_path`` and store the question in Firestore.

    The document is written to the 'questions' collection under the id
    ``q_<question_id>``, with its 'image' field set to the URL returned by
    ``upload_image``.

    Args:
        question_data: dict describing the question; must contain 'question_id'.
            It is not modified by this function.
        image_path: local path of the image, passed to ``upload_image``.

    Raises:
        KeyError: if ``question_data`` has no 'question_id' entry (raised before
            anything is uploaded).
    """
    # Work on a shallow copy so the caller's dict is not changed as a side
    # effect; the notebook reuses one template dict for many uploads.
    record = dict(question_data)
    # Build the document id first so a missing id fails before the image upload.
    doc_id = f"q_{record['question_id']}"
    record['image'] = upload_image(image_path)
    db.collection('questions').document(doc_id).set(record)

In [5]:
def load_and_combine_tsv_data(tsv_files:list,dataset_dir:str):
    """Read several TSV files, merge them, filter them and save the result as one CSV.

    Each name in ``tsv_files`` is joined onto ``dataset_dir`` and read as a
    tab-separated file. The frames are concatenated with a fresh index, rows
    whose 'image_damage' value is 'dont_know_or_cant_judge' are removed, and the
    remainder is written to ``<dataset_dir>/whole_dataset.csv`` without the index.

    Returns:
        A tuple ``(filtered DataFrame, path of the written CSV)``.
    """
    frames = []
    for tsv_name in tsv_files:
        frames.append(pd.read_csv(os.path.join(dataset_dir, tsv_name), sep='\t'))
    merged = pd.concat(frames, ignore_index=True)

    keep_mask = merged['image_damage'] != 'dont_know_or_cant_judge'
    cleaned_df = merged[keep_mask]

    output_csv = os.path.join(dataset_dir, 'whole_dataset.csv')
    cleaned_df.to_csv(output_csv, index=False)

    return cleaned_df, output_csv

In [6]:
def split_and_save_dataset(dataset, target_column, test_size, random_state, output_dir,task):
    """Make a stratified train/validation split and write both parts as CSV.

    The split is stratified on ``dataset[target_column]``. ``output_dir`` is
    created if it does not exist, and the two parts are written there as
    ``train_<task>_dataset.csv`` and ``val_<task>_dataset.csv`` without the index.

    Returns:
        ``(train_data, val_data)`` as produced by ``train_test_split``.
    """
    stratify_labels = dataset[target_column]
    train_data, val_data = train_test_split(
        dataset,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify_labels,
    )

    os.makedirs(output_dir, exist_ok=True)

    outputs = (
        (train_data, f'train_{task}_dataset.csv'),
        (val_data, f'val_{task}_dataset.csv'),
    )
    for part, filename in outputs:
        part.to_csv(os.path.join(output_dir, filename), index=False)

    return train_data, val_data

In [7]:
def load_config(file_path='config.yaml'):
    """Parse the YAML file at ``file_path`` with ``yaml.safe_load`` and return the result."""
    with open(file_path, 'r') as stream:
        return yaml.safe_load(stream)

In [8]:
def select_random_n_records(df, n,target_class,task, random_state=None):
    """Randomly pick up to ``n`` rows whose ``image_<task>`` label equals ``target_class``.

    Args:
        df: DataFrame containing an ``image_<task>`` column.
        n: number of rows requested.
        target_class: label value to select.
        task: task name used to build the label column name ``image_<task>``.
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            selection can be reproduced. ``None`` (the default) keeps the
            original unseeded behaviour.

    Returns:
        A DataFrame with ``n`` sampled rows of the class, or all rows of the
        class (after printing a warning) when fewer than ``n`` exist.
    """
    label_column = f'image_{task}'
    class_df = df[df[label_column] == target_class]

    if len(class_df) < n:
        # The message used to say "informative" for every class, which was
        # misleading for the other tasks; report the class actually requested.
        print(f"Warning: Only {len(class_df)} '{target_class}' records available. Returning all of them.")
        return class_df

    return class_df.sample(n=n, random_state=random_state)

In [9]:
def select_random_n_records_post_disaster(df, n, target_class, random_state=None):
    """Sample a shuffled subset with an equal share of 0-rows and 1-rows.

    ``target_class`` names a column holding 0/1 values. Half of ``n`` rows are
    drawn from each value; an odd ``n`` is rounded up to the next even number.
    When a value has fewer rows than requested, all of its rows are used and a
    warning is printed, so the result can be smaller than ``n`` and unbalanced.

    Args:
        df: source DataFrame.
        n: total number of rows requested.
        target_class: name of the 0/1 column to balance on.
        random_state: optional seed forwarded to every ``DataFrame.sample`` call
            so the selection and shuffle can be reproduced. ``None`` (the
            default) keeps the original unseeded behaviour.

    Returns:
        The combined rows in shuffled order with a fresh 0..k-1 index.
    """
    if n % 2 != 0:
        n += 1  # round odd requests up so the total splits evenly
    half_n = n // 2

    per_class_samples = []
    for class_value in (0, 1):
        class_df = df[df[target_class] == class_value]
        if len(class_df) < half_n:
            # Not enough rows for a full half: keep what exists, but say so
            # instead of silently returning a smaller, unbalanced sample.
            print(f"Warning: Only {len(class_df)} records with {target_class} == {class_value} "
                  f"available ({half_n} requested). Returning all of them.")
        else:
            class_df = class_df.sample(n=half_n, random_state=random_state)
        per_class_samples.append(class_df)

    # Combine the samples and shuffle the resulting DataFrame
    combined_df = (
        pd.concat(per_class_samples)
        .sample(frac=1, random_state=random_state)
        .reset_index(drop=True)
    )

    return combined_df

## Informative Data

In [10]:
# Load the YAML configuration used by the informative-data section.
info_config_path = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/disaster_classification/configs/info_config.yaml"
info_config = load_config(file_path=info_config_path)

In [11]:
# dataset, whole_dataset_csv_path = load_and_combine_tsv_data(info_config['paths']['tsv_files'], dataset_dir=info_config['paths']['dataset_dir_path'])

# # Create folders
# log_dir = os.path.join(main_log_dir,'multimodal_logs',info_config['paths']['task'])
# os.makedirs(log_dir, exist_ok=True)

# # Filter to use only the dataset for the task columns 
# text_target_column = f"image_{info_config['paths']['task']}"
# filtered_dataset = dataset[['tweet_text', text_target_column, 'image_path']]
# filtered_dataset = filtered_dataset.dropna(subset=[text_target_column, 'image_path'])

# # Apply class relabeling if specified in the config
# if 'classes_to_relabel' in info_config['data'] and info_config['data']['classes_to_relabel']:
#     classes_to_relabel = info_config['data']['classes_to_relabel'] 
#     filtered_dataset[text_target_column] = filtered_dataset[text_target_column].replace(classes_to_relabel)

# # Task specific operations - drop rows with certain classes to reduce noise 
# if len(info_config['data']['classes_to_drop']) > 0:
#     for class_drop in info_config['data']['classes_to_drop']:
#         filtered_dataset = filtered_dataset[filtered_dataset[text_target_column] != class_drop]
        
# train_data, val_data = split_and_save_dataset(filtered_dataset, text_target_column, info_config['model_training_parameters']['test_size'], info_config['model_training_parameters']['random_state'], log_dir, text_target_column)

In [12]:
# Read the saved train / validation CSVs for the 'info' task.
info_split_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/quiz_application/scripts/data_logs/multimodal_logs/info"
train_data = pd.read_csv(os.path.join(info_split_dir, "train_image_info_dataset.csv"))
val_data = pd.read_csv(os.path.join(info_split_dir, "val_image_info_dataset.csv"))

In [13]:
# Preview the first rows of the 'info' validation split.
val_data.head()

Unnamed: 0,tweet_text,image_info,image_path
0,Hurricane Harvey Relief Comes With an Extra-La...,not_informative,data_image/hurricane_harvey/14_9_2017/90818048...
1,Mexico City Sport Director Says No Major Damag...,not_informative,data_image/mexico_earthquake/20_9_2017/9105413...
2,Hurricane Maria still has strength. Its impact...,informative,data_image/hurricane_maria/23_9_2017/911530504...
3,RT @johnrobertsFox: Looking to fly out of Hurr...,not_informative,data_image/hurricane_irma/7_9_2017/90562524655...
4,UPDATE: Iran-Iraq earthquake • Death toll at 4...,informative,data_image/iraq_iran_earthquake/13_11_2017/930...


In [14]:
# Draw up to 25 random training rows per class, informative first, then stack them.
info_training_samples, noninfo_training_samples = (
    select_random_n_records(train_data, 25, label, "info")
    for label in ('informative', 'not_informative')
)

training_samples_df = pd.concat(
    [info_training_samples, noninfo_training_samples],
    ignore_index=True,
)

In [15]:
# Draw up to 50 random validation rows per class, informative first, then stack them.
info_validation_samples, noninfo_validation_samples = (
    select_random_n_records(val_data, 50, label, "info")
    for label in ('informative', 'not_informative')
)

validation_samples_df = pd.concat(
    [info_validation_samples, noninfo_validation_samples],
    ignore_index=True,
)

In [16]:
# Integer stored as 'correct_answer' for each 'image_info' label.
class_names = dict(zip(("informative", "not_informative"), range(2)))

In [17]:
# Template record for the 'info' questions. The loop below overwrites the
# per-question fields in place before each upload; ids start at 1.
question_record = {
    'question_id': 0,
    'image': None,
    'text': None,
    'pred':None,
    'pred_conf':0,
    'task': 'info',
    'phase':None,
    'correct_answer': None,
    'question_options': ['Informative','Not-Informative', 'Gather Additional Data'],
    'question_format': 'Does the Image and Text data contain information relevant to the Disaster ?'
}

# Training questions are uploaded first, then validation questions. Both phases
# share one loop body instead of two copy-pasted loops, and tqdm is given
# `total` because iterrows() has no length (otherwise the bar shows only "Nit").
upload_batches = (
    ('train', "Uploading for training", training_samples_df),
    ('val', "Uploading for validation", validation_samples_df),
)
info_dataset_dir = info_config['paths']['dataset_dir_path']

for phase, progress_label, samples_df in upload_batches:
    for row_index, row in tqdm(samples_df.iterrows(), total=len(samples_df), desc=progress_label):
        question_record['question_id'] += 1  # ids run consecutively across both phases
        question_record['phase'] = phase
        image_path = os.path.join(info_dataset_dir, row['image_path'])
        question_record['text'] = row['tweet_text']
        question_record['correct_answer'] = class_names[row['image_info']]
        upload_question(question_record,image_path)


Uploading for training: 50it [00:10,  4.66it/s]
Uploading for validation: 100it [00:18,  5.33it/s]


# Humanitarian

In [18]:
# Load the YAML configuration used by the humanitarian section.
human_config_path = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/disaster_classification/configs/human_config.yaml"
human_config = load_config(file_path=human_config_path)

In [19]:
# dataset, whole_dataset_csv_path = load_and_combine_tsv_data(human_config['paths']['tsv_files'], dataset_dir=human_config['paths']['dataset_dir_path'])

# # Create folders
# log_dir = os.path.join(main_log_dir,'multimodal_logs',human_config['paths']['task'])
# os.makedirs(log_dir, exist_ok=True)

# # Filter to use only the dataset for the task columns 
# text_target_column = f"image_{human_config['paths']['task']}"
# filtered_dataset = dataset[['tweet_text', text_target_column, 'image_path']]
# filtered_dataset = filtered_dataset.dropna(subset=[text_target_column, 'image_path'])

# # Apply class relabeling if specified in the config
# if 'classes_to_relabel' in human_config['data'] and human_config['data']['classes_to_relabel']:
#     classes_to_relabel = human_config['data']['classes_to_relabel'] 
#     filtered_dataset[text_target_column] = filtered_dataset[text_target_column].replace(classes_to_relabel)

# # Task specific operations - drop rows with certain classes to reduce noise 
# if len(human_config['data']['classes_to_drop']) > 0:
#     for class_drop in human_config['data']['classes_to_drop']:
#         filtered_dataset = filtered_dataset[filtered_dataset[text_target_column] != class_drop]
        
# train_data, val_data = split_and_save_dataset(filtered_dataset, text_target_column, human_config['model_training_parameters']['test_size'], human_config['model_training_parameters']['random_state'], log_dir, text_target_column)


In [20]:
# Read the saved train / validation CSVs for the 'human' task.
human_split_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/quiz_application/scripts/data_logs/multimodal_logs/human"
train_data = pd.read_csv(os.path.join(human_split_dir, "train_image_human_dataset.csv"))
val_data = pd.read_csv(os.path.join(human_split_dir, "val_image_human_dataset.csv"))

In [21]:
# Preview the first rows of the 'human' validation split.
val_data.head()

Unnamed: 0,tweet_text,image_human,image_path
0,WATCH: Police helicopter flies over ferocious ...,infrastructure_and_utility_damage,data_image/california_wildfires/11_10_2017/918...
1,LDS members volunteer during California wildfi...,rescue_volunteering_or_donation_effort,data_image/california_wildfires/16_10_2017/919...
2,RT @lori_english: Parrots 22nd Floor During Ir...,other_relevant_information,data_image/hurricane_irma/17_9_2017/9094909252...
3,"Hurricane Irma, Jose and Harvey damage: US eco...",infrastructure_and_utility_damage,data_image/hurricane_harvey/8_9_2017/906112443...
4,California Wildfire Threatening To Burn Millio...,infrastructure_and_utility_damage,data_image/california_wildfires/15_10_2017/919...


In [22]:
# Draw up to 25 random training rows for each humanitarian class, in the order
# listed, then stack them into one frame.
human_task = human_config['paths']['task']
human_labels = (
    'infrastructure_and_utility_damage',
    'rescue_volunteering_or_donation_effort',
    'other_relevant_information',
    'affected_individuals',
)
ab_training_samples, bc_training_samples, dc_training_samples, ef_training_samples = (
    select_random_n_records(train_data, 25, label, human_task)
    for label in human_labels
)

training_samples_df = pd.concat(
    [ab_training_samples, bc_training_samples, dc_training_samples, ef_training_samples],
    ignore_index=True,
)

In [23]:
# Draw up to 25 random validation rows for each humanitarian class, in the order
# listed, then stack them into one frame.
human_task = human_config['paths']['task']
human_labels = (
    'infrastructure_and_utility_damage',
    'rescue_volunteering_or_donation_effort',
    'other_relevant_information',
    'affected_individuals',
)
ab_validation_samples, bc_validation_samples, dc_validation_samples, ef_validation_samples = (
    select_random_n_records(val_data, 25, label, human_task)
    for label in human_labels
)

validation_samples_df = pd.concat(
    [ab_validation_samples, bc_validation_samples, dc_validation_samples, ef_validation_samples],
    ignore_index=True,
)

In [24]:
# Integer stored as 'correct_answer' for each humanitarian label.
class_names = {
    label: position
    for position, label in enumerate((
        "affected_individuals",
        "infrastructure_and_utility_damage",
        "other_relevant_information",
        "rescue_volunteering_or_donation_effort",
    ))
}

In [25]:
# Continue numbering from the last id used by the previously built record.
question_id = question_record['question_id']
# Template record for the 'human' questions; per-question fields are
# overwritten in place before each upload.
question_record = {
    'question_id': question_id,
    'image': None,
    'text': None,
    'pred':None,
    'pred_conf':0,
    'task': 'human',
    'phase':None,
    'correct_answer': None,
    'question_options': ['Affected Individuals','Infrastructure and Utility Damage','Other Relevant Information','Rescue Volunteering or Donation Effort',  'Gather Additional Data'],
    'question_format': 'What Type of Humanitarian Information does the Image and Text data Contain ?'
}

# Training questions are uploaded first, then validation questions. Both phases
# share one loop body instead of two copy-pasted loops, and tqdm is given
# `total` because iterrows() has no length (otherwise the bar shows only "Nit").
upload_batches = (
    ('train', "Uploading for training", training_samples_df),
    ('val', "Uploading for validation", validation_samples_df),
)
human_dataset_dir = human_config['paths']['dataset_dir_path']
human_label_column = f"image_{human_config['paths']['task']}"

for phase, progress_label, samples_df in upload_batches:
    for row_index, row in tqdm(samples_df.iterrows(), total=len(samples_df), desc=progress_label):
        question_record['question_id'] += 1  # ids run consecutively across both phases
        question_record['phase'] = phase
        image_path = os.path.join(human_dataset_dir, row['image_path'])
        question_record['text'] = row['tweet_text']
        question_record['correct_answer'] = class_names[row[human_label_column]]
        upload_question(question_record,image_path)

Uploading for training: 100it [00:17,  5.64it/s]
Uploading for validation: 100it [00:17,  5.78it/s]


# Damage 

In [26]:
# Load the YAML configuration used by the damage section.
damage_config_path = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/disaster_classification/configs/damage_config.yaml"
damage_config = load_config(file_path=damage_config_path)

In [27]:
# dataset, whole_dataset_csv_path = load_and_combine_tsv_data(damage_config['paths']['tsv_files'], dataset_dir=damage_config['paths']['dataset_dir_path'])

# # Create folders
# log_dir = os.path.join(main_log_dir,'multimodal_logs',damage_config['paths']['task'])
# os.makedirs(log_dir, exist_ok=True)

# # Filter to use only the dataset for the task columns 
# text_target_column = f"image_{damage_config['paths']['task']}"
# filtered_dataset = dataset[['tweet_text', text_target_column, 'image_path']]
# filtered_dataset = filtered_dataset.dropna(subset=[text_target_column, 'image_path'])

# # Apply class relabeling if specified in the config
# if 'classes_to_relabel' in damage_config['data'] and damage_config['data']['classes_to_relabel']:
#     classes_to_relabel = damage_config['data']['classes_to_relabel'] 
#     filtered_dataset[text_target_column] = filtered_dataset[text_target_column].replace(classes_to_relabel)

# # Task specific operations - drop rows with certain classes to reduce noise 
# if len(damage_config['data']['classes_to_drop']) > 0:
#     for class_drop in damage_config['data']['classes_to_drop']:
#         filtered_dataset = filtered_dataset[filtered_dataset[text_target_column] != class_drop]
        
# train_data, val_data = split_and_save_dataset(filtered_dataset, text_target_column, damage_config['model_training_parameters']['test_size'], damage_config['model_training_parameters']['random_state'], log_dir, text_target_column)


In [28]:
# Read the saved train / validation CSVs for the 'damage' task.
damage_split_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/quiz_application/scripts/data_logs/multimodal_logs/damage"
train_data = pd.read_csv(os.path.join(damage_split_dir, "train_image_damage_dataset.csv"))
val_data = pd.read_csv(os.path.join(damage_split_dir, "val_image_damage_dataset.csv"))

In [29]:
# Preview the first rows of the 'damage' validation split.
val_data.head()

Unnamed: 0,tweet_text,image_damage,image_path
0,DACA decision hits tens of thousands ravaged b...,little_or_no_damage,data_image/hurricane_harvey/6_9_2017/905315238...
1,Here's how to know if destruction caused by Hu...,little_or_no_damage,data_image/hurricane_harvey/12_9_2017/90743997...
2,How to avoid 'storm chaser' fraud after Harvey...,severe_damage,data_image/hurricane_harvey/11_9_2017/90728021...
3,"Hurricane Irma, Jose and Harvey damage: US eco...",severe_damage,data_image/hurricane_harvey/8_9_2017/906112443...
4,Homes Built to Stricter Building Codes Fared B...,little_or_no_damage,data_image/hurricane_irma/18_9_2017/9098290948...


In [30]:
# Draw up to 25 random training rows per damage class, severe first, then stack them.
damage_task = damage_config['paths']['task']
ab_training_samples, bc_training_samples = (
    select_random_n_records(train_data, 25, label, damage_task)
    for label in ('severe_damage', 'little_or_no_damage')
)

training_samples_df = pd.concat(
    [ab_training_samples, bc_training_samples],
    ignore_index=True,
)

In [31]:
# Draw up to 50 random validation rows per damage class, severe first, then stack them.
damage_task = damage_config['paths']['task']
ab_validation_samples, bc_validation_samples = (
    select_random_n_records(val_data, 50, label, damage_task)
    for label in ('severe_damage', 'little_or_no_damage')
)

validation_samples_df = pd.concat(
    [ab_validation_samples, bc_validation_samples],
    ignore_index=True,
)

In [32]:
# Integer stored as 'correct_answer' for each damage label.
class_names = dict(little_or_no_damage=0, severe_damage=1)

In [33]:
# Continue numbering from the last id used by the previously built record.
question_id = question_record['question_id']
# Template record for the 'damage' questions; per-question fields are
# overwritten in place before each upload.
question_record = {
    'question_id': question_id,
    'image': None,
    'text': None,
    'pred':None,
    'pred_conf':0,
    'task': 'damage',
    'phase':None,
    'correct_answer': None,
    'question_options': ['Little or No Damage', 'Severe Damage', 'Gather Additional Data'],
    'question_format': 'Can You Identify the Level of Damage ?'
}

# Training questions are uploaded first, then validation questions. Both phases
# share one loop body instead of two copy-pasted loops, and tqdm is given
# `total` because iterrows() has no length (otherwise the bar shows only "Nit").
upload_batches = (
    ('train', "Uploading for training", training_samples_df),
    ('val', "Uploading for validation", validation_samples_df),
)
damage_dataset_dir = damage_config['paths']['dataset_dir_path']
damage_label_column = f"image_{damage_config['paths']['task']}"

for phase, progress_label, samples_df in upload_batches:
    for row_index, row in tqdm(samples_df.iterrows(), total=len(samples_df), desc=progress_label):
        question_record['question_id'] += 1  # ids run consecutively across both phases
        question_record['phase'] = phase
        image_path = os.path.join(damage_dataset_dir, row['image_path'])
        question_record['text'] = row['tweet_text']
        question_record['correct_answer'] = class_names[row[damage_label_column]]
        upload_question(question_record,image_path)

Uploading for training: 50it [00:08,  5.80it/s]
Uploading for validation: 100it [00:17,  5.83it/s]


In [34]:
# Show the last question id assigned so far.
question_record['question_id']

500

# Satellite Data - No Damage vs Damaged 

In [35]:
# Read the satellite train / validation CSVs and list the two class columns.
satellite_split_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/post_disaster_classification/satellite_damage_vs_no_damage_saved_models"
sat_train_df = pd.read_csv(os.path.join(satellite_split_dir, "train_satellite_damage_vs_no_damage.csv"))
sat_val_df = pd.read_csv(os.path.join(satellite_split_dir, "val_satellite_damage_vs_no_damage.csv"))
class_columns = list(("no_damage", "major_damage"))


In [36]:
# Balanced sample of satellite training rows (half with major_damage == 0, half == 1).
training_samples_df = select_random_n_records_post_disaster(
    df=sat_train_df,
    n=100,
    target_class="major_damage",
)

In [37]:
# Balanced sample of satellite validation rows (half with major_damage == 0, half == 1).
validation_samples_df = select_random_n_records_post_disaster(
    df=sat_val_df,
    n=100,
    target_class="major_damage",
)

In [38]:
question_id = question_record['question_id'] # value from prev records
# Template record for the 'satellite' questions. 'text' is the same fixed
# caption for every question; the other per-question fields are overwritten in
# place before each upload.
question_record = {
    'question_id': question_id,
    'image': None,
    'text': 'Satellite Image of the Region Impacted by the Disaster',
    'pred':None,
    'pred_conf':0,
    'task': 'satellite',
    'phase':None,
    'correct_answer': None,
    'question_options':['No Damage','Major Damage', 'Gather Additional Data'],
    'question_format': 'Can You Identify the Level of Damage ?'
}

# Training questions are uploaded first, then validation questions. Both phases
# share one loop body instead of two copy-pasted loops, and tqdm is given
# `total` because iterrows() has no length (otherwise the bar shows only "Nit").
upload_batches = (
    ('train', "Uploading for training", training_samples_df),
    ('val', "Uploading for validation", validation_samples_df),
)

for phase, progress_label, samples_df in upload_batches:
    for row_index, row in tqdm(samples_df.iterrows(), total=len(samples_df), desc=progress_label):
        # The id is consumed before the file check, so a missing image leaves a
        # gap in the id sequence (unchanged from the original behaviour).
        question_record['question_id'] += 1
        question_record['phase'] = phase
        image_path = row['image_path']
        if not os.path.isfile(image_path):
            print('Does not exist : ',image_path)
            continue
        question_record['correct_answer'] = int(row["major_damage"])
        upload_question(question_record,image_path)

Uploading for training: 100it [00:16,  6.15it/s]
Uploading for validation: 100it [00:16,  6.14it/s]


# Drone Data : No Damage vs Damaged 

In [39]:
# Read the drone train / validation CSVs and list the two class columns.
drone_split_dir = "/home/julian/git-repo/juliangdz/GovernanceIRP/Autonomous-Governance-in-Disaster-Management/post_disaster_classification/drone_damage_saved_models"
drone_train_df = pd.read_csv(os.path.join(drone_split_dir, "train_drone_damage.csv"))
drone_val_df = pd.read_csv(os.path.join(drone_split_dir, "val_drone_damage.csv"))
class_columns = list(("building_no_damage", "building_destroyed"))


In [40]:
# Balanced sample on 'building_destroyed', then drop every row whose
# 'building_no_damage' flag is 1.
# NOTE(review): after this filter, rows with building_destroyed == 0 are not
# flagged building_no_damage either, yet the upload cell stores answer 0 for
# them (option 'No Damage') — confirm this is the intended labelling.
drone_train_balanced = select_random_n_records_post_disaster(drone_train_df, 100, "building_destroyed")
not_flagged_undamaged = drone_train_balanced["building_no_damage"] != 1
building_destroyed_training_samples_df = drone_train_balanced[not_flagged_undamaged]


In [41]:
# Balanced sample on 'building_destroyed', then drop every row whose
# 'building_no_damage' flag is 1.
# NOTE(review): after this filter, rows with building_destroyed == 0 are not
# flagged building_no_damage either, yet the upload cell stores answer 0 for
# them (option 'No Damage') — confirm this is the intended labelling.
drone_val_balanced = select_random_n_records_post_disaster(drone_val_df, 100, "building_destroyed")
not_flagged_undamaged = drone_val_balanced["building_no_damage"] != 1
building_destroyed_validation_samples_df = drone_val_balanced[not_flagged_undamaged]


In [42]:
# Number of remaining validation rows with building_destroyed == 0.
len(building_destroyed_validation_samples_df[building_destroyed_validation_samples_df["building_destroyed"] == 0])

44

In [43]:
# Number of remaining validation rows with building_destroyed == 1.
len(building_destroyed_validation_samples_df[building_destroyed_validation_samples_df["building_destroyed"] == 1])

49

In [44]:
# Read the last id assigned so far and display it.
question_id = question_record['question_id'] # value from prev records
question_id # 700

700

In [45]:
# Uploading for Damage
question_id = question_record['question_id'] # value from prev records
# Template record for the 'drone-damage' questions. 'text' is the same fixed
# caption for every question; the other per-question fields are overwritten in
# place before each upload.
question_record = {
    'question_id': question_id,
    'image': None,
    'text': 'Surveillance Drone Captured Image of the Region Impacted by the Disaster',
    'pred':None,
    'pred_conf':0,
    'task': 'drone-damage',
    'phase':None,
    'correct_answer': None,
    'question_options':['No Damage', 'Damaged',  'Gather Additional Data'],
    'question_format': 'Can You Identify the Level of Damage ?'
}

# Training questions are uploaded first, then validation questions. Both phases
# share one loop body instead of two copy-pasted loops, and tqdm is given
# `total` because iterrows() has no length (otherwise the bar shows only "Nit").
upload_batches = (
    ('train', "Uploading for training", building_destroyed_training_samples_df),
    ('val', "Uploading for validation", building_destroyed_validation_samples_df),
)

for phase, progress_label, samples_df in upload_batches:
    for row_index, row in tqdm(samples_df.iterrows(), total=len(samples_df), desc=progress_label):
        # The id is consumed before the file check, so a missing image leaves a
        # gap in the id sequence (unchanged from the original behaviour).
        question_record['question_id'] += 1
        question_record['phase'] = phase
        image_path = row['image_path']
        if not os.path.isfile(image_path):
            print('Does not exist : ',image_path)
            continue
        question_record['correct_answer'] = int(row["building_destroyed"])
        upload_question(question_record,image_path)

Uploading for training: 95it [00:15,  5.96it/s]
Uploading for validation: 93it [00:16,  5.79it/s]


In [46]:
# building_no_damage_training_samples_df = select_random_n_records_post_disaster(drone_train_df,100,"building_no_damage")


In [47]:
# building_no_damage_validation_samples_df = select_random_n_records_post_disaster(drone_val_df,100,"building_no_damage")

In [48]:
# question_id = question_record['question_id'] # value from prev records
# question_id # 700

In [49]:
# # Uploading for No Damage
# question_id = question_record['question_id'] # value from prev records
# question_record = {
#     'question_id': question_id,
#     'image': None,
#     'text': 'Surveillance Drone Captured Image of the Region Impacted by the Disaster',
#     'pred':None,
#     'pred_conf':0,
#     'task': 'drone-no_damage',
#     'phase':None,
#     'correct_answer': None,
#     'question_options': ['Actionable Data', 'Gather Additional Data', 'No Response Necessary'],
#     'question_format': 'Can You Identify the Level of Damage ?'
# }

# for index,row in tqdm(building_no_damage_training_samples_df.iterrows(),desc="Uploading for training"):
#     question_record['question_id'] += 1
#     question_record['phase'] = 'train'
#     image_path = row['image_path']
#     if not os.path.isfile(image_path):
#         print('Does not exist : ',image_path)
#         continue
#     question_record['correct_answer'] = 0
#     upload_question(question_record,image_path)


# for index,row in tqdm(building_no_damage_validation_samples_df.iterrows(),desc="Uploading for validation"):
#     question_record['question_id'] += 1
#     question_record['phase'] = 'val'
#     image_path = row['image_path']
#     if not os.path.isfile(image_path):
#         print('Does not exist : ',image_path)
#         continue
#     question_record['correct_answer'] = 0
#     upload_question(question_record,image_path)

In [50]:
# question_id = question_record['question_id'] # value from prev records
# question_id # 700