In [1]:
import json, datetime
from os import listdir, path
import pandas as pd

The cell below loads the building-transform JSON file covering the whole dataset. The xView2 website provides it with the following description:

>Building Transforms from XBD
>If you are looking for mapping information related to the XBD dataset, use the link below. This download doesn't contain any of the images, just metadata around the location of the image and its origin.

It is not used in the rest of this notebook.

In [None]:
# Optional look at the dataset-wide geotransform metadata.
geotransforms_file = path.join('dataset', 'xview_geotransforms.json')
with open(geotransforms_file) as handle:
    global_data_info = json.load(handle)
global_data_info

In [4]:
# Directory of the training split's label files; the cells below list it and
# load each file in it as JSON.
json_path = path.join('dataset', 'train', 'labels')

In [5]:
# Number of regular files (sub-directories excluded) in the labels folder.
sum(1 for entry in listdir(json_path) if path.isfile(path.join(json_path, entry)))

5598

List of features to count / store:
* feature types
* A set of coords
* date
* disaster_type
* disaster

In [6]:
# Column-oriented accumulator: one independent list per column, later filled
# with one entry per label file and turned into a DataFrame.
disaster_data = {
    column: []
    for column in (
        'disaster',
        'disaster_type',
        'date',
        'lgt',               # longitude
        'lat',               # latitude
        'severity',
        'highest_severity',
        'moment',
        'file',
    )
}

# Flat lists collecting, across all files, every feature type and every
# damage subtype encountered.
features, subtypes = [], []

# Numeric weight attached to each damage label.
severity_grade = {
    'destroyed': 3,
    'major-damage': 2,
    'minor-damage': 1,
    'no-damage': 0,
    'un-classified': 0,
}

In [7]:
# Build one row of `disaster_data` per label file, then assemble the DataFrame.

# The accumulators are created in an earlier cell; empty them first so that
# re-running this cell does not append every file a second time.
for column in disaster_data.values():
    column.clear()
features.clear()
subtypes.clear()

for file in [f for f in listdir(json_path) if path.isfile(path.join(json_path, f))]:
    # Keep the file open only while it is being parsed.
    with open(path.join(json_path, file)) as fi:
        file_data = json.load(fi)

    metadata = file_data['metadata']
    buildings = file_data['features']['lng_lat']

    # Per-file damage score: 1 plus the grade of every feature with a subtype.
    # NOTE(review): the baseline of 1 presumably keeps the value strictly
    # positive for use as a marker size -- confirm.
    severity = 1
    # Label with the highest grade seen so far ('' until one is seen).
    severity_label = ''

    disaster_data['disaster'].append(metadata['disaster'])
    disaster_data['disaster_type'].append(metadata['disaster_type'])
    # Everything after the first '.' (fractional seconds and suffix) is dropped
    # before parsing the timestamp.
    capture_date = metadata['capture_date'].split('.')[0]
    disaster_data['date'].append(datetime.datetime.strptime(capture_date, '%Y-%m-%dT%H:%M:%S'))

    if len(buildings) > 0:
        # The file's position is taken from the first coordinate pair of the
        # first feature's WKT string (tokens 1 and 2 after splitting on spaces).
        wkt_tokens = buildings[0]['wkt'].split(" ")
        lgt = wkt_tokens[1].replace("((", "")
        lat = wkt_tokens[2].replace(",", "")
        disaster_data['lgt'].append(float(lgt))
        disaster_data['lat'].append(float(lat))

        for feature in buildings:
            properties = feature['properties']
            features.append(properties['feature_type'])
            if 'subtype' in properties:
                subtype = properties['subtype']
                subtypes.append(subtype)
                severity += severity_grade[subtype]
                # Strict '>' keeps the first label seen among equal grades.
                if severity_label == '' or severity_grade[subtype] > severity_grade[severity_label]:
                    severity_label = subtype
            else:
                # Features without a damage label are recorded as None.
                subtypes.append(None)
    else:
        # No features in this file: no coordinates available.
        disaster_data['lgt'].append(None)
        disaster_data['lat'].append(None)

    disaster_data['severity'].append(severity)
    disaster_data['highest_severity'].append('Unknown' if severity_label == '' else severity_label)
    # Any file name without '_pre_' is treated as a post-disaster file.
    disaster_data['moment'].append('pre' if '_pre_' in file else 'post')
    disaster_data['file'].append(file)

feature_set = set(features)
subtype_set = set(subtypes)
df = pd.DataFrame(disaster_data)
df.head()

Unnamed: 0,disaster,disaster_type,date,lgt,lat,severity,highest_severity,moment,file
0,socal-fire,fire,2018-11-14 18:42:58,-118.737523,34.14723,1,no-damage,post,socal-fire_00000986_post_disaster.json
1,hurricane-florence,flooding,2018-04-06 15:49:36,-77.943453,34.758699,1,Unknown,pre,hurricane-florence_00000398_pre_disaster.json
2,hurricane-florence,flooding,2018-09-20 16:04:41,-77.884941,34.789438,41,major-damage,post,hurricane-florence_00000395_post_disaster.json
3,hurricane-matthew,wind,2016-10-10 16:04:56,,,1,Unknown,post,hurricane-matthew_00000239_post_disaster.json
4,santa-rosa-wildfire,fire,2017-05-21 19:19:19,-122.676137,38.533348,1,Unknown,pre,santa-rosa-wildfire_00000300_pre_disaster.json


In [8]:
# Distinct feature types and damage subtypes collected from the label files
# (None stands for features that have no 'subtype' property).
feature_set, subtype_set

({'building'},
 {None,
  'destroyed',
  'major-damage',
  'minor-damage',
  'no-damage',
  'un-classified'})

In [9]:
# Column dtypes and non-null counts of the assembled frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5598 entries, 0 to 5597
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   disaster          5598 non-null   object        
 1   disaster_type     5598 non-null   object        
 2   date              5598 non-null   datetime64[ns]
 3   lgt               4566 non-null   float64       
 4   lat               4566 non-null   float64       
 5   severity          5598 non-null   int64         
 6   highest_severity  5598 non-null   object        
 7   moment            5598 non-null   object        
 8   file              5598 non-null   object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(5)
memory usage: 393.7+ KB


In [10]:
# Number of label files per disaster event.
df['disaster'].value_counts()

disaster
socal-fire             1646
hurricane-michael       686
hurricane-florence      638
hurricane-harvey        638
midwest-flooding        558
hurricane-matthew       476
santa-rosa-wildfire     452
mexico-earthquake       242
palu-tsunami            226
guatemala-volcano        36
Name: count, dtype: int64

In [11]:
# Per disaster, how many label files have no coordinates.
df.loc[df['lgt'].isna(), 'disaster'].value_counts()

disaster
socal-fire             830
midwest-flooding        70
hurricane-harvey        38
hurricane-matthew       30
hurricane-florence      26
hurricane-michael       18
santa-rosa-wildfire     14
palu-tsunami             4
guatemala-volcano        2
Name: count, dtype: int64

In [12]:
import plotly.express as px

In [None]:
# Only rows that have coordinates can be placed on the map.
df_located = df.dropna(subset=['lat', 'lgt'])

# Columns are referenced by name so that Plotly reads them from the filtered
# frame; Series taken from the unfiltered `df` would not line up with it.
fig = px.scatter_geo(data_frame=df_located,
                     lat='lat',
                     lon='lgt',
                     color='disaster_type',
                     projection='equirectangular',
                     hover_name='disaster',
                     size='severity',
                     animation_frame='moment',
                     width=1500, height=768)

fig.show()

In [14]:
import random
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

In [15]:
def four_images(file:str, json_path:str):
    """Return the image and target paths associated with one label file.

    Parameters
    ----------
    file : str
        Label file name ending in ``_pre_disaster.json`` or
        ``_post_disaster.json``; either one of a pair may be given.
    json_path : str
        Path of the labels directory. The images and targets directories are
        derived from it by replacing the last occurrence of ``'labels'`` with
        ``'images'`` and ``'targets'`` respectively.

    Returns
    -------
    tuple of str
        ``(pre_img, post_img, pre_target, post_target)``.
    """
    # Strip the exact suffix instead of removing '_pre' / '_post' anywhere in
    # the name, which would corrupt a stem that happens to contain them.
    for suffix in ('_pre_disaster.json', '_post_disaster.json'):
        if file.endswith(suffix):
            img = file[:-len(suffix)]
            break
    else:
        # Unexpected naming: fall back to the legacy substring removal.
        img = file.replace('_pre', '').replace('_post', '').replace('_disaster.json', '')

    # Replace only the last 'labels' so a parent directory whose name contains
    # 'labels' is left untouched; a path without 'labels' is kept unchanged.
    img_path = 'images'.join(json_path.rsplit('labels', 1))
    target_path = 'targets'.join(json_path.rsplit('labels', 1))

    pre_img = path.join(img_path, img + '_pre_disaster.png')
    post_img = path.join(img_path, img + '_post_disaster.png')
    pre_target = path.join(target_path, img + '_pre_disaster_target.png')
    post_target = path.join(target_path, img + '_post_disaster_target.png')
    return pre_img, post_img, pre_target, post_target
        

In [None]:
# Pick one label file at random and show its four associated pictures.
rand_file = random.choice([f for f in listdir(json_path) if path.isfile(path.join(json_path, f))])

panel_paths = four_images(rand_file, json_path)
panel_titles = ('pre image', 'post image', 'pre target', 'post target')

plt.figure(figsize=(15, 15))
for position, (picture, title) in enumerate(zip(panel_paths, panel_titles), start=1):
    plt.subplot(2, 2, position)
    plt.imshow(plt.imread(picture))
    plt.axis('off')
    plt.title(title)
plt.show()

# Metadata row of the sampled file.
df[df['file'] == rand_file]

In [17]:
# Same summary as the earlier df.info() cell; no visible cell modifies `df`
# in between.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5598 entries, 0 to 5597
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   disaster          5598 non-null   object        
 1   disaster_type     5598 non-null   object        
 2   date              5598 non-null   datetime64[ns]
 3   lgt               4566 non-null   float64       
 4   lat               4566 non-null   float64       
 5   severity          5598 non-null   int64         
 6   highest_severity  5598 non-null   object        
 7   moment            5598 non-null   object        
 8   file              5598 non-null   object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(5)
memory usage: 393.7+ KB


In [None]:
# Side-by-side pie charts: share of label files per disaster type and per
# highest damage label.
pie_panels = (
    ('disaster_type', 'Disaster repartition'),
    ('highest_severity', 'Highest severity'),
)

plt.figure(figsize=(20, 20))
for position, (column, title) in enumerate(pie_panels, start=1):
    counts = df[column].value_counts()
    plt.subplot(1, 2, position)
    plt.pie(counts, labels=counts.index)
    plt.title(title)
plt.show()

