This notebook scrapes satellite images for each leak repair. For each location it gets an NxM rectangle around the leak before and after it was repaired. Then it collates all the data into h5 files and all the metadata into json files.

It takes days to run because of rate limiting on the Google Earth Engine API. Because of limited satellite coverage you might find matches for only 10% of the leaks.

## Modifying

- make sure Google Earth Engine is set up
- load leaks, so they pass the asserts
- change params
- run rest of cells

In [1]:
from path import Path
import arrow
import json
import pytz
import time
from pprint import pprint
from tqdm import tqdm_notebook as tqdm
import re, os, collections, itertools, uuid, logging
import tempfile
import tables

import zipfile
import urllib

import ee
import pyproj
import numpy as np
import scipy as sp
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import seaborn as sns
import shapely

# Notebook-wide display settings: figure size, plot style, inline rendering and float precision.
plt.rcParams['figure.figsize'] = (15, 5) # bigger plots
plt.style.use('fivethirtyeight')
%matplotlib inline
%precision 4

'%.4f'

In [2]:
# %load_ext autoreload
# %autoreload 2

In [7]:
# Put the parent directory on the import path (once) so the project-local
# `leak_helpers` package can be imported from this notebook.
helper_dir = str(Path('..').abspath())
if helper_dir not in os.sys.path:
    os.sys.path.append(helper_dir)
    
from leak_helpers.earth_engine import display_ee, get_boundary, tifs2np, bands_s1, download_image, bands_s2, bands_l7, bands_l8
from leak_helpers.visualization import imshow_bands

# Load leaks

Load the leaks from a geojson file and make sure each one has a `REPO_Date` field and a unique `leak_id` field (see asserts below)

In [11]:
# Load each leak dataset; every frame listed in `leaks_datas` is joined in the following cell.

leaks1 = gpd.read_file('../../data/leak_datasets/austin_leaks/derived/austin_leaks-repairs.geojson')
# leaks2...
leaks_datas = [leaks1]

In [12]:
# Join all datasets: the primary columns stay as real columns, every other
# column is folded into a per-row `metadata` dict.
primary_cols = ['leak_id','REPO_Date','geometry']
leaks = gpd.GeoDataFrame(
    pd.concat([leaks_data[primary_cols] for leaks_data in leaks_datas]),
    crs={'init': 'epsg:4326'}
)
# `axis=1` is given by keyword: the positional form `drop(labels, 1)` is
# deprecated and rejected by newer pandas releases.
leaks['metadata'] = np.concatenate([
    leaks_data.drop(primary_cols, axis=1).to_dict('records')
    for leaks_data in leaks_datas
])
# index by leak id so later cells can select a leak with `leaks.loc[[leak_id]]`
leaks.index = leaks.leak_id
leaks

Unnamed: 0_level_0,leak_id,REPO_Date,geometry,metadata
leak_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ATX-47486,ATX-47486,2009-08-24T19:56:00,POINT (-97.82460426524551 30.24299059253743),"{'SUFFIX': 'CIR', 'ZIP': '78735- ', 'INITDT..."
ATX-47487,ATX-47487,2009-08-24T20:00:00,POINT (-97.82675029997988 30.24219926503761),"{'SUFFIX': 'CIR', 'ZIP': '78735- ', 'INITDT..."
ATX-47488,ATX-47488,2009-08-22T06:05:00,POINT (-97.90543523261422 30.21938883975655),"{'SUFFIX': 'PASS', 'ZIP': '78737- ', 'INITD..."
ATX-47489,ATX-47489,2008-12-11T15:47:00,POINT (-97.74688985035503 30.27942444961227),"{'SUFFIX': 'AVE', 'ZIP': '78701- ', 'INITDT..."
ATX-47490,ATX-47490,2008-12-12T03:09:00,POINT (-97.75148894169132 30.32082458189908),"{'SUFFIX': 'BLVD', 'ZIP': '78731- ', 'INITD..."
ATX-47491,ATX-47491,2008-12-16T16:24:00,POINT (-97.76771015540359 30.2064011266074),"{'SUFFIX': 'DR', 'ZIP': '78745- ', 'INITDTT..."
ATX-47492,ATX-47492,2008-12-04T05:52:00,POINT (-97.75918111620598 30.29698530319657),"{'SUFFIX': 'LN', 'ZIP': '78703- ', 'INITDTT..."
ATX-47493,ATX-47493,2008-12-18T11:41:00,POINT (-97.7354500990549 30.30641876306587),"{'SUFFIX': 'ST', 'ZIP': '78751- ', 'INITDTT..."
ATX-47494,ATX-47494,2010-03-08T00:30:00,POINT (-97.82803137965742 30.24776340772963),"{'SUFFIX': 'LN', 'ZIP': '78735- ', 'INITDTT..."
ATX-47495,ATX-47495,2008-12-08T03:05:00,POINT (-97.71051676533885 30.26298823681678),"{'SUFFIX': 'ST', 'ZIP': '78702- ', 'INITDTT..."


In [13]:
# Restrict the leaks to the time (and optionally the region) covered by the satellite.
print('before',len(leaks))

# drop every repair dated on or before the satellite's first day of service
leaks = leaks.loc[pd.to_datetime(leaks['REPO_Date']) > pd.Timestamp('Oct 3, 2014')]

# Optional spatial filter, currently disabled:
# satellite_bounds = shapely.geometry.box(
#     # continental us
#     minx = -124.7844079, # west long
#     miny =  24.7433195, # south lat
#     maxx = -66.9513812, # east long
#     maxy = 49.3457868, # north lat
# )
# leaks = leaks[leaks.intersects(satellite_bounds)]
print('after',len(leaks))

before 23131
after 4796


In [14]:
# Validate the schema the rest of the notebook relies on.
assert 'REPO_Date' in leaks.columns, 'should have REPO_Date columns with the leak repair date'
# arrow.get raises on an unparsable value, so reaching `.all()` means every date parsed
assert leaks.REPO_Date.apply(lambda x:arrow.get(x)).all(), 'should be parsable via arrow'
assert 'leak_id' in leaks.columns, 'should have unique leak_id column'
# leak_id is used as the frame index and as the cache key, so duplicates would
# make `leaks.loc[[leak_id]]` ambiguous
assert leaks.leak_id.is_unique, 'leak_id values should be unique'
# download folder names are built as leak_id + '_before' / '_after', so the id
# itself must not contain an underscore
assert leaks.leak_id.apply(lambda x:'_' not in x).all(), 'should have no underscore in id'

## Params

Customise the values in the cell below

In [15]:
# rename your notebook to start a new dataset

# params
bands = bands_s1 # list of satellite band names from earth engine
satellite = 'COPERNICUS/S1_GRD' # satellite collection name from earth engine
resolution_min = 10.0 # the lowest resolution on earth engine; passed as `scale` when downloading

# how many pixels high and wide your image will be (centered on leak), should be odd, e.g. 25
pixel_length = 25.0 

# you need to tweak this until you pass the "Test the distance need to get your rectangle" cell
fudge_distance_factor = -0.5

## Init

In [16]:
%%javascript
// Send the notebook's path (with '.ipynb' stripped) to the Python kernel,
// where it is assigned to the variable `notebook_name`.
var command = "notebook_name = '" + IPython.notebook.notebook_path.replace('.ipynb','') + "'";
IPython.notebook.kernel.execute(command);

<IPython.core.display.Javascript object>

In [17]:
# Display the name set by the javascript cell; it is reused for the temp/output paths and the logger.
notebook_name

'scraped_satellite_images/scraping_earth_engine_s1-all'

In [18]:
# constant params, probably don't change
# base search window in seconds (28 days): images are searched this long before
# the repair date and 6x this long after it
time_bin_delta = 60*60*24*28 # how long before a leak to look (in seconds)
crs_grid = 3857 # keep this as auxiliary sphere, this is the CRS the downloaded images will be in

# init
## init directories
ts=arrow.utcnow().format('YYYYMMDD-HH-mm-ss') # UTC timestamp of this run, recorded in the script metadata
temp_dir = Path('/tmp/{}'.format(notebook_name))
output_dir = Path('../../data/scraped_satellite_images/downloaded_images_{}_{}'.format(notebook_name,satellite.replace('/','-')))
cache_dir = output_dir.joinpath('cache')
output_dir.makedirs_p()
temp_dir.makedirs_p()
cache_dir.makedirs_p()

## init logger
logger = logging.getLogger(notebook_name)
# logger.setLevel(logging.WARN)

# display the resolved directories
temp_dir, output_dir, cache_dir

(Path('/tmp/scraped_satellite_images/scraping_earth_engine_s1-all'),
 Path('../data/downloaded_images_scraped_satellite_images/scraping_earth_engine_s1-all_COPERNICUS-S1_GRD'),
 Path('../data/downloaded_images_scraped_satellite_images/scraping_earth_engine_s1-all_COPERNICUS-S1_GRD/cache'))

In [24]:
# record config in a json file, so the dataset documents how it was produced
metadata = dict(
    notebook_name=notebook_name,
    satellite=satellite,
    time_bin_delta=time_bin_delta,
    pixel_length=pixel_length,
    resolution_min=resolution_min,
    bands=bands,
    ts=ts,
    crs_grid=crs_grid,
    cache_dir=str(cache_dir),
    temp_dir=str(temp_dir),
    output_dir=str(output_dir),
)
metadata_file = output_dir.joinpath('script_metadata.json')
# context manager so the file is flushed and closed even if serialisation fails
with open(metadata_file, 'w') as fo:
    json.dump(metadata, fo)

# earth engine

## Steps:
- first need to apply for an account and wait ~ 1day
- Setup instructions [here](https://developers.google.com/earth-engine/python_install#setting-up-authentication-credentials)

## Refs/examples:
- api https://developers.google.com/earth-engine/
- code examples https://code.earthengine.google.com/
- sentinel1 https://developers.google.com/earth-engine/sentinel1
    - `ee.ImageCollection('satellite');`
    - `ee.ImageCollection('COPERNICUS/S1_GRD');`
- keras and google earth https://github.com/patrick-dd/landsat-landstats

In [25]:
# test earth-engine setup
from oauth2client import crypt # should import without error
import ee
ee.Initialize() # should give no errors; if it does, follow the setup instructions


# Smoke test: fetch the description of a well-known public asset.
# Only stable fields are checked; volatile ones (asset size, version, ...)
# would make an exact comparison fail as soon as the asset is updated.
image = ee.Image('srtm90_v4')
info = image.getInfo()
assert info['type'] == 'Image', info
assert 'elevation' in [band['id'] for band in info['bands']], info
print('ok')

ok


In [26]:
# test earth-engine setup
from oauth2client import crypt # should import without error
import ee
ee.Initialize() # should give no errors; if it does, follow the setup instructions


# Inspect the first image of the configured collection to see its bands and properties.
image = ee.Image(ee.ImageCollection(satellite).first())
info = image.getInfo()
info

{'bands': [{'crs': 'EPSG:32649',
   'crs_transform': [25.0000,
    0.0000,
    121126.5406,
    0.0000,
    -25.0000,
    9304939.2309],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'dimensions': [28690, 24380],
   'id': 'HH'},
  {'crs': 'EPSG:32649',
   'crs_transform': [25.0000,
    0.0000,
    121126.5406,
    0.0000,
    -25.0000,
    9304939.2309],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'dimensions': [28690, 24380],
   'id': 'HV'},
  {'crs': 'EPSG:4326',
   'crs_transform': [-0.8707, -0.8644, 126.1510, 0.1329, -0.1273, 81.3603],
   'data_type': {'precision': 'float', 'type': 'PixelType'},
   'dimensions': [21, 23],
   'id': 'angle'}],
 'id': 'COPERNICUS/S1_GRD/S1A_EW_GRDH_1SDH_20141003T003636_20141003T003740_002658_002F54_ECFA',
 'properties': {'GRD_Post_Processing_facility_country': 'Italy',
  'GRD_Post_Processing_facility_name': 'ESRIN headquarters',
  'GRD_Post_Processing_facility_organisation': 'ESRIN',
  'GRD_Post_Processing_facilit

# Fetching images

For each point
- find the nearest image before the repair
- and the soonest image after repair
- save a part of each with metadata

Later we can filter, interpolate, read into numpy arrays, and save to an hdf file

In [27]:
# SQLite database (stored next to the cache directory) with one row per leak id
# that has been recorded via init_cache.
import dataset
cache_file = 'sqlite:///{}'.format(cache_dir.dirname().joinpath('cache.db'))
db = dataset.connect(cache_file)
cache_table = db.get_table('cached_ids', primary_id='leak_id', primary_type='String')

def get_cached_ids():
    """Return the set of leak ids currently stored in the cache table."""
    return {record['leak_id'] for record in cache_table.distinct('leak_id')}

def init_cache(leak_id):
    """Record `leak_id` in the cache table and return every cached id.

    Parameters
    ----------
    leak_id : str or None
        Id to record. A falsy value records nothing, so the call only
        returns the ids that are already stored.

    Returns
    -------
    set
        All leak ids in the cache table after the (attempted) insert.
    """
    if leak_id:
        try:
            cache_table.insert(dict(leak_id=leak_id))
        except Exception:
            # Best effort: a failed insert (presumably a duplicate id) is rolled
            # back and ignored. `Exception` instead of a bare `except` so that
            # KeyboardInterrupt / SystemExit still stop a multi-day run.
            db.rollback()
        else:
            db.commit()
    return get_cached_ids()
# Smoke-test the cache without writing to it: a falsy id makes init_cache skip
# the insert and only return the stored ids (a dummy id such as 1 would be
# stored permanently alongside the real leak ids).
init_cache(None)

{'1', '5', '7'}

In [28]:
# def get_cached_ids():
#     cache_dirs = [str(f.relpath(cache_dir)).split('_')[0] for f in cache_dir.listdir()]
#     return cache_dirs

# def init_cache(leak_id):
#     """We will cache downloads in folders like 'id_after'"""
#     if leak_id:
#         cache_subdir = cache_dir.joinpath(leak_id+'_after')
#         cache_subdir.makedirs_p()
#         cache_subdir = cache_dir.joinpath(leak_id+'_before')
#         cache_subdir.makedirs_p()
#     return get_cached_ids()

### Test the distance need to get your rectangle

Here we need to tweak `fudge_distance_factor` so that we get the image size of our choice. Start with zero and try -1, -0.5, -0.25, 0, 0.25, 0.5, 0.75. This is to deal with rounding, projecting between CRSs, etc. Don't worry, the asserts below will let you know when it's right.

Occasionally the problem might be that the leak is at the edge of the image, giving a cropped image. Ignore these rare cases.

In [29]:
# Half the side length of the requested rectangle: half the pixel count (nudged
# by the fudge factor) times the pixel size. Passed to get_boundary as `distance`.
distance = resolution_min*(pixel_length/2.0+fudge_distance_factor)

In [30]:
# Test with a few random leaks that `distance` yields images of the expected size.

# Fixed, wide date window in epoch milliseconds (the same unit the scraping
# cell builds with `* 1000`), so the test does not depend on each repair date.
test_window_start_ms = 933828614605
test_window_end_ms = 1488776737937

# sample without replacement so the same leak is not downloaded twice
for i in np.random.choice(leaks.index, min(5, len(leaks)), replace=False):
    leak=leaks.loc[[i]]
    leak_id = str(leak['leak_id'].values[0])

    boundary = get_boundary(leak, distance=distance)
    sentinel2_before = ee.ImageCollection(satellite)\
        .filterBounds(boundary)\
        .filterDate(test_window_start_ms, test_window_end_ms)
    image = ee.Image(sentinel2_before.first()).clip(boundary)
    # result is discarded; presumably kept so server-side problems surface before the download
    image.getInfo()
    name=leak_id+'_after'
    path,files=download_image(
        image, 
        scale=resolution_min, 
        crs=crs_grid, 
        name=name,
        cache_dir=Path('/tmp')
    )
    data = tifs2np(path,files,bands=bands)
    print(i,[(d.shape,d.sum()) for d in data])
    for d in data:
        assert d.shape[0]==pixel_length, 'the downloaded image is the wrong size, tweak distance'
        assert d.shape[1]==pixel_length
    assert np.sum(data)!=0, 'should not be empty (make sure you are using the right bands)'

KeyboardInterrupt: 

In [32]:
import time
import traceback
# Snapshot of the leak ids already recorded in the cache; also bound as the
# default `cached_ids` argument of get_image_for_leak below.
cached_ids = get_cached_ids()


def _download_with_metadata(image, name, leak):
    """Download `image` into the cache and write a `metadata.json` beside it."""
    path, files = download_image(
        image,
        scale=resolution_min,
        crs=crs_grid,
        name=name,
        cache_dir=cache_dir
    )
    # Build the metadata before opening the file, so a failing getInfo() call
    # cannot leave an empty, unparsable metadata.json behind.
    metadata = dict(
        image=image.getInfo(),
        scale=resolution_min,
        crs=crs_grid,
        name=name,
        distance=distance,
        leak=json.loads(leak.to_json())
    )
    with open(path.joinpath('metadata.json'), 'w') as fo:
        json.dump(metadata, fo)
    return path, files


def _mark_attempted(leak_id, cached_ids):
    """Record `leak_id` in the persistent cache and in the caller's in-memory set."""
    init_cache(leak_id)
    # Rebinding the name would only change a local variable; update the set
    # object itself so the caller sees the new id.
    if isinstance(cached_ids, set):
        cached_ids.add(leak_id)


def get_image_for_leak(i, cached_ids=cached_ids):
    """Download the images just before and just after the repair of leak `i`.

    The "before" image is the latest one within `time_bin_delta` seconds before
    the repair date; the "after" image is the earliest one within
    6 * `time_bin_delta` seconds after it. Both are clipped to the boundary
    around the leak and saved, each with a `metadata.json`, under `cache_dir`.

    The leak id is recorded in the cache when both images were saved or when
    either search returned nothing, so it is skipped on later runs. If an
    exception is raised the id is not recorded and the leak is retried later.

    Parameters
    ----------
    i : index label of the leak in `leaks` (the leak id)
    cached_ids : set
        Ids that were already attempted; updated in place.

    Returns
    -------
    None
    """
    leak = leaks.loc[[i]]

    # skip leaks that were already attempted (successfully or not)
    leak_id = leak.leak_id.values[0]
    if leak_id in cached_ids:
        logger.info('Skipping cached download for leak id %s ',leak_id)
        return

    # `float_timestamp` is a property in every arrow release, whereas
    # `timestamp` became a method in arrow 1.0
    repo_date_ts = int(arrow.get(leak.REPO_Date.values[0]).float_timestamp)
    boundary = get_boundary(leak, distance=distance)

    # images before the repair (filterDate takes epoch milliseconds)
    images_before = ee.ImageCollection(satellite)\
        .filterBounds(boundary)\
        .filterDate((repo_date_ts-time_bin_delta)*1000,(repo_date_ts)*1000)\
        .sort('system:time_start', opt_ascending=False) # first will be latest

    if images_before.size().getInfo()<1:
        logger.info('Error no results for day before %s',leak_id)
        _mark_attempted(leak_id, cached_ids) # so we know there were no results
        return

    # images after the repair
    images_after = ee.ImageCollection(satellite)\
        .filterBounds(boundary)\
        .filterDate((repo_date_ts)*1000,(repo_date_ts+time_bin_delta*6)*1000)\
        .sort('system:time_start', opt_ascending=True) # first will be earliest

    if images_after.size().getInfo()<1:
        logger.info('Error no results for day after, id %s',leak_id)
        _mark_attempted(leak_id, cached_ids) # so we know there were no results
        return

    # download and save both images, with metadata so we can filter by date later
    logger.info('results for %s', leak_id)
    _download_with_metadata(
        ee.Image(images_before.first()).clip(boundary),
        leak_id+'_before',
        leak
    )
    _download_with_metadata(
        ee.Image(images_after.first()).clip(boundary),
        leak_id+'_after',
        leak
    )

    _mark_attempted(leak_id, cached_ids) # so we know this leak is done

    return

# Scrape every leak that has not been attempted yet. Could take 27 hours.
# A leak whose download raises is not recorded in the cache, so it is picked
# up again the next time this cell runs.
import urllib.error  # `import urllib` alone does not guarantee the `error` submodule is loaded

leak_to_scrape = set(leaks.leak_id).difference(set(cached_ids))
for i in tqdm(leak_to_scrape):
    try:
        get_image_for_leak(i)
    except urllib.error.HTTPError as e:
        print(i,e) # "HTTP Error 429: unknown"
        # print_exc shows the traceback of the exception being handled;
        # print_stack would only show this loop's own call stack
        traceback.print_exc()
        if e.code == 429:
            # rate limited: back off before moving on to the next leak
            print('sleep for 13s')
            time.sleep(13)
    except ee.ee_exception.EEException as e:
        print(i,e) # "Earth Engine memory capacity exceeded."
        traceback.print_exc()
        ee.Initialize()
    except zipfile.BadZipFile as e:
        print(i,e) # "File is not a zip file"
        traceback.print_exc()
    except Exception as e:
        # deliberately broad so one bad leak does not abort a multi-day run
        # e.g. "An internal server error has occurred (216bc442fe171620592bc53fb578bceb)."
        print(i,e)
        traceback.print_exc()




# load tiffs to arrays

When there are errors, e.g. no metadata.json in a directory, delete the directory and go back to the scraping step

TODO automate the directory deletion

In [19]:
# This loads it as X and y for machine learning, and also time and metadata so we can filter
import shapely.geometry  # explicit submodule import; a bare `import shapely` does not guarantee it is loaded
X = []  # per image: band arrays returned by tifs2np
y = []  # per image: True for a "before" image, False otherwise
t = []  # per image: seconds from the repair date to the image's system:time_end
m = []  # per image: parsed metadata.json
discarded=[]  # paths of images that failed the sanity checks below
cdirs = [cdir for cdir in cache_dir.listdir() if ('_after_' in cdir) or ('_before_' in cdir)]
for path in tqdm(cdirs):
    files = [file.relpath(path) for file in path.listdir() if file.endswith('.tif')]
    if not files:
        continue

    # check metadata
    try:
        with open(path.joinpath('metadata.json')) as fo:
            metadata = json.load(fo)
    except (FileNotFoundError, ValueError):
        # Quarantine the broken folder (renamed, not erased) so the scraping
        # cell can download it again.
        path.move(path.dirname().dirname().joinpath('.deleteme-'+str(uuid.uuid4())))
        if '_after_' in path: # also quarantine the matching "before" folder
            path_before = Path(path.replace('_after_','_before_'))
            if path_before.isdir():
                path_before.move(path.dirname().dirname().joinpath('.deleteme-'+str(uuid.uuid4())))
        logger.error('Invalid metadata.json, deleted folder %s, please rerun scraping cell to rescrape this image', path)
        continue

    # label: True for images taken before the repair
    yy = '_before_' in path.basename()

    # work out time gap too (negative when the image precedes the repair)
    t1 = arrow.get(metadata['image']['properties']['system:time_end']/1000)
    t0 = arrow.get(metadata['leak']['features'][0]['properties']['REPO_Date'])
    td=t1-t0
    tt = td.total_seconds()

    # load data
    data = tifs2np(path,files,bands=bands)

    # flag bands that are entirely zero
    empty_bands = np.array([d.sum() for d in data])==0

    # lets check we didn't get the edge of an image
    bbox = np.array(metadata['image']['properties']['system:footprint']['coordinates'][0])
    loc = metadata['leak']['features'][0]['geometry']['coordinates']
    minx=bbox[:,0].min()
    maxx=bbox[:,0].max()
    miny=bbox[:,1].min()
    maxy=bbox[:,1].max()
    bbox_shp = shapely.geometry.box(
        minx=minx,
        maxx=maxx,
        miny=miny,
        maxy=maxy
    )
    loc_shp = shapely.geometry.Point(loc[0],loc[1])
    try:
        assert loc_shp.intersects(bbox_shp), 'leak location should be inside image'
        # equals_exact with tolerance 0.5e-5 is what almost_equals(decimal=5)
        # computed; almost_equals itself no longer exists in Shapely 2
        assert bbox_shp.centroid.equals_exact(loc_shp, 0.5 * 10 ** -5), 'leak should be near center of image'
        assert (np.array([d.shape for d in data])==pixel_length).all(), 'image area should be the right amount of pixels'
        assert (maxx-minx)/(maxy-miny)<1.3, 'should be roughly square'
        assert (maxx-minx)/(maxy-miny)>0.7, 'should be roughly square'
        assert not empty_bands.all(), 'should not have all bands empty'
    except Exception as exc:
        # any failed check (or error while checking) discards this image
        print(path, exc)
        discarded.append(path)
    else:
        X.append(data)
        y.append(yy)
        t.append(tt)
        m.append(metadata)


len(X), len(discarded)




(0, 0)

In [34]:
# Shuffle the four parallel lists together (same order for all of them),
# with a fixed seed so the result is reproducible.
from sklearn.utils import shuffle
X,y,m,t = shuffle(X,y,m,t,random_state=1337)

In [35]:
# save using hdf5 (so keras can easily load it) and json 
import h5py
h5file = output_dir.joinpath('data.h5')
with h5py.File(h5file, 'w') as h5f:
    h5f.create_dataset('X', data=X)
    h5f.create_dataset('y', data=y)
    # the readme below and the test-load cell both read 't', so it must be stored too
    h5f.create_dataset('t', data=t)

# context manager so the metadata file is flushed and closed
with open(output_dir.joinpath('data_metadata.json'),'w') as fo:
    json.dump(m, fo)

with open(output_dir.joinpath('readme.md'),'w') as fo:
    fo.write("""
Files:
- cache- cached tiff files
- script_metadata.json - information on scraping script
- data.h5 contains X, y, and t.
    - X: tiff files for each band loaded into an array of shape (Leak, Bands, width, length)
    - y: True for before the leak, False for after
- data_metadata: array of metadata for each leak in X. Each contain info on leak, image, and image search
    
Loading: 
```py
# load
metadatas = json.load(open('data_metadata.json'))
with h5py.File('data.h5','r') as h5f:
    X2 = h5f['X'][:]
    y2 = h5f['y'][:]
    t2 = h5f['t'][:]
y
```
    """)

In [36]:
# test load: read everything back to check the saved files are usable
with open(output_dir.joinpath('data_metadata.json')) as fo:  # closed on exit, unlike a bare open()
    metadatas = json.load(fo)
with h5py.File(output_dir.joinpath('data.h5'),'r') as h5f:
    X2 = h5f['X'][:]
    y2 = h5f['y'][:]
    t2 = h5f['t'][:]
X2.shape, y2, t2, metadatas[0].keys()

((8607, 5, 25, 25),
 array([False,  True,  True, ..., False,  True,  True], dtype=bool),
 array([ 11083802.003,   -181486.568,    -95632.653, ...,   1978451.457,
           -35975.277,   -374740.941]),
 dict_keys(['leak', 'name', 'crs', 'image', 'scale', 'distance']))

In [37]:
# Display where the dataset (data.h5, data_metadata.json, readme.md) was written.
output_dir

Path('../data/downloaded_images_scraping_earth_engine_s1-all_COPERNICUS-S1_GRD')

# NOTES

Can I speed this up? What is the bottleneck internet or filesystem?

`%prun get_image_for_leak(5)`
```

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       38   12.424    0.327   12.424    0.327 {method 'read' of '_ssl._SSLSocket' objects}
        8    1.153    0.144    1.153    0.144 {method 'do_handshake' of '_ssl._SSLSocket' objects}
        8    0.716    0.090    0.716    0.090 {built-in method getaddrinfo}
        8    0.541    0.068    0.542    0.068 {method 'connect' of '_socket.socket' objects}
        6    0.015    0.003    0.015    0.003 {method 'load_verify_locations' of '_ssl._SSLContext' objects}
       78    0.005    0.000    0.005    0.000 {built-in method open}
       36    0.002    0.000    0.013    0.000 zipfile.py:1257(_extract_member)
        6    0.002    0.000    0.003    0.000 ssl.py:202(_dnsname_match)
8700/2736    0.002    0.000    0.003    0.000 encoder.py:325(_iterencode_dict)
       27    0.002    0.000    0.002    0.000 {built-in method __new__ of type object at 0x9ceec0}
     6889    0.001    0.000    0.002    0.000 {built-in method isinstance}
     2734    0.001    0.000    0.001    0.000 {method 'write' of '_io.TextIOWrapper' objects}
       38    0.001    0.000    0.001    0.000 {built-in method stat}
```


`%prun download_image(image, scale=resolution_min,     crs=crs_grid,     name=name,    cache_dir=cache_dir)`
```
 ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       14    4.477    0.320    4.477    0.320 {method 'read' of '_ssl._SSLSocket' objects}
        2    0.286    0.143    0.286    0.143 {method 'do_handshake' of '_ssl._SSLSocket' objects}
        2    0.270    0.135    0.270    0.135 {built-in method getaddrinfo}
        2    0.134    0.067    0.135    0.067 {method 'connect' of '_socket.socket' objects}
        1    0.002    0.002    0.002    0.002 {method 'load_verify_locations' of '_ssl._SSLContext' objects}
       38    0.002    0.000    0.002    0.000 {built-in method open}
       18    0.001    0.000    0.004    0.000 zipfile.py:1257(_extract_member)
      450    0.000    0.000    0.001    0.000 posixpath.py:99(split)
        4    0.000    0.000    0.000    0.000 {built-in method __new__ of type object at 0x9ceec0}
       18    0.000    0.000    0.000    0.000 {built-in method stat}
      450    0.000    0.000    0.001    0.000 path.py:382(splitpath)
       36    0.000    0.000    0.002    0.000 path.py:452(splitall)
      724    0.000    0.000    0.000    0.000 path.py:156(__get__)
```