In [2]:
from pymongo import MongoClient
import time
import os
import random

client = MongoClient('localhost', 27017)
db = client['USDA_NAIP_DOQQ']
collection = db['m_4207511_sw_18_060_20221022']

# get one document
start_time = time.time()
doc = collection.find_one()
end_time = time.time()
print(f"Time taken to get one document: {end_time - start_time} seconds")

# get the image patch from the document
patch = doc['patch']
# get the size of the patch file
patch_size = len(patch)
print(f"Size of the patch file: {patch_size} bytes")

Time taken to get one document: 0.03676414489746094 seconds
Size of the patch file: 522956 bytes


In [3]:
datasets = client.list_database_names()
for dataset in datasets:
    if dataset not in ['admin', 'config', 'local']:
        print(dataset)

USDA_NAIP_DOQQ


In [27]:
import matplotlib.pyplot as plt
import io
from PIL import Image, ImageFile

# load specific image
COLLECTION = 'm_1915517_se_05_060_20220331_20220909'
PATCH = '6_2'

# get the collection and test time
start_time = time.time()
collection = db.get_collection(COLLECTION)
test_time = time.time() - start_time
print(f"Time to load collection: {test_time:.2f}s")

# get the patch and test time
start_time = time.time()
patch = collection.find_one({'name': PATCH})
test_time = time.time() - start_time
print(f"Time to load patch: {test_time:.2f}s")



Time to load collection: 0.00s
Time to load patch: 0.05s


In [28]:
ImageFile.LOAD_TRUNCATED_IMAGES = True
img = Image.open(io.BytesIO(patch['patch'])).convert('RGB')

plt.imshow(img)

TypeError: 'NoneType' object is not subscriptable

In [29]:
from tqdm import tqdm
patches = collection.find({}, {"name": 1})
name_list = []
id_list = []
for name in patches:
    name_list.append(name['name'])
    id_list.append(name['_id'])
print(name_list)
# shuffling the list
random.shuffle(name_list)
start_time = time.time()
for name in tqdm(name_list, desc='Loading patches by name'):
    patch = collection.find_one({'name': name})
    img = Image.open(io.BytesIO(patch['patch'])).convert('RGB')
end_time = time.time()
print('Time elapsed:', end_time - start_time, 'throughput: ', len(name_list) / (end_time - start_time))
random.shuffle(id_list)
start_time = time.time()
for _id in tqdm(id_list, desc='Loading patches by _id'):
    patch = collection.find_one({'_id': _id})
    img = Image.open(io.BytesIO(patch['patch'])).convert('RGB')
end_time = time.time()
print('Time elapsed:', end_time - start_time, 'throughput: ', len(id_list) / (end_time - start_time))

['11_7', '1_12', '6_10', '2_8', '5_13', '5_2', '2_1', '8_13', '3_12', '1_7', '4_10', '6_4', '6_3', '4_5', '8_2', '3_6', '8_5', '10_11', '12_6', '13_7', '2_10', '9_4', '2_7', '9_3', '12_10', '2_9', '7_2', '11_8', '7_5', '8_4', '12_7', '3_7', '3_9', '4_4', '7_12', '4_11', '10_8', '13_13', '1_1', '10_6', '10_1', '9_11', '13_14', '12_3', '3_4', '3_3', '10_13', '4_7', '7_11', '6_6', '9_12', '13_10', '1_2', '10_5', '2_4', '13_4', '2_13', '9_7', '8_11', '9_9', '7_6', '6_12', '7_1', '11_2', '11_5', '7_8', '13_11', '10_4', '6_9', '9_14', '10_3', '9_13', '6_7', '4_6', '8_6', '4_8', '12_5', '8_1', '11_4', '7_9', '11_3', '5_1', '13_5', '2_12', '9_6', '8_10', '12_12', '13_2']


Loading patches by name: 100%|██████████| 88/88 [00:00<00:00, 173.77it/s]


Time elapsed: 0.5113587379455566 throughput:  172.09053736629252


Loading patches by _id: 100%|██████████| 88/88 [00:00<00:00, 136.74it/s]

Time elapsed: 0.6477766036987305 throughput:  135.8493028268234





In [25]:
print(len(name_list))
print('patches/sec:', len(name_list) / (end_time - start_time))

145
patches/sec: 9.138160511162583


In [3]:
# delete database named 'RSVLD' in mongodb
client.drop_database('RSVLD')