[GridFS](https://docs.mongodb.com/manual/core/gridfs/) stores files that exceed BSON's document size limit of 16MB. Files are encoded as ASCII-readable characters using [base64](https://docs.python.org/3/library/base64.html). This notebook provides basic read/write functions for a local MongoDB instance.

In [8]:
import pymongo
import gridfs
import base64 # what are other alternatives? Any benefits?

In [9]:
def create_fs(dbName, fsName, port=27017):
    """Return a GridFS handle backed by a MongoDB server on localhost.

    Args:
        dbName: Name of the database to select on the client.
        fsName: Passed to ``gridfs.GridFS`` as its ``collection`` argument.
        port: Port used in the ``mongodb://localhost:<port>/`` URL.

    Returns:
        The ``gridfs.GridFS`` object built on the selected database.
    """
    # Index the client by name to select the database, then wrap it.
    database = pymongo.MongoClient(f'mongodb://localhost:{port}/')[dbName]
    return gridfs.GridFS(database, collection=fsName)

def write_new_file_to_db(filename, fs, chunkSize=800000):
    """Store a file from disk in GridFS as base64 text.

    The file's raw bytes are base64-encoded and written to a new GridFS
    file whose ``filename`` field is the path that was passed in.

    Args:
        filename: Path of the file to read; also used as the GridFS filename.
        fs: Object exposing ``new_file(**kwargs)`` that returns a writable
            context manager (e.g. the handle returned by ``create_fs``).
        chunkSize: Value forwarded as the ``chunkSize`` keyword of
            ``fs.new_file``. Defaults to 800000, the value that used to be
            hard-coded here.

    Raises:
        OSError: If ``filename`` cannot be opened or read.
    """
    # The whole file is read and encoded before GridFS is touched, so an
    # unreadable source never leaves a half-created entry behind.
    with open(filename, "rb") as src:  # binary mode: no text decoding
        encoded = base64.b64encode(src.read())
    with fs.new_file(chunkSize=chunkSize, filename=filename) as grid_in:
        grid_in.write(encoded)
        
def write_file_to_disk(filename, newFilename, fs):
    """Fetch a base64-encoded file from GridFS and write the decoded bytes.

    Args:
        filename: GridFS ``filename`` to look up.
        newFilename: Path on disk that receives the decoded bytes.
        fs: Object exposing ``find_one(filter=...)`` (e.g. the handle
            returned by ``create_fs``).

    Raises:
        FileNotFoundError: If no stored file matches ``filename``.
        binascii.Error: If the stored content is not valid base64.

    Note:
        The lookup filters on ``filename`` only and applies no sort, so when
        several stored files share a name, whichever one ``find_one`` yields
        is used.
    """
    # Standard query to Mongo
    stored = fs.find_one(filter=dict(filename=filename))
    if stored is None:
        raise FileNotFoundError(f"no GridFS file named {filename!r}")

    # Decode before opening the destination so a failed lookup or corrupt
    # payload never creates or truncates newFilename.
    payload = base64.b64decode(stored.read())
    with open(newFilename, "wb") as out:
        out.write(payload)

In [10]:
# Round-trip a PDF: store it in GridFS, then write it back out as a copy.
dbName = 'papahana'
fsName = 'fs'
fs = create_fs(dbName, fsName)

filename = 'bartok.pdf'
newFilename = 'bartok_copy.pdf'

write_new_file_to_db(filename, fs)
write_file_to_disk(filename, newFilename, fs)

In [11]:
# Round-trip a JPG through the GridFS handle `fs` created in an earlier cell.
filename, newFilename = 'waterfall_jump.JPG', 'waterfall_jump_copy.JPG'

write_new_file_to_db(filename, fs)
write_file_to_disk(filename, newFilename, fs)