## Demo MongoDB
In this section, we learn how to manipulate data on MongoDB.

Required:
pip install pymongo

We cover:
* CRUD operations
* Table to Dataframe

In [1]:
!pip3 install pymongo

Collecting pymongo
  Downloading pymongo-3.11.3-cp38-cp38-manylinux2014_x86_64.whl (531 kB)
[K     |████████████████████████████████| 531 kB 20.1 MB/s eta 0:00:01
[?25hInstalling collected packages: pymongo
Successfully installed pymongo-3.11.3


### CRUD - MongoDB

In [1]:
import pymongo
from datetime import datetime

pymongo.__version__

'3.11.3'

In [23]:
print('connecting to MongoDB server...')
db = pymongo.MongoClient("mongodb://localhost:27017/")

print('connected')

connecting to MongoDB server...
connected


In [24]:
# create database. MongoDB does not create a database until we create content
mydb = db["pydb"]

In [25]:
dbs = db.list_database_names()
dbs

['admin', 'config', 'local', 'pydb']

In [26]:
# create collection ~ table
# a collection is not created until it gets content
colproducts = mydb["products"]

In [6]:
# listing of collections
colls = mydb.list_collection_names()
colls

[]

In [7]:
def create_data(coll):
    print('inserting data...')
    for i in range(1,5):
        product = {
            "name": "product " + str(i),
            "code": "F2" + str(i),
            "price": i*0.21,
            "quantity": i + 5,
            "created": datetime.now()
        }       
        item = coll.insert_one(product)
        inserted_id = item.inserted_id
        print('inserted with id=',inserted_id)

    print('done')

In [27]:
create_data(colproducts)

inserting data...
inserted with id= 605eb4f976a73e4d36b8d50a
inserted with id= 605eb4f976a73e4d36b8d50b
inserted with id= 605eb4f976a73e4d36b8d50c
inserted with id= 605eb4f976a73e4d36b8d50d
done


In [9]:
# insert multiple data
products = [
    {"name": "product 100","code": "F51","price": 2.5,"quantity": 10,"created": datetime.now()},
    {"name": "product 101","code": "F52","price": 3.5,"quantity": 12,"created": datetime.now()},
    {"name": "product 102","code": "F53","price": 4.5,"quantity": 14,"created": datetime.now()},
    {"name": "product 103","code": "F54","price": 5.5,"quantity": 16,"created": datetime.now()},
    {"name": "product 104","code": "F55","price": 6.5,"quantity": 18,"created": datetime.now()},
]

items = colproducts.insert_many(products)
print(items.inserted_ids)

[ObjectId('605eb42976a73e4d36b8d504'), ObjectId('605eb42976a73e4d36b8d505'), ObjectId('605eb42976a73e4d36b8d506'), ObjectId('605eb42976a73e4d36b8d507'), ObjectId('605eb42976a73e4d36b8d508')]


In [10]:
def read_data(coll):
    print('reading data....')
    selected_id = None
    for item in coll.find():
        print("{}, {}, {}, {}, {}, {:%d %b %Y %H:%M:%S}".format(
                item['_id'], item['name'], item['code'], item['price'], item['quantity'], item['created']))
        if selected_id is None:
            selected_id = item['_id']

    print('done')
    return selected_id

In [11]:
selected_id = read_data(colproducts)

reading data....
605eb41676a73e4d36b8d500, product 1, F21, 0.21, 6, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d501, product 2, F22, 0.42, 7, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d502, product 3, F23, 0.63, 8, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d503, product 4, F24, 0.84, 9, 27 Mar 2021 04:27:02
605eb42976a73e4d36b8d504, product 100, F51, 2.5, 10, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d505, product 101, F52, 3.5, 12, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d506, product 102, F53, 4.5, 14, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d507, product 103, F54, 5.5, 16, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d508, product 104, F55, 6.5, 18, 27 Mar 2021 04:27:21
done


In [12]:
selected_id

ObjectId('605eb41676a73e4d36b8d500')

In [13]:
def update_data(coll, id):
    print('updating data with idproduct=', id, '...')
    updated_data = {
        "price": "9.99",
        "quantity": 100
    }
    coll.update_one({"_id":id},{"$set":updated_data})    
    print('done')

In [14]:
update_data(colproducts, selected_id)

updating data with idproduct= 605eb41676a73e4d36b8d500 ...
done


In [15]:
read_data(colproducts)

reading data....
605eb41676a73e4d36b8d500, product 1, F21, 9.99, 100, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d501, product 2, F22, 0.42, 7, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d502, product 3, F23, 0.63, 8, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d503, product 4, F24, 0.84, 9, 27 Mar 2021 04:27:02
605eb42976a73e4d36b8d504, product 100, F51, 2.5, 10, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d505, product 101, F52, 3.5, 12, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d506, product 102, F53, 4.5, 14, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d507, product 103, F54, 5.5, 16, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d508, product 104, F55, 6.5, 18, 27 Mar 2021 04:27:21
done


ObjectId('605eb41676a73e4d36b8d500')

In [16]:
def delete_data(coll, id):
    print('deleting data on idproduct=', id, '...')
    coll.delete_one({"_id":id})    
    print('done')

In [17]:
delete_data(colproducts, selected_id)

deleting data on idproduct= 605eb41676a73e4d36b8d500 ...
done


In [18]:
read_data(colproducts)

reading data....
605eb41676a73e4d36b8d501, product 2, F22, 0.42, 7, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d502, product 3, F23, 0.63, 8, 27 Mar 2021 04:27:02
605eb41676a73e4d36b8d503, product 4, F24, 0.84, 9, 27 Mar 2021 04:27:02
605eb42976a73e4d36b8d504, product 100, F51, 2.5, 10, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d505, product 101, F52, 3.5, 12, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d506, product 102, F53, 4.5, 14, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d507, product 103, F54, 5.5, 16, 27 Mar 2021 04:27:21
605eb42976a73e4d36b8d508, product 104, F55, 6.5, 18, 27 Mar 2021 04:27:21
done


ObjectId('605eb41676a73e4d36b8d501')

In [19]:
def delete_all(coll):
    print('deleting all data....')
    items = coll.delete_many({})
    print(items.deleted_count, " documents deleted.")
    print('done')

In [20]:
delete_all(colproducts)

deleting all data....
8  documents deleted.
done


In [21]:
read_data(colproducts)

reading data....
done


In [22]:
db.close()
print('closed connection')

closed connection


## MongoDB - Pandas


sqlalchemy does not support for NoSQL like MongoDB

In [28]:
# load MongoDB table to pandas
import pandas as pd

products = colproducts.find()
df = pd.DataFrame(list(products))
df

Unnamed: 0,_id,name,code,price,quantity,created
0,605eb4f976a73e4d36b8d50a,product 1,F21,0.21,6,2021-03-27 04:30:49.732
1,605eb4f976a73e4d36b8d50b,product 2,F22,0.42,7,2021-03-27 04:30:49.737
2,605eb4f976a73e4d36b8d50c,product 3,F23,0.63,8,2021-03-27 04:30:49.738
3,605eb4f976a73e4d36b8d50d,product 4,F24,0.84,9,2021-03-27 04:30:49.740


In [29]:
# load dataframe
products = {'name': ['Product A1', 'Product A2', 'Product A3'],           
            'code': ['E01', 'E02', 'E03'],
            'price': [2.1, 3.6, 3.5],
            'quantity': [5, 7, 3],
            'created': ['2020-10-18 20:12:21', '2020-10-18 20:12:21', '2020-10-18 20:12:21']
           }

dataFrame   = pd.DataFrame(data=products) 
dataFrame

Unnamed: 0,name,code,price,quantity,created
0,Product A1,E01,2.1,5,2020-10-18 20:12:21
1,Product A2,E02,3.6,7,2020-10-18 20:12:21
2,Product A3,E03,3.5,3,2020-10-18 20:12:21


In [30]:
# insert dataframe to mongodb
colexproducts = mydb["exproducts"]
data = dataFrame.to_dict(orient='records') 
colexproducts.insert_many(data)

<pymongo.results.InsertManyResult at 0x7f983c37ef80>

In [31]:
exproducts = colexproducts.find()
df = pd.DataFrame(list(exproducts))
df

Unnamed: 0,_id,name,code,price,quantity,created
0,605eb53876a73e4d36b8d50e,Product A1,E01,2.1,5,2020-10-18 20:12:21
1,605eb53876a73e4d36b8d50f,Product A2,E02,3.6,7,2020-10-18 20:12:21
2,605eb53876a73e4d36b8d510,Product A3,E03,3.5,3,2020-10-18 20:12:21


## Drop Collections

In [32]:
# Delete collections
colproducts.drop()
colexproducts.drop()
print('done')

done


In [33]:
db.close()
print('closed connection')

closed connection
