<a href="https://colab.research.google.com/github/Giffy/MongoDB_PyMongo_Tutorial/blob/master/2_Basic_PyMongo_guide.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Basic PyMongo guide


*   CRUD part 2
*   Query operators
>-  Count
>-  Maximum and Minimum
>-  Inclusion, exclusion operators IN and NIN
>-  Relational operators "greater than or equal", "greater than", "equal", "less than", "less than or equal"
>-  Logical Query Operators
>-  Exists & not exists
*   List items that belong to a list or not
*   Indexes


#1. System setup

##1.1 Install PyMongo (if required)

In [0]:
# Install PyMongo
! python -m pip install pymongo==3.7.2

##1.2 Import PyMongo and set database

In [0]:
import datetime                           # Imports datetime library
import pymongo                            # Imports PyMongo library
from pymongo import MongoClient           # Imports MongoClient 

# The URI (uniform resource identifier) carries the connection parameters.
# General forms:
#   uri = 'mongodb://USER:PASSWORD@SERVER_NAME:PORT/DATABASE_NAME'
#   uri = 'mongodb://USER:PASSWORD@HOST_1:PORT,HOST_2:PORT,HOST_3:PORT/DATABASE_NAME'
uri = 'localhost:27017'

# Open the client connection to the MongoDB server described by `uri`
client = MongoClient(host=uri)

In [0]:
# After the previous module a database called 'people' should already be listed
database_names = client.list_database_names()
database_names

In [0]:
# Start from a clean state: remove the data left over from the previous module
client.drop_database("people")

##1.3 Download database and import dataset into mongodb

In [0]:
# Downloading JSON with Agenda
!wget https://raw.githubusercontent.com/Giffy/Personal_dataset_repository/master/contacts.json
  
# Uploading data to Mongo Database
!mongodb-linux-x86_64-debian71-3.0.15/bin/mongoimport --jsonArray --db people --collection addressbook  /content/contacts.json

# Reference https://www.kenwalger.com/blog/nosql/mongodb/importing-data-mongoimport/

#2. Database overview

In [0]:
# Select the database to work on (item access is equivalent to client.people)
db = client['people']

# Show which collections the database contains
db.list_collection_names()

In [0]:
# Select the collection used by every query in the rest of the notebook
collection = db['addressbook']

##2.1 Dataset size and attributes

In [0]:
## Dataset content summary
# Every document has an '_id', so this filter counts the whole collection.
num_documents = collection.count_documents({'_id' : {'$exists' : 1}})

# Read the attribute names from the first document.
# find_one() replaces find().limit(1)[1]: indexing a cursor with [1] ignores the
# limit and skips the first document, so it read the SECOND document and raised
# IndexError when the collection held a single document.
## WARNING use as reference , NoSQL db can have different attributes by document
first_document = collection.find_one()
attributes = list(first_document) if first_document is not None else []

print ('Number of documents : %d' % num_documents)
print ('Attributes names : %s' % attributes)

### Warning: documents in a NoSQL database can each have a different set of attributes

In [0]:
## Dataset content summary
def content_attribute( attribute_name ):
    """Return the list of distinct values stored under `attribute_name` in the collection."""
    return collection.distinct( attribute_name )

# For each attribute found above, report how many distinct values it has
# and show at most the first 20 of them.
for attribute_name in attributes:
    content = content_attribute( attribute_name )
    sample = content[:20]        # slicing leaves shorter lists unchanged
    report = ('Item name : ' + attribute_name +
              '\n   Unique content : ' + str(len(content)) +
              '\n   Content : ' + str(sample))
    print (report)

In [0]:
## Show a single document; the limit is passed directly to find()
list(collection.find(limit=1))

#3. Create Read Update Delete - Part 2

##3.1 Adding and delete attributes

### Add a new attribute to your collection

In [0]:
# Add the attribute "favoriteColor" to every document whose age is >= 0
age_not_negative = {"age": {"$gte": 0}}
set_color = {"$set": {"favoriteColor": "red"}}
collection.update_many(age_not_negative, set_color)

In [0]:
# Add the attribute "favoriteBook" to every document whose age is not the empty string
age_not_blank = {"age": {"$nin": [""]}}
set_book = {"$set": {"favoriteBook": "Harry Potter"}}
collection.update_many(age_not_blank, set_book)

In [0]:
# Check the new attributes on one 20-year-old contact
shown_fields = {"favoriteColor", "favoriteBook", "name", "age"}
matches = collection.find({"age": 20}, shown_fields).limit(1)
list(matches)

### Delete attribute and data

In [0]:
## Deprecated form:  collection.update( {"age" :{ "$gte" :0 }}, {"$unset" :{ "favoriteColor" :1 }}, {multi :1})
# Remove both added attributes from every document whose age is >= 0
age_not_negative = {"age": {"$gte": 0}}
drop_added_fields = {"$unset": {"favoriteColor": 1, "favoriteBook": 1}}
collection.update_many(age_not_negative, drop_added_fields)

In [0]:
# Query the same fields again to see the result after the $unset above
shown_fields = {"favoriteColor", "favoriteBook", "name", "age"}
matches = collection.find({"age": 20}, shown_fields).limit(1)
list(matches)

##3.2 Data visualization and Queries

###3.2.1 Visualization of attribute names  -  first level

In [0]:
# Print the first-level attribute names of one matching document.
# find_one() replaces find(...).limit(1)[1]: indexing a cursor with [1] ignores the
# limit and skips the first match, so it raised IndexError with a single match.
sample_document = collection.find_one({"age" : 38, "gender" : 'female'})
print (list (sample_document) if sample_document is not None else [])

###3.2.2 Visualization of attribute unique content (sorted)

In [0]:
# Distinct values of each attribute, sorted in Python before printing
print ("Age :" + str( sorted( collection.distinct( "age" ) ) ))
print ("Gender :" + str( sorted( collection.distinct( "gender" ) ) ))

- Exercise:  Check the unique eye colors and favorite fruit

      hint: attributes =  eyeColor and favoriteFruit

###3.2.3 Find document by id

In [0]:
# Get the id of an existing document: fetch at most one document and
# keep its '_id' as a string ("" when the collection is empty).
found = list(collection.find({"_id": {"$exists": True}}, ['name', 'age']).limit(1))

itemId = str(found[-1]['_id']) if found else ""

print (itemId)

In [0]:
# First import objectid object
from bson.objectid import ObjectId

In [0]:
# Look the document up by its ObjectId, rebuilt from the string stored in itemId.
# 'company.email' uses dot notation to read the second-level attribute 'email'.
id_filter = {"_id": ObjectId(itemId)}
wanted_fields = ['name', 'age', 'favoriteFruit', 'company.email']
list(collection.find(id_filter, wanted_fields))

###3.2.4 Filter by fields

In [0]:
# collection.find( FILTER_CONDITION , FIELDS_to_retrieve )
# FIELDS_to_retrieve is a list of field names, e.g. ['name'] or ['name', 'age']
# (a dict such as {'name': 1, '_id': 0} is also accepted)

filters = {"isActive": True}
fields = ['name', 'age', 'isActive', 'company.email']

# Show one active contact, restricted to the selected fields
active_contacts = collection.find(filters, fields).limit(1)
list(active_contacts)

In [0]:
# Number of documents matching the current `filters`
matching_count = collection.count_documents(filters)
print(matching_count)

In [0]:
# Multiple filters: gender must be 'female' AND age must be 28 OR 29

filters = {"$or": [{"age" : 28}, {"age" : 29}] , "gender" : 'female'}

counted_by_mongo = collection.count_documents(filters)       # counted on the server
counted_by_python = len(list(collection.find(filters)))      # documents fetched, counted in Python

print(counted_by_mongo)
print(counted_by_python)

###3.2.5 Find by regex ( name starts with ' Ki* ')

In [0]:
import re
regex = re.compile('^Ki', flags=re.IGNORECASE)

# Find documents whose name starts with "Ki" (case-insensitive)

filters = { 'name' : regex }
fields = { '_id' : 0, 'name' : 1, 'isActive' : 1, 'age' : 1 }     #  '_id' : 0 hides the id in the reply

name_matches = collection.find( filters , fields )
list ( name_matches )

###3.2.6 Sort query (ascending and descending)

In [0]:
# Ascending: youngest first
by_age_ascending = [('age', pymongo.ASCENDING)]
list(collection.find(filters, fields).sort(by_age_ascending))

In [0]:
# Descending: oldest first
by_age_descending = [('age', pymongo.DESCENDING)]
list(collection.find(filters, fields).sort(by_age_descending))

#4. Query operators

##4.1 Count

In [0]:
# Count documents with "age" equal to 38
# (the previous method, collection.find({"age": 38}).count(), is deprecated)
age_is_38 = {"age": 38}
collection.count_documents(age_is_38)

##4.2 Maximum and Minimum

In [0]:
# Maximum: MongoDB sorts by age descending and returns only the first document
only_age = {"_id": 0, "age": 1}
oldest_first = [('age', pymongo.DESCENDING)]
list(collection.find({}, only_age, sort=oldest_first, limit=1))

In [0]:
# MongoDB returns every distinct age; Python picks the maximum
distinct_ages = collection.distinct("age")
max(distinct_ages)

In [0]:
# Minimum: MongoDB sorts by age ascending and returns only the first document
only_age = {"_id": 0, "age": 1}
youngest_first = [('age', pymongo.ASCENDING)]
list(collection.find({}, only_age, sort=youngest_first, limit=1))

In [0]:
# MongoDB returns every distinct age; Python picks the minimum
distinct_ages = collection.distinct("age")
min(distinct_ages)

In [0]:
# Distinct ages per gender; the gender filter is passed straight to distinct()
agemale = collection.distinct("age", {"gender" : 'male'})
agefemale = collection.distinct("age", {"gender" : 'female'})

male_summary = 'Male -  Min age: ' + str(min(agemale)) + ' and Max age: ' + str(max(agemale))
female_summary = 'Female -  Min age: ' + str(min(agefemale)) + ' and Max age: ' + str(max(agefemale))

print (male_summary)
print (female_summary)

##4.3 Inclusion, exclusion operators  IN and  NIN

In [0]:
selected_names = [ "Kimberley Chase", "Kinney Wynn" ]

# $in counts the documents whose name IS in the list
print( collection.count_documents( { "name" : { "$in": selected_names }} ))
# $nin counts the documents whose name is NOT in the list
print( collection.count_documents( { "name" : { "$nin": selected_names }} ))

In [0]:
# Fetch the documents whose name is in the list.
# NOTE(review): the projection is an empty string; presumably PyMongo treats an
# empty projection as "only _id" — confirm against the driver version in use.
name_in_list = { "name" : { "$in": ["Kimberley Chase", "Kinney Wynn"] }}
list( collection.find( name_in_list, '' ) )

In [0]:
# Excluding only the empty string as age matches all documents
age_not_blank = {"age" : { "$nin" : [""] } }
collection.count_documents(age_not_blank)

##4.4 Relational operators

###4.4.1 Relational operators with numbers :  "greater than or equal", "greater than",  "equal", "less than", "less than or equal"

In [0]:
# $gte : greater than or equal
age_at_least_38 = {"age": {"$gte" : 38}}
collection.count_documents(age_at_least_38)

In [0]:
# $gt : greater than
age_over_38 = {"age": {"$gt" : 38}}
collection.count_documents(age_over_38)

In [0]:
# $eq : equal
age_exactly_38 = {"age": {"$eq" : 38}}
collection.count_documents(age_exactly_38)

In [0]:
# $lt : less than
age_under_38 = {"age": {"$lt" : 38}}
collection.count_documents(age_under_38)

In [0]:
# $lte : less than or equal
age_at_most_38 = {"age": {"$lte" : 38}}
collection.count_documents(age_at_most_38)

###4.4.2 Relational operators with letters :  "greater than or equal"  or  "less than or equal"

Be careful: in MongoDB, capital letters sort before lowercase letters

In [0]:
# $lt "B" : names that sort before "B", e.g. names which start with A
name_before_b = {"name": {"$lt" : "B" }}
collection.count_documents(name_before_b)

In [0]:
# Add a temporary contact named exactly "B" so the effect of the inclusive bound is visible
collection.insert_one({'name' : "B"})

# $lte "B" : names which start with A, or the name "B" itself
## WARNING Uppercase the names before applying the filter
name_up_to_b = {"name": {"$lte" : "B" }}
collection.count_documents(name_up_to_b)

In [0]:
# $gte "B" : the name "B" itself plus names which start with letters B to Z
name_from_b = {"name": {"$gte" : "B" }}
collection.count_documents(name_from_b)

In [0]:
# $gt "B" : names which start with letters B to Z, excluding the name "B" itself
name_after_b = {"name": {"$gt" : "B" }}
collection.count_documents(name_after_b)

In [0]:
# Delete one document whose name is exactly "B"
named_b = {'name' : "B"}
collection.delete_one(named_b)

##4.5 Logical Query Operators

###4.5.1 AND

In [0]:
# $and : both conditions must hold for a document to match
filters = { "$and":[ {"name" : "Kinney Wynn"}, {"age": 22} ]}
fields = {}      # with empty fields, only the id is shown by default

and_matches = collection.find( filters , fields )
list ( and_matches )

###4.5.2 OR

In [0]:
# $or : a document matches when at least one of the conditions holds
filters = {"$or":[ {"age" : 28}, {"age" : 29} ]}

# count_documents() takes only the filter. It has no projection argument, and a
# second positional argument would be interpreted as the `session` parameter.
collection.count_documents( filters )

###4.5.3 AND & OR

In [0]:
# $and combined with $or : the name is one of the two listed AND the age is 22
name_is_either = {"$or":[ {"name" : "Kinney Wynn"}, {"name" : "Kimberley Chase"}]}
age_is_22 = {"age": 22}

filters = { "$and":[ name_is_either, age_is_22 ]}
fields = {'name','age'}      # show name and age (the id is included by default)

list ( collection.find( filters , fields ))

##4.6 Exists & not exists

In [0]:
# Counts all documents that have the attribute '_id'
has_id = {'_id' : {'$exists' : 1}}
print(collection.count_documents(has_id))

In [0]:
# Counts documents that do not have the attribute 'age'
lacks_age = {'age' : {'$exists' : 0}}
print(collection.count_documents(lacks_age))

#5. List items that belong to a list or not

In [0]:
# Count documents with age equal to 28, 29 or 30
age_in_list = {'age' : {'$in': [ 28, 29, 30]}}
print(collection.count_documents(age_in_list))

In [0]:
# Count documents with age different from 28, 29 and 30
age_not_in_list = {'age' : {'$nin': [ 28, 29, 30]}}
print(collection.count_documents(age_not_in_list))

In [0]:
# Count documents whose favorite fruit is neither banana nor apple
fruit_not_in_list = {'favoriteFruit' : {'$nin': [ 'banana', 'apple']}}
print(collection.count_documents(fruit_not_in_list))


#6. Indexes
Adding indexes can help accelerate certain queries and can also add additional functionality to querying and storing documents.

##6.1 Index information

In [0]:
# Shows the indexes that currently exist on the collection
existing_indexes = collection.index_information()
existing_indexes

##6.2 Create index 

In [0]:
# Create an ascending index on "age"; create_index returns the name of the index
age_index_keys = [( "age" , pymongo.ASCENDING)]
collection.create_index(age_index_keys)

##6.3 Create a unique index (rejects duplicate values)

In [0]:
# A unique index on 'user_id' rejects documents whose value for that key
# already exists in the index.
profiles = db.profiles
result = profiles.create_index([('user_id', pymongo.ASCENDING)], unique=True)

# Sorted names of the indexes now present on the 'profiles' collection
sorted(profiles.index_information())

In [0]:
# Insert two profiles with distinct user_id values; both satisfy the unique index.
user_profiles = [ {'user_id': 211, 'name': 'Luke'}, {'user_id': 212, 'name': 'Ziltoid'}]
result = db.profiles.insert_many(user_profiles)

# The collection is deliberately NOT dropped here: the following cells rely on these
# documents to show the duplicate-key rejection. (The former `db.drop.profiles()`
# call raised TypeError, because `db.drop.profiles` is a Collection object, not a method.)

In [0]:
# user_id 213 is not in the collection yet, so the unique index accepts it
new_profile = dict(user_id=213, name='Drew')
result = db.profiles.insert_one(new_profile)  # This is fine.

In [0]:
# user_id 212 was already inserted above, so the unique index must reject this document.
duplicate_profile = {'user_id': 212, 'name': 'Tommy'}
try:
    result = db.profiles.insert_one(duplicate_profile)
except pymongo.errors.DuplicateKeyError as error:
    # The rejection is the point of the demonstration: show it without aborting the notebook
    print('DuplicateKeyError: %s' % error)

## Expected error :  DuplicateKeyError: E11000 duplicate key error index: people.profiles.$user_id_1 dup key: { : 212 }

## Congratulations,  you finished the part 2 !!
Find additional notebooks for learning PyMongo at http://www.github.com/giffy/MongoDB_PyMongo_Tutorial
