## Introduction to Databases

### Using [MongoDB](https://docs.mongodb.com/manual/administration/install-community/)

### [MongoDB Cheat Sheet](https://www.mongodb.com/developer/quickstart/cheat-sheet/) (Command line)

#### Connect to MongoDB Shell
```
mongo 
```
Obs: connects to mongodb://127.0.0.1:27017 by default  
```
mongo --host <host> --port < port > -u < user > -p < pwd >  
```
Obs: omit the password if you want a prompt  
```
mongo "mongodb://192.168.1.1:27017"  
```    
#### Show Databases
```  
show dbs
``` 
#### Switch Database  
```    
use < database_name > 
```
#### Show Collections
```    
show collections
```    
#### Run JavaScript File
```    
load("myScript.js")    
```

#### Create Documents (using the current database handle `db` and a collection `coll`)
```
db.coll.insertOne({name: "Max"})  
db.coll.insert([{name: "Max"}, {name:"Alex"}]) // ordered bulk insert  
db.coll.insert([{name: "Max"}, {name:"Alex"}], {ordered: false}) // unordered bulk insert  
db.coll.insert({date: ISODate()})  
db.coll.insert({name: "Max"}, {"writeConcern": {"w": "majority", "wtimeout": 5000}}) 
```

#### Read Operations

```
db.coll.findOne() // returns a single document  
db.coll.find()    // returns a cursor - show 20 results - "it" to display more  
db.coll.find().pretty()  
db.coll.find({name: "Max", age: 32}) // implicit logical "AND".  
db.coll.find({date: ISODate("2020-09-25T13:57:17.180Z")})  
db.coll.find({name: "Max", age: 32}).explain("executionStats") // or "queryPlanner" or "allPlansExecution"  
db.coll.distinct("name")  
```
#### Count  
```
db.coll.count({age: 32})          // deprecated - metadata-based estimate only when called without a query; prefer countDocuments()  
db.coll.estimatedDocumentCount()  // estimation based on collection metadata  
db.coll.countDocuments({age: 32}) // alias for an aggregation pipeline - accurate count  
```
#### Comparison  
```
db.coll.find({"year": {$gt: 1970}})  
db.coll.find({"year": {$gte: 1970}})  
db.coll.find({"year": {$lt: 1970}})  
db.coll.find({"year": {$lte: 1970}})  
db.coll.find({"year": {$ne: 1970}})  
db.coll.find({"year": {$in: [1958, 1959]}})  
db.coll.find({"year": {$nin: [1958, 1959]}})  
```  

#### Logical   

```
db.coll.find({name:{$not: {$eq: "Max"}}})
db.coll.find({$or: [{"year" : 1958}, {"year" : 1959}]})
db.coll.find({$nor: [{price: 1.99}, {sale: true}]})
db.coll.find({$and: [{$or: [{qty: {$lt :10}}, {qty :{$gt: 50}}]}, {$or: [{sale: true}, {price: {$lt: 5 }}]}]})  
```

#### Element   

```
db.coll.find({name: {$exists: true}})  
db.coll.find({"zipCode": {$type: 2 }})  
db.coll.find({"zipCode": {$type: "string"}})  
```

#### Aggregation Pipeline  

```  
db.coll.aggregate([{$match: {status: "A"}},{$group: {_id: "$cust_id", total: {$sum: "$amount"}}},{$sort: {total: -1}}])  
```  

#### Text search with a "text" index

```
db.coll.find({$text: {$search: "cake"}}, {score: {$meta: "textScore"}}).sort({score: {$meta: "textScore"}})  
```  

#### Regex  
```
db.coll.find({name: /^Max/})   // regex: starts with "Max"  
db.coll.find({name: /^Max$/i}) // regex case insensitive  

```

#### Array  

```
db.coll.find({tags: {$all: ["Realm", "Charts"]}})  
db.coll.find({field: {$size: 2}}) // impossible to index - prefer storing the size of the array & update it  
db.coll.find({results: {$elemMatch: {product: "xyz", score: {$gte: 8}}}})  
```

#### Projections  

```
db.coll.find({"x": 1}, {"actors": 1})               // actors + _id   
db.coll.find({"x": 1}, {"actors": 1, "_id": 0})     // actors   
db.coll.find({"x": 1}, {"actors": 0, "summary": 0}) // all but "actors" and "summary"   
```

#### Sort, skip, limit  

```
db.coll.find({}).sort({"year": 1, "rating": -1}).skip(10).limit(3)  
```

#### Read Concern  

```
db.coll.find().readConcern("majority")   
```

#### Update Operations

```
db.coll.update({"_id": 1}, {"year": 2016}) // WARNING! Replaces the entire document
db.coll.update({"_id": 1}, {$set: {"year": 2016, name: "Max"}})
db.coll.update({"_id": 1}, {$unset: {"year": 1}})
db.coll.update({"_id": 1}, {$rename: {"year": "date"} })
db.coll.update({"_id": 1}, {$inc: {"year": 5}})
db.coll.update({"_id": 1}, {$mul: {price: NumberDecimal("1.25"), qty: 2}})
db.coll.update({"_id": 1}, {$min: {"imdb": 5}})
db.coll.update({"_id": 1}, {$max: {"imdb": 8}})
db.coll.update({"_id": 1}, {$currentDate: {"lastModified": true}})
db.coll.update({"_id": 1}, {$currentDate: {"lastModified": {$type: "timestamp"}}})
```

#### Array  

```
db.coll.update({"_id": 1}, {$push :{"array": 1}})
db.coll.update({"_id": 1}, {$pull :{"array": 1}})
db.coll.update({"_id": 1}, {$addToSet :{"array": 2}})
db.coll.update({"_id": 1}, {$pop: {"array": 1}})  // last element
db.coll.update({"_id": 1}, {$pop: {"array": -1}}) // first element
db.coll.update({"_id": 1}, {$pullAll: {"array" :[3, 4, 5]}})
db.coll.update({"_id": 1}, {$push: {scores: {$each: [90, 92, 85]}}})
db.coll.updateOne({"_id": 1, "grades": 80}, {$set: {"grades.$": 82}})
db.coll.updateMany({}, {$inc: {"grades.$[]": 10}})
db.coll.update({}, {$set: {"grades.$[element]": 100}}, {multi: true, arrayFilters: [{"element": {$gte: 100}}]})
```  

#### Update many

```
db.coll.update({"year": 1999}, {$set: {"decade": "90's"}}, {"multi":true})
db.coll.updateMany({"year": 1999}, {$set: {"decade": "90's"}})
```

#### FindOneAndUpdate

```
db.coll.findOneAndUpdate({"name": "Max"}, {$inc: {"points": 5}}, {returnNewDocument: true})
```
#### Upsert

```
db.coll.update({"_id": 1}, {$set: {item: "apple"}, $setOnInsert: {defaultQty: 100}}, {upsert: true})
```

#### Replace

```
db.coll.replaceOne({"name": "Max"}, {"firstname": "Maxime", "surname": "Beugnet"})
```

#### Save

```
db.coll.save({"item": "book", "qty": 40})
```

#### Write concern

```
db.coll.update({}, {$set: {"x": 1}}, {"writeConcern": {"w": "majority", "wtimeout": 5000}})
```

#### Delete  

```
db.coll.remove({name: "Max"})
db.coll.remove({name: "Max"}, {justOne: true})
db.coll.remove({}) // WARNING! Deletes all the docs but not the collection itself and its index definitions
db.coll.remove({name: "Max"}, {"writeConcern": {"w": "majority", "wtimeout": 5000}})
db.coll.findOneAndDelete({"name": "Max"})
```

### Databases and Collections  
#### Drop  

```
db.coll.drop()    // removes the collection and its index definitions
db.dropDatabase() // double check that you are *NOT* on the PROD cluster... :-)
```

#### Create Collection  

```
// Create collection with a $jsonschema
db.createCollection("contacts", {
   validator: {$jsonSchema: {
      bsonType: "object",
      required: ["phone"],
      properties: {
         phone: {
            bsonType: "string",
            description: "must be a string and is required"
         },
         email: {
            bsonType: "string",
            pattern: "@mongodb\.com$",
            description: "must be a string and match the regular expression pattern"
         },
         status: {
            enum: [ "Unknown", "Incomplete" ],
            description: "can only be one of the enum values"
         }
      }
   }}
})  
```

#### Other Collection Functions  

```  
db.coll.stats()
db.coll.storageSize()
db.coll.totalIndexSize()
db.coll.totalSize()
db.coll.validate({full: true})
db.coll.renameCollection("new_coll", true) // 2nd parameter to drop the target collection if exists
```  

### Indexes  
#### List Indexes  

```  
db.coll.getIndexes()
db.coll.getIndexKeys()
```  

#### Create Indexes  

```  
// Index Types
db.coll.createIndex({"name": 1})                // single field index
db.coll.createIndex({"name": 1, "date": 1})     // compound index
db.coll.createIndex({foo: "text", bar: "text"}) // text index
db.coll.createIndex({"$**": "text"})            // wildcard text index
db.coll.createIndex({"userMetadata.$**": 1})    // wildcard index
db.coll.createIndex({"loc": "2d"})              // 2d index
db.coll.createIndex({"loc": "2dsphere"})        // 2dsphere index
db.coll.createIndex({"_id": "hashed"})          // hashed index

// Index Options
db.coll.createIndex({"lastModifiedDate": 1}, {expireAfterSeconds: 3600})      // TTL index
db.coll.createIndex({"name": 1}, {unique: true})
db.coll.createIndex({"name": 1}, {partialFilterExpression: {age: {$gt: 18}}}) // partial index
db.coll.createIndex({"name": 1}, {collation: {locale: 'en', strength: 1}})    // case insensitive index with strength = 1 or 2
db.coll.createIndex({"name": 1 }, {sparse: true})
```

#### Drop Indexes  

```
db.coll.dropIndex("name_1")  
```

#### Hide/Unhide Indexes  

```
db.coll.hideIndex("name_1")
db.coll.unhideIndex("name_1")
```

#### Handy commands  

```
use admin
db.createUser({"user": "root", "pwd": passwordPrompt(), "roles": ["root"]})
db.dropUser("root")
db.auth( "user", passwordPrompt() )

use test
db.getSiblingDB("dbname")
db.currentOp()
db.killOp(123) // opid

db.fsyncLock()
db.fsyncUnlock()

db.getCollectionNames()
db.getCollectionInfos()
db.printCollectionStats()
db.stats()

db.getReplicationInfo()
db.printReplicationInfo()
db.isMaster()
db.hostInfo()
db.printShardingStatus()
db.shutdownServer()
db.serverStatus()

db.setSlaveOk()
db.getSlaveOk()

db.getProfilingLevel()
db.getProfilingStatus()
db.setProfilingLevel(1, 200) // 0 == OFF, 1 == ON with slowms, 2 == ON

db.enableFreeMonitoring()
db.disableFreeMonitoring()
db.getFreeMonitoringStatus()

db.createView("viewName", "sourceColl", [{$project:{department: 1}}])
```

## Using Mongo via Python API

In [1]:
#!pip install -U -q pymongo

In [2]:
import datetime
import pprint

import pymongo
from pymongo import MongoClient

### Making a Connection with MongoClient

The first step when working with PyMongo is to create a MongoClient to the running mongod instance. Doing so is easy:

In [3]:
client = MongoClient('localhost', 27017)

In [4]:
client.list_database_names()

['admin', 'config', 'local']

## Getting/Creating a Database

A single instance of MongoDB can support multiple independent databases. When working with PyMongo you access databases using attribute style access on MongoClient instances:

In [5]:
db = client.test_database
#db = client['test_database']

## Check if Database Exists

In [13]:
# The database only appears after we insert one collection/document

print(client.list_database_names())

['admin', 'config', 'local']


In [14]:
dblist = client.list_database_names()
if "test_database" in dblist:
    print("The database exists.")

## Getting a Collection

A collection is a group of documents stored in MongoDB, and can be thought of as roughly the equivalent of a table in a relational database. Getting a collection in PyMongo works the same as getting a database:

In [15]:
collection = db.test_collection
#collection = db['test_collection']

In [16]:
# The collection only appears after we insert one document

print(db.list_collection_names())

[]


## Documents

Data in MongoDB is represented (and stored) using JSON-style documents. In PyMongo we use dictionaries to represent documents. As an example, the following dictionary might be used to represent a blog post:

In [17]:
post = {"author": "Mike",
        "text": "My first blog post!",
        "tags": ["mongodb", "python", "pymongo"],
        "date": datetime.datetime.utcnow()}

## Creating another Collection and inserting a Document

To insert a document into a collection we can use the insert_one() method:

In [18]:
posts = db.posts
post_id = posts.insert_one(post).inserted_id
post_id

ObjectId('6168018542062de482ac0719')

When a document is inserted a special key, "_id", is automatically added if the document doesn’t already contain an "_id" key. The value of "_id" must be unique across the collection. insert_one() returns an instance of InsertOneResult. For more information on "_id", see the documentation on _id.

After inserting the first document, the posts collection has actually been created on the server. We can verify this by listing all of the collections in our database:

In [19]:
print(client.list_database_names())
print(db.list_collection_names())

['admin', 'config', 'local', 'test_database']
['posts']


## Getting a Single Document With find_one()

The most basic type of query that can be performed in MongoDB is find_one(). This method returns a single document matching a query (or None if there are no matches). It is useful when you know there is only one matching document, or are only interested in the first match. Here we use find_one() to get the first document from the posts collection:

In [20]:
posts.find_one()

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)}

In [21]:
result1 = posts.find_one()
print(type(result1))

<class 'dict'>


The result is a dictionary matching the one that we inserted previously.

Note: The returned document contains an "_id", which was automatically added on insert.

find_one() also supports querying on specific elements that the resulting document must match. To limit our results to a document with author “Mike” we do:

In [22]:
posts.find_one({"author": "Mike"})

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)}

In [23]:
posts.find_one({"text": "My first blog post!"})

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)}

In [24]:
posts.find_one({"tags": "python"})

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)}

In [25]:
posts.find_one({'date': datetime.datetime(2021, 2, 13, 12, 37, 2, 805000)})

In [28]:
posts.find_one({'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)})

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000)}

In [29]:
posts.find_one({"author": "Eliot"})

In [30]:
posts.find_one({"tags": "R"})

## Querying By ObjectId

We can also find a post by its _id, which in our example is an ObjectId:

In [31]:
post_id

ObjectId('6168018542062de482ac0719')

In [32]:
pprint.pprint(posts.find_one({"_id": post_id}))

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


Note that an ObjectId is not the same as its string representation:

In [33]:
str(post_id)

'6168018542062de482ac0719'

In [34]:
posts.find_one({"_id": str(post_id)})

In [35]:
type(post_id)

bson.objectid.ObjectId

A common task in web applications is to get an ObjectId from the request URL and find the matching document. It’s necessary in this case to convert the ObjectId from a string before passing it to find_one:

In [36]:
from bson.objectid import ObjectId

# The web framework gets post_id from the URL and passes it as a string
def get(post_id):
    """Look up a single document by the string form of its ObjectId.

    Args:
        post_id: the document id as a string, e.g. as taken from a request URL.

    Returns:
        The matching document as a dict, or None when no document has that _id.
    """
    # NOTE(review): attribute access means this queries database "db",
    # collection "collection"; the posts in this notebook live in db.posts --
    # confirm the intended target before reusing this helper.
    # Convert from string to ObjectId: a plain string never matches an ObjectId _id.
    return client.db.collection.find_one({'_id': ObjectId(post_id)})

## Bulk Inserts

In order to make [querying](https://docs.mongodb.com/manual/reference/operator/query/) a little more interesting, let’s insert a few more documents. In addition to inserting a single document, we can also perform bulk insert operations, by passing a list as the first argument to insert_many(). This will insert each document in the list, sending only a single command to the server:

In [37]:
new_posts = [{"author": "Mike",
              "text": "Another post!",
              "tags": ["bulk", "insert"],
              "date": datetime.datetime(2009, 11, 12, 11, 14)},
             
             {"author": "Eliot",
              "title": "MongoDB is fun",
              "text": "and pretty easy too!",
              "date": datetime.datetime(2009, 11, 10, 10, 45)}]

result = posts.insert_many(new_posts)
result.inserted_ids

[ObjectId('6168031a42062de482ac071a'), ObjectId('6168031a42062de482ac071b')]

There are a couple of interesting things to note about this example:

+ The result from insert_many() now returns two ObjectId instances, one for each inserted document.
+ new_posts[1] has a different “shape” than the other posts - there is no "tags" field and we’ve added a new field, "title". This is what we mean when we say that MongoDB is __schema-free__.  

## Querying for More Than One Document

To get more than a single document as the result of a query we use the find() method. find() returns a Cursor instance, which allows us to iterate over all matching documents. For example, we can iterate over every document in the posts collection:

In [38]:
for post in posts.find():
    pprint.pprint(post)

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('6168031a42062de482ac071a'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}
{'_id': ObjectId('6168031a42062de482ac071b'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


Just like we did with find_one(), we can pass a document to find() to limit the returned results. Here, we get only those documents whose author is “Mike”:

In [39]:
for post in posts.find({"author": "Mike"}):
    pprint.pprint(post)

{'_id': ObjectId('6168018542062de482ac0719'),
 'author': 'Mike',
 'date': datetime.datetime(2021, 10, 14, 10, 7, 24, 160000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('6168031a42062de482ac071a'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


### [Advanced queries](https://docs.mongodb.com/manual/reference/operator/query/)

## Counting

If we just want to know how many documents match a query we can perform a count_documents() operation instead of a full query. We can get a count of all of the documents in a collection:

In [40]:
posts.count_documents({})

3

In [41]:
posts.count_documents({"author": "Mike"})

2

In [42]:
posts.count_documents({"author": "Mike", 'tags':'bulk'})

1

## Range Queries

MongoDB supports many different types of advanced queries.  
As an example, let’s perform a query where we limit results to posts older than a certain date, but also sort the results by author.  
Here we use the special "$lt" operator to do a range query, and also call sort() to sort the results by author.

In [44]:
# IPython.display is the public import location; IPython.core.display is deprecated for these names.
from IPython.display import HTML, display

# Render a quick-reference table of the MongoDB query operators used in this section.
display(HTML('''
<table class="math-table">
<thead>
<tr><th>Operation</th><th>Syntax</th><th>Description</th></tr>
</thead>
<tbody>
<tr><td>Equality</td><td>{“key” : “value”}</td><td>Matches values that are equal to a specified value.</td></tr>
<tr><td>Less Than</td><td>{“key” :{$lt:”value”}}</td><td>Matches values that are less than a specified value.</td></tr>
<tr><td>Greater Than</td><td>{“key” :{$gt:”value”}}</td><td>Matches values that are greater than a specified value.</td></tr>
<tr><td>Less Than Equal to</td><td>{“key” :{$lte:”value”}}</td><td>Matches values that are less than or equal to a specified value.</td></tr>
<tr><td>Greater Than Equal to</td><td>{“key” :{$gte:”value”}}</td><td>Matches values that are greater than or equal to a specified value.</td></tr>
<tr><td>Not Equal to</td><td>{“key”:{$ne: “value”}}</td><td>Matches all values that are not equal to a specified value.</td></tr>
<tr><td>Logical AND</td><td>{ “$and”:[{exp1}, {exp2}, …, {expN}] }</td><td>Joins query clauses with a logical AND returns all documents that match the conditions of both clauses.</td></tr>
<tr><td>Logical OR</td><td>{ “$or”:[{exp1}, {exp2}, …, {expN}] }</td><td>Joins query clauses with a logical OR returns all documents that match the conditions of either clause.</td></tr>
<tr><td>Logical NOT</td><td>{“key”:{$not:{exp}}}</td><td>Inverts the effect of a query expression and returns documents that do not match the query expression.</td></tr>
</tbody>
</table>
'''))

Operation,Syntax,Description
Equality,{“key” : “value”},Matches values that are equal to a specified value.
Less Than,{“key” :{$lt:”value”}},Matches values that are less than a specified value.
Greater Than,{“key” :{$gt:”value”}},Matches values that are greater than a specified value.
Less Than Equal to,{“key” :{$lte:”value”}},Matches values that are less than or equal to a specified value.
Greater Than Equal to,{“key” :{$gte:”value”}},Matches values that are greater than or equal to a specified value.
Not Equal to,{“key”:{$ne: “value”}},Matches all values that are not equal to a specified value.
Logical AND,"{ “$and”:[{exp1}, {exp2}, …, {expN}] }",Joins query clauses with a logical AND returns all documents that match the conditions of both clauses.
Logical OR,"{ “$or”:[{exp1}, {exp2}, …, {expN}] }",Joins query clauses with a logical OR returns all documents that match the conditions of either clause.
Logical NOT,{“key”:{$not:{exp}}},Inverts the effect of a query expression and returns documents that do not match the query expression.


In [45]:
d = datetime.datetime(2009, 11, 12, 12)
for post in posts.find({"date": {"$lt": d}}).sort("author"):
    pprint.pprint(post)

{'_id': ObjectId('6168031a42062de482ac071b'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}
{'_id': ObjectId('6168031a42062de482ac071a'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


## Indexing

Adding indexes can help accelerate certain queries and can also add additional functionality to querying and storing documents.  
In this example, we’ll demonstrate how to create a unique index on a key that rejects documents whose value for that key already exists in the index.

First, we’ll need to create the index:

In [46]:
result = db.profiles.create_index([('user_id', pymongo.ASCENDING)], unique=True)
sorted(list(db.profiles.index_information()))

['_id_', 'user_id_1']

Notice that we have two indexes now: one is the index on _id that MongoDB creates automatically, and the other is the index on user_id we just created.

Now let’s set up some user profiles:

In [47]:
user_profiles = [{'user_id': 211, 'name': 'Luke'},
                 {'user_id': 212, 'name': 'Ziltoid'}]

result = db.profiles.insert_many(user_profiles)

The index prevents us from inserting a document whose user_id is already in the collection:

In [48]:
new_profile = {'user_id': 213, 'name': 'Drew'}
duplicate_profile = {'user_id': 212, 'name': 'Tommy'}

In [49]:
result = db.profiles.insert_one(new_profile)  # This is fine.

In [50]:
result = db.profiles.insert_one(duplicate_profile) ## error

DuplicateKeyError: E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}, 'errmsg': 'E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }'}

## Delete Document / Collection / Database

You can delete a document, a collection or a database in MongoDB, by using the methods:
+ delete_one()
+ delete_many()
+ drop() 
+ drop_database()

### Delete Document(s)

In [51]:
# db = client.test_database
mycol = db["customers"]

mylist = [{ "_id": 1, "name": "John", "address": "Highway 37"},
          { "_id": 2, "name": "Peter", "address": "Lowstreet 27"},
          { "_id": 3, "name": "Amy", "address": "Apple st 652"},
          { "_id": 4, "name": "Hannah", "address": "Mountain 21"},
          { "_id": 5, "name": "Michael", "address": "Valley 345"},
          { "_id": 6, "name": "Sandy", "address": "Ocean blvd 2"},
          { "_id": 7, "name": "Betty", "address": "Green Grass 1"},
          { "_id": 8, "name": "Richard", "address": "Sky st 331"},
          { "_id": 9, "name": "Susan", "address": "One way 98"},
          { "_id": 10, "name": "Vicky", "address": "Yellow Garden 2"},
          { "_id": 11, "name": "Ben", "address": "Park Lane 38"},
          { "_id": 12, "name": "William", "address": "Central st 954"},
          { "_id": 13, "name": "Chuck", "address": "Main Road 989"},
          { "_id": 14, "name": "Viola", "address": "Sideway 1633"}
         ]

customers = mycol.insert_many(mylist)

#print list of the _id values of the inserted documents:
print(customers.inserted_ids)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


Deleting one

In [52]:
myquery = {"address": "Mountain 21"}
mycol.delete_one(myquery)

<pymongo.results.DeleteResult at 0x7f0ce4409cc0>

Deleting many

In [53]:
myquery = {"address": {"$regex": "^S"}}
deleted = mycol.delete_many(myquery)

print(deleted.deleted_count, " documents deleted.")

2  documents deleted.


In [54]:
deleted.raw_result

{'n': 2, 'ok': 1.0}

In [55]:
for entry in mycol.find():
    print(entry)

{'_id': 1, 'name': 'John', 'address': 'Highway 37'}
{'_id': 2, 'name': 'Peter', 'address': 'Lowstreet 27'}
{'_id': 3, 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 6, 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 9, 'name': 'Susan', 'address': 'One way 98'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 12, 'name': 'William', 'address': 'Central st 954'}
{'_id': 13, 'name': 'Chuck', 'address': 'Main Road 989'}


Deleting all

In [56]:
deleted = mycol.delete_many({})

In [57]:
deleted.raw_result

{'n': 11, 'ok': 1.0}

### Delete Collection

In [58]:
print(db.list_collection_names())

['customers', 'profiles', 'posts']


In [59]:
posts.drop()

In [60]:
print(db.list_collection_names())

['customers', 'profiles']


### Delete Database

In [61]:
print(client.list_database_names())

['admin', 'config', 'local', 'test_database']


In [62]:
client.drop_database('test_database')

In [63]:
print(client.list_database_names())

['admin', 'config', 'local']
