In [1]:
# Importing modules

import pymongo as pygo
from pymongo import MongoClient
import pandas as pd

## # Creating a connection object.

In [2]:
# Open a client for the MongoDB server listening on localhost, port 27017.
# To target a remote server instead, pass its connection string:
#   connection = MongoClient('remote_server string')

connection = MongoClient(host='localhost', port=27017)

In [4]:
# Attribute access on the client returns a handle to the database with that
# name (here `test_database`); the cell output is that Database object's repr.
# NOTE(review): 'test_database' is not among the names returned by
# list_database_names() in the next cell, so obtaining the handle does not
# appear to create the database on the server.

connection.test_database

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'test_database')

In [5]:
# List the names of all databases on the connected server.

connection.list_database_names()

['acme', 'admin', 'config', 'horse_barn', 'local']

## # Creating a Database object.

In [6]:
# Get a handle to the `horse_barn` database on the connected server.

db_horse_barn = connection.horse_barn

In [7]:
# List the names of all collections in the horse_barn database.

db_horse_barn.list_collection_names()

['unicorns']

## # Creating a collection object.

In [8]:
# Get a handle to the `unicorns` collection; the CRUD calls below go through it.

unicorns = db_horse_barn.unicorns

# CRUD operations

### # Database.create_collection(collection_name:str)

In [19]:
# Create the "Stallions" collection in horse_barn, but only when it is not
# already there: create_collection() raises CollectionInvalid for an existing
# name, which would make this cell fail on any re-run of the notebook.

if "Stallions" not in db_horse_barn.list_collection_names():
    db_horse_barn.create_collection("Stallions")

# Last expression of the cell, so the Collection handle is displayed either way.
db_horse_barn["Stallions"]

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'horse_barn'), 'Stallions')

### # Collection.insert_one(doc:dict)

In [19]:
# Insert one document into the unicorns collection.
# The call returns an InsertOneResult, shown as the cell output.

doc = {
    'name': 'Jack',
    'weight': 500,
    'gender': 'f',
    'vampires': 50,
}
unicorns.insert_one(doc)

<pymongo.results.InsertOneResult at 0x9faf448>

### # Collection.insert_many(docs:list)

In [20]:
# Insert several documents with a single call by passing them as a list.
# The call returns an InsertManyResult, shown as the cell output.

doc1 = {
    'name': 'Mack',
    'weight': 550,
    'gender': 'm',
    'vampires': 53,
}
doc2 = {
    'name': 'Ronny',
    'weight': 600,
    'gender': 'm',
    'vampires': 100,
}

unicorns.insert_many([doc1, doc2])

<pymongo.results.InsertManyResult at 0x92c3c08>

### # Collection.find(condition:dict)

In [32]:
# Query the collection for documents whose 'name' equals 'Horny'.
# find() does not return the documents themselves; it returns a Cursor object
# that has to be iterated (see the cells below) to get at the matches.

results = unicorns.find({'name': 'Horny'})

In [22]:
# Printing the cursor shows only the Cursor object's repr, not the documents.
print(results)

<pymongo.cursor.Cursor object at 0x00000000093D6FC8>


In [34]:
# Iterating over the cursor yields the matching documents one at a time;
# each one is printed as a Python dict.

for result in results:
    print(result)

{'_id': ObjectId('6087eb02f03b34de185a6c59'), 'name': 'Horny', 'dob': datetime.datetime(1992, 3, 13, 2, 17), 'loves': ['carrot', 'papaya', 'Pineapple'], 'weight': 600.0, 'gender': 'm', 'vampires': 63.0, 'vaccinated': True}


### # Collection.delete_one(condition:dict)

In [35]:
# Delete a single document whose 'name' equals 'Jack'.
# Only the first matching document found is removed; any other matches stay.
# The call returns a DeleteResult, shown as the cell output.

unicorns.delete_one({'name': 'Jack'})

<pymongo.results.DeleteResult at 0x9949888>

### # Collection.delete_many(condition:dict)

In [36]:
# Delete every document whose 'name' equals 'Jack'.
# The call returns a DeleteResult, shown as the cell output.

unicorns.delete_many({'name': 'Jack'})

<pymongo.results.DeleteResult at 0x99be708>

### # Collection.update_one(condition:dict, update:dict)

In [9]:
# Update a single document that matches the filter (1st argument) by applying
# the update document (2nd argument): here `$set` sets 'vaccinated' to True on
# a document named 'Ronny'.
# Only the first matching document found is updated; any other matches stay.

unicorns.update_one({'name': 'Ronny'}, {'$set': {'vaccinated': True}})

<pymongo.results.UpdateResult at 0x131ac308>

In [39]:
# Get an estimated count of the documents in the unicorns collection.

unicorns.estimated_document_count()

14

## # Cursor to DataFrame

In [14]:
# find() is called without a filter, so the cursor covers the whole collection.
cursor = unicorns.find()

# Materialise the cursor into a list of dictionaries (one per document);
# the DataFrame and JSON cells further down reuse `list_cur`.
list_cur = list(cursor)
# Show the first document as a sample of the document layout.
list_cur[0]

{'_id': ObjectId('6087eb02f03b34de185a6c59'),
 'name': 'Horny',
 'dob': datetime.datetime(1992, 3, 13, 2, 17),
 'loves': ['carrot', 'papaya', 'Pineapple'],
 'weight': 600.0,
 'gender': 'm',
 'vampires': 63.0,
 'vaccinated': True}

In [13]:
# Build a DataFrame from the list of document dicts: one row per document,
# one column per key.
df = pd.DataFrame(list_cur)
# NOTE(review): this displays the entire frame; df.head() would keep the
# output short as the collection grows.
df

Unnamed: 0,_id,name,dob,loves,weight,gender,vampires,vaccinated
0,6087eb02f03b34de185a6c59,Horny,1992-03-13 02:17:00,"[carrot, papaya, Pineapple]",600.0,m,63.0,True
1,6087ebc99aa6314e1b627ab4,Aurora,1991-01-24 07:30:00,"[carrot, grape]",450.0,f,43.0,True
2,6087ebc99aa6314e1b627ab5,Unicrom,1973-02-09 16:40:00,"[energon, redbull]",984.0,m,182.0,True
3,6087ebc99aa6314e1b627ab6,Roooooodles,1979-08-18 13:14:00,[apple],575.0,m,99.0,True
4,6087ebd09aa6314e1b627ab7,Solnara,1985-07-03 20:31:00,"[apple, carrot, chocolate]",550.0,f,80.0,True
5,6087ec2e9aa6314e1b627ab8,Kenny,1997-07-01 05:12:00,"[grape, lemon]",690.0,m,39.0,True
6,6087ec2e9aa6314e1b627ab9,Raleigh,2005-05-02 19:27:00,"[apple, sugar]",421.0,m,2.0,True
7,6087ec2e9aa6314e1b627aba,Leia,2001-10-08 09:23:00,"[apple, watermelon]",601.0,f,33.0,True
8,6087ec2e9aa6314e1b627abb,Pilot,1997-02-28 23:33:00,"[apple, watermelon]",650.0,m,54.0,True
9,6087ec2e9aa6314e1b627abc,Nimue,1999-12-20 10:45:00,"[grape, carrot, Pineapple]",590.0,f,,True


In [15]:
# Summarise the frame: per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 8 columns):
_id           14 non-null object
name          14 non-null object
dob           12 non-null datetime64[ns]
loves         11 non-null object
weight        14 non-null float64
gender        14 non-null object
vampires      13 non-null float64
vaccinated    14 non-null bool
dtypes: bool(1), datetime64[ns](1), float64(2), object(4)
memory usage: 926.0+ bytes


In [17]:
# Convert the 'gender' column from object dtype to pandas' category dtype.
# NOTE(review): this overwrites the column in place, so cells run after this
# one see the converted frame.
df['gender'] = df['gender'].astype('category')

In [18]:
# Re-check the summary to confirm 'gender' is now reported as category.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 8 columns):
_id           14 non-null object
name          14 non-null object
dob           12 non-null datetime64[ns]
loves         11 non-null object
weight        14 non-null float64
gender        14 non-null category
vampires      13 non-null float64
vaccinated    14 non-null bool
dtypes: bool(1), category(1), datetime64[ns](1), float64(2), object(3)
memory usage: 924.0+ bytes


# Cursor to JSON

In [28]:
# importing some JSON utils
# bson is installed with pymongo

from bson.json_util import loads, dumps
import json

In [31]:
# Serialise the list of document dicts into a single JSON-formatted string
# using bson.json_util.dumps. In the output, ObjectId and datetime values
# appear as {"$oid": ...} and {"$date": ...} objects.

jason_data = dumps(list_cur, indent=2)
jason_data

'[\n  {\n    "_id": {\n      "$oid": "6087eb02f03b34de185a6c59"\n    },\n    "name": "Horny",\n    "dob": {\n      "$date": 700453020000\n    },\n    "loves": [\n      "carrot",\n      "papaya",\n      "Pineapple"\n    ],\n    "weight": 600.0,\n    "gender": "m",\n    "vampires": 63.0,\n    "vaccinated": true\n  },\n  {\n    "_id": {\n      "$oid": "6087ebc99aa6314e1b627ab4"\n    },\n    "name": "Aurora",\n    "dob": {\n      "$date": 664702200000\n    },\n    "loves": [\n      "carrot",\n      "grape"\n    ],\n    "weight": 450.0,\n    "gender": "f",\n    "vampires": 43.0,\n    "vaccinated": true\n  },\n  {\n    "_id": {\n      "$oid": "6087ebc99aa6314e1b627ab5"\n    },\n    "name": "Unicrom",\n    "dob": {\n      "$date": 98124000000\n    },\n    "loves": [\n      "energon",\n      "redbull"\n    ],\n    "weight": 984.0,\n    "gender": "m",\n    "vampires": 182.0,\n    "vaccinated": true\n  },\n  {\n    "_id": {\n      "$oid": "6087ebc99aa6314e1b627ab6"\n    },\n    "name": "Rooooood

In [27]:
# Confirm that dumps() produced a plain Python str.
type(jason_data)

str

In [32]:
# `jason_data` is already a JSON-formatted str (built by bson.json_util.dumps),
# so it is written to disk verbatim. Passing it through json.dump() would
# encode it a second time, and the file would then hold one quoted, escaped
# string literal instead of the array of documents.
with open('unicorn.json', mode='w') as file_:
    file_.write(jason_data)