In [1]:
%matplotlib inline

import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn

import pymongo

In [3]:
# The MongoDB cursor holds A LOT of data, and pandas is still unable to
# create a DataFrame from a generator object, so we make the cursor
# pretend to be a CSV file object instead.

class MongoCSVAdapter(object):
    """Minimal file-like wrapper that serves cursor documents as delimited text.

    Each document pulled from ``cursor`` becomes one line: the values of
    ``fields`` (in order) joined by ``splitter``. Only ``read`` is provided,
    so the adapter can be handed to a reader that pulls text via ``read()``.

    Parameters
    ----------
    cursor : iterator
        Iterator of dict-like documents; it is consumed with ``next()``.
    fields : sequence of str
        Keys to emit per row, in column order. The default is None, but a
        sequence must be supplied before ``read`` is called because rows are
        built by iterating over it.
    splitter : str
        Column delimiter placed between values.

    Notes
    -----
    Values are neither quoted nor escaped, so a value that contains
    ``splitter`` or a newline produces a malformed row.
    """

    def __init__(self, cursor, fields=None, splitter="/"):
        self.cursor = cursor
        self.fields = fields
        self.splitter = splitter
        self.times_called = 0        # number of read() calls so far
        self.values_per_time = 10    # maximum rows returned per read() call
        self.line_ending = '\n'

    def read(self, n=0):
        """Return the next batch of rows as one string.

        ``n`` is accepted for file-API compatibility but ignored: each call
        returns at most ``values_per_time`` lines. A short final batch is
        returned in full, and ``''`` is returned once the cursor is exhausted.
        """
        self.times_called += 1
        lines = []
        try:
            for _ in range(self.values_per_time):
                # Read from the adapter's own cursor, not a module-level name.
                document = next(self.cursor)
                lines.append(self._row_from_el(document) + self.line_ending)
        except StopIteration:
            # The cursor ran dry mid-batch: keep the rows already collected
            # instead of throwing the partial batch away.
            pass
        return "".join(lines)

    def _row_from_el(self, row):
        """Format one document as a delimited line, without the line ending.

        Non-string values are converted with ``str``; a missing key or a
        ``None`` value becomes an empty cell. Leading and trailing whitespace
        of the joined line is stripped.
        """
        cells = []
        for key in self.fields:
            value = row.get(key)
            cells.append("" if value is None else str(value))
        return self.splitter.join(cells).strip()

In [4]:
# Columns to project from each document; also the CSV column order.
fields = ['title', 'year', 'type', 'genre']

# Open a client with pymongo's default connection settings and query the
# collection with an empty filter, projecting only the fields above.
client = pymongo.MongoClient()
cursor = client['movies_db']['movies_collection'].find({}, fields)

# Wrap the cursor so it can be consumed through a file-like read() call.
adapter = MongoCSVAdapter(cursor, fields=fields)

In [None]:
# Get all movies from the collection.
# The delimiter comes from the adapter so the reader and the writer
# can never disagree about it.
movies = pd.read_csv(adapter, sep=adapter.splitter, names=fields)

# We are only interested in rows whose type is 'movie'; once filtered, the
# 'type' column is constant, so drop it. drop() returns a new frame here
# rather than mutating the filtered slice in place (which gives no speed-up
# and can emit SettingWithCopyWarning).
movies = movies[movies['type'] == 'movie'].drop('type', axis=1)

In [18]:
# Preview the first rows only rather than rendering the whole frame.
movies.head(10)

Unnamed: 0,title,year,genre
0,Carmencita,1894,"Documentary, Short"
1,Le clown et ses chiens,1892,"Animation, Short"
2,Pauvre Pierrot,1892,"Animation, Comedy, Short"
3,Un bon bock,1892,"Animation, Short"
4,Blacksmith Scene,1893,Short
5,Chinese Opium Den,1894,Short
6,Corbett and Courtney Before the Kinetograph,1894,"Short, Sport"
7,Edison Kinetoscopic Record of a Sneeze,1894,"Documentary, Short"
8,Miss Jerry,1894,Romance
9,Employees Leaving the Lumière Factory,1895,"Documentary, Short"
