Added code ensuring indices exist on all collections #53

Merged
fccoelho merged 3 commits into master from fix/missing_indices

2 participants

@fccoelho
Owner

@turicas please check and see if that is enough.

@turicas

We may use the same syntax for creating indexes, just for the sake of consistency.
I'd replace this line with: FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING)]).
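For reference, a minimal sketch of the two index-creation forms in PyMongo 2.x (the list-of-tuples form is the one required for directions and compound keys, which is what the consistency suggestion is about), assuming a local mongod and the project's MCDB database:

    import pymongo

    client = pymongo.MongoClient()  # assumes a local mongod
    FEEDS = client.MCDB.feeds

    # Single ascending key: both calls create the same index
    FEEDS.ensure_index("subtitle_detail.base")
    FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING)])

    # Descending or compound indexes require the list-of-tuples form
    FEEDS.ensure_index([("last_visited", pymongo.DESCENDING), ("updated", pymongo.DESCENDING)])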

@turicas

Same here about consistency: ARTICLES.ensure_index([("published", pymongo.DESCENDING)]).

@turicas

Same here (syntax consistency) and on line 52.

@fccoelho fccoelho merged commit 98aeee4 into master
@fccoelho fccoelho deleted the fix/missing_indices branch
Commits on Mar 9, 2014
  1. @fccoelho
Commits on Mar 24, 2014
  1. @fccoelho: Fixed indices
Commits on Apr 1, 2014
  1. @fccoelho: minor edits
2  Monitor/monitor_app/app.py
@@ -27,6 +27,7 @@
+
#----------------------------------------------------------------------------#
# App Config.
#----------------------------------------------------------------------------#
@@ -306,6 +307,7 @@ def json_timeline():
fixed_articles = []
for art in Articles:
art['published'] = datetime.date.fromtimestamp(art['published']['$date']/1000.).strftime("%d,%m,%Y")
+ print art['title']
fixed_articles.append(art)
dados = render_template('pages/timeline.json', busca='NAMD FGV', articles=fixed_articles)
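The division by 1000. in the line above is needed because MongoDB extended JSON serializes datetimes as a $date value in milliseconds since the epoch, while datetime.date.fromtimestamp expects seconds. A minimal sketch of the conversion, with a hypothetical timestamp:

    import datetime

    # hypothetical article in extended-JSON form, $date in milliseconds
    art = {'title': 'example', 'published': {'$date': 1396353600000}}
    published = datetime.date.fromtimestamp(art['published']['$date'] / 1000.)
    print published.strftime("%d,%m,%Y")  # 01,04,2014 in most timezones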
9 capture/downloader.py
@@ -60,6 +60,15 @@
FEEDS = MCDB.feeds # Feed collection
ARTICLES = MCDB.articles # Article Collection
+## Ensure indices are created
+FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING)])
+FEEDS.ensure_index([("last_visited", pymongo.DESCENDING), ("updated", pymongo.DESCENDING)])
+ARTICLES.ensure_index([("link", pymongo.ASCENDING), ("published", pymongo.ASCENDING)])
+ARTICLES.ensure_index([("published", pymongo.DESCENDING)])
+
+
+
+
config = {
'threads': 35, # Number of threads used in the fetching pool
}
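A note on the approach above: ensure_index is idempotent in PyMongo 2.x, so running these calls at import time is safe; if the index already exists the call is a no-op on the server, and the driver also caches the index name client-side for a few minutes (cache_for=300 by default). A minimal sketch, assuming a local mongod:

    import pymongo

    client = pymongo.MongoClient()
    ARTICLES = client.MCDB.articles

    # The first call builds the index; the second is a no-op
    ARTICLES.ensure_index([("published", pymongo.DESCENDING)])
    ARTICLES.ensure_index([("published", pymongo.DESCENDING)])

    # 'published_-1' now appears alongside the default _id index
    print ARTICLES.index_information()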
20 capture/extract_feeds.py
@@ -11,15 +11,15 @@
import argparse
import logging
-import pymongo
from pymongo.errors import OperationFailure
-
import pymongo
+
import feedfinder
import urlscanner
import settings
+
###########################
# Setting up Logging
###########################
@@ -43,8 +43,24 @@
MCDB = client.MCDB
URLS = MCDB.urls # Feed collection
+## Ensure indices are created
+FEEDS = MCDB.feeds
+ARTICLES = MCDB.articles
+FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING)])
+# the index below is key to ensure uniqueness of feeds in the table
+FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING), ("link", pymongo.ASCENDING)], unique=True, dropDups=True)
+FEEDS.ensure_index([("last_visited", pymongo.DESCENDING), ("updated", pymongo.DESCENDING)])
+ARTICLES.ensure_index([("link", pymongo.ASCENDING), ("published", pymongo.ASCENDING)])
+ARTICLES.ensure_index([("published", pymongo.DESCENDING)])
+
+
def main(urls, depth):
+    """
+    Start capturing feeds from the given URLs.
+    :param urls: path to a file with one URL per line
+    :param depth: crawl depth passed to the URL scanner
+    """
if urls:
with open(urls) as f:
for u in f:
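On the unique index created above: once it exists, a second feed with the same (subtitle_detail.base, link) pair is rejected with DuplicateKeyError, while dropDups (a server-side option later removed in MongoDB 3.0) told the server to discard pre-existing duplicates when the index was first built. A hedged sketch of the behavior, assuming a local mongod, PyMongo 2.x's insert API, and a hypothetical feed document:

    import pymongo
    from pymongo.errors import DuplicateKeyError

    client = pymongo.MongoClient()
    FEEDS = client.MCDB.feeds
    FEEDS.ensure_index([("subtitle_detail.base", pymongo.ASCENDING),
                        ("link", pymongo.ASCENDING)], unique=True, dropDups=True)

    feed = {"subtitle_detail": {"base": "http://example.com/feed"},
            "link": "http://example.com"}  # hypothetical feed document
    FEEDS.insert(feed)   # first copy is stored; insert() adds an _id to the dict
    feed.pop("_id")      # drop it so the retry hits the compound key, not _id
    try:
        FEEDS.insert(feed)   # same (base, link) pair: rejected by the unique index
    except DuplicateKeyError:
        print "duplicate feed rejected"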