Permalink
Browse files

refactored to fix/update cache problems

  • Loading branch information...
1 parent c5d6b47 commit a30f3995bb064409c263d9a331c9d8de00d231fe @FarMcKon committed Sep 1, 2012
Showing with 134 additions and 53 deletions.
  1. +29 −20 delicious_import.py
  2. +28 −18 gitmark.py
  3. +77 −15 gitmark_add.py
View
@@ -9,6 +9,7 @@
import sys
import urllib
+import logging
from xml.dom import minidom
from xml.parsers import expat
@@ -22,52 +23,53 @@ def cache_to_local_file(local_file, content):
h.close()
-def import_delicious_to_local_git(username, password='', url=None):
+def import_delicious_to_local_git(username, password='', url=None, doCache=True):
""" imports a delicious file to the local git system. If url is not set
a delicious API url is generated. if url is set (for a file, for example)
that file is imported."""
if not url:
- # API URL: https://user:passwd@api.del.icio.us/v1/posts/all
- url = "https://%s:%s@api.del.icio.us/v1/posts/all" % (username,
- password)
- #req = urllib2.Request(url, headers={'Accept':'application/xml'})
+ # API URL: https://user:passwd@api.del.icio.us/v1/posts/all
+ url = "https://%s:%s@api.del.icio.us/v1/posts/all" % (username, password)
+ #re: = urllib2.Request(url, headers={'Accept':'application/xml'})
h = urllib.urlopen(url)
else:
# Url is actually a local file in this case
url = urllub.pathname2url(url)
h = open(url)
content = h.read()
h.close()
+
#--enable to cache a copy of the file to test using
#cache_to_local_file('delicious_cache.htm', content):
# check for signs of a yahoo error page, which causes minidom to flip out
if( len(content) >=6 and content[:5] == '<!-- ' ):
- print content
- print "yahoo error, no data fetched "
+ logging.error(content)
+ logging.error("yahoo error, no data fetched")
return
try:
x = minidom.parseString(content)
except expat.ExpatError, e:
saveFile = "minidom_freakout.xml"
fh = open(saveFile, "w")
- print "== Above content caused minidom to flipped out\n %s" % (e)
- print "Saving problematic file as %s" % (saveFile)
+ logging.error("== Above content caused minidom to flipped out\n %s" % (e))
+ logging.error("Saving problematic file as %s" % (saveFile))
if(fh):
fh.write(content)
fh.close()
- print "Saved problematic file as %s" % (saveFile)
+ logging.error("Saved problematic file as %s" % (saveFile))
return -1
-
# sample post: <post href="http://www.pixelbeat.org/cmdline.html" hash="e3ac1d1e4403d077ee7e65f62a55c406" description="Linux Commands - A practical reference" tag="linux tutorial reference" time="2010-11-29T01:07:35Z" extended="" meta="c79362665abb0303d577b6b9aa341599" />
post_list = x.getElementsByTagName('post')
newMarksList = []
- for post_index, post in enumerate(post_list):
+ if doCache:
+ logging.warning("Caching data. This may be slow")
+ for post_index, post in enumerate(post_list):
try:
url = post.getAttribute('href')
desc = post.getAttribute('description')
@@ -89,25 +91,31 @@ def import_delicious_to_local_git(username, password='', url=None):
if(privateString == "0" or privateString==""):
g.private = False
-
+ g.parseTitle()
newMarksList.append(g)
#break here for single test without data resetting/fixing
except (KeyboardInterrupt, SystemExit):
print >>sys.stderr, ("backup interrupted by KeyboardInterrupt/SystemExit" )
+ logging.error( ("backup interrupted by KeyboardInterrupt/SystemExit" ) )
return
except Exception as e:
print >> sys.stderr, ("unknown exception %s" %(e))
+ logging.error(("unknown exception %s" %(e)))
- print "all kinds of new gitmarks!!"
- print "we have %d new marks" % len(newMarksList)
+ logging.info("all kinds of new gitmarks!!")
+ logging.info("we have %d new marks" % len(newMarksList))
for mark in newMarksList:
# FUTURE: speed this up, by passing a whole list
- print "adding mark %s to repo %s" %(str(mark.title), str(mark.private) )
+ if mark.title is None: mark.title = "Untitiled bookmark"
+ logging.info("adding mark %s to repo %s" %(str(mark.title), str(mark.private) ))
+ if doCache:
+ mark.getContent()
+ print '.'
err = addToRepo(mark,doPush=False)
if (err > 0):
- print "mark add error %s" %str(err)
+ logging.info("mark add error %s" %str(err) )
return 0
# -- hack test main for when yahoo sucks and I need to test
@@ -138,7 +146,7 @@ def import_delicious_to_local_git(username, password='', url=None):
usage = """
Usage: python delicious_import.py cached-page-uri
OR
- Usage: python delicious_import.py username password
+ Usage: python delicious_import.py username password
***Password and username are sent as HTTPS***"
"""
@@ -153,8 +161,9 @@ def import_delicious_to_local_git(username, password='', url=None):
elif (len(sys.argv) == 3):
try:
(username, password) = sys.argv[1:]
- except ValueError:
- print usage
+ except ValueError as e:
+ print e
+ logging.error(e)
import_delicious_to_local_git(username, password)
else:
print usage
View
@@ -8,6 +8,7 @@
import csv
import subprocess
import time
+import logging
from optparse import OptionParser
import json
import hashlib
@@ -37,7 +38,7 @@ class gitmark(object):
creator = None
rights = None #creative commons rights string
tri = [] #transitionary resource locator. IRL bit.ly, goo.gl, etc
- content = '' #content of the site. Lazyloads and should do smart local/away fetch
+ content = None #content of the site. Lazyloads and should do smart local/away fetch
title = None
extended = None
meta = None
@@ -62,7 +63,7 @@ def __init__(self,uri, creator=None, dictValues=None):
def addTags(self, stringList):
#if we have more than 1 quote, split by quotes
if(stringList.count('"') > 1):
- print 'has qouted string! We fail'
+ logging.error('has qouted string! We fail')
else :
list = stringList.split(',')
list = [ l.lstrip().rstrip() for l in list]
@@ -143,11 +144,15 @@ def cacheContent(self, target_file, content=None):
content is specified, then that content is written instead
of the content in this gitmark
"""
+ if content == None:
+ if self.content == None:
+ self.getContent()
+ content = self.content
# -- lazily git store any existing file if necessary
if os.path.isfile(target_file) :
#check the md5 sum of the content of this file,
#if it does NOT match our new content, then
- print "do magic here to md5 sum, and cache file if needed"
+ logging.error("do magic here to md5 sum, and cache file if needed")
if content == None:
content = self.content
self.cls_saveContent(target_file, content)
@@ -157,20 +162,20 @@ def addMyselfLocally(self, localGitmarkDir, localTagsDir):
This method causes a gitmark to
add itself to the local repository.
"""
- print "not used. old code. Use for reference only"
+ logging.error("not used. old code. Use for reference only")
exit(-5)
- print "adding myself to the local repository"
+ logging.info("adding myself to the local repository")
if(self.private != False):
- print "this is a private mark. Encrypting not yet enabled. Do not store"
+ logging.info("this is a private mark. Encrypting not yet enabled. Do not store")
else :
# -- write gitmark
fname = os.path.join(localGitmarkDir,self.hash)
#fp = open(fname,"w")
- print 'debug fwrite of file "%s"' % fp
- print '---'
- print self.JSONBlock()
- print '---'
+ logging.info('debug fwrite of file "%s"' % fp)
+ logging.info('---')
+ logging.info( self.JSONBlock() )
+ logging.info('---')
#fwrite(self.JSONBlock())
#fclose(fp)
# add git add here
@@ -183,7 +188,7 @@ def addMyselfLocally(self, localGitmarkDir, localTagsDir):
tags = set(uglyTags.append(prettyTags))
for tag in tags:
fname = os.path.join(localGitmarkDir,self.hash)
- print 'tag filename "%s" ' %fname
+ logging.info( 'tag filename "%s" ' %fname )
# add git add here
settings.TAG_SUB_PATH
@@ -208,12 +213,12 @@ def prettyTags(self):
""" tags, cleaned from delicious and make nicer looking"""
g = []
for t in self.tags:
- print t
+ logging.info ( t )
if '_' in t:
g.append(t.replace('_',' '))
else:
g.append(t)
- print g
+ logging.info( g )
return g
def uglyTags(self):
@@ -238,12 +243,12 @@ def cls_hydrate(cls, filename):
f.close()
del f
obj = json.loads(jsonObj)
- print obj
+ logging.info( obj )
mark = gitmark(settings.USER_NAME)
mark.__dict__.update(obj) #force update dict from file
return mark
- print "failed to read/load %s" %filename
+ logging.error( "failed to read/load %s" %filename)
return None
@classmethod
@@ -263,11 +268,15 @@ def cls_generateHash(cls, text):
@classmethod
def cls_getContent(cls, url):
+ """ Attempts to download content from the specified url,
+ @return data from the specified URL
+ """
try:
h = urllib.urlopen(url)
content = h.read()
h.close()
h = urllib.urlopen(url)
+
except IOError, e:
print >>sys.stderr, ("Error: could not retrieve the content of a"
" URL. The bookmark will be saved, but its content won't be"
@@ -282,9 +291,10 @@ def cls_getContent(cls, url):
@classmethod
def cls_parseTitle(cls, content):
+ if content == None : return '[No Title]'
re_htmltitle = re.compile(".*<title>(.*)</title>.*")
- t = re_htmltitle.search(content)
try:
+ t = re_htmltitle.search(content)
title = t.group(1)
except AttributeError:
title = '[No Title]'
@@ -318,14 +328,14 @@ def gitPush(cls, gitBaseDir = None):
# TRICKY: sets the environment over to the base directory of the gitmarks base
cwd_dir = os.path.abspath(os.getcwd())
if gitBaseDir: os.chdir(os.path.abspath(gitBaseDir))
- print os.getcwd()
+ logging.info( os.getcwd() )
pipe = subprocess.Popen("git push origin master", shell=True) #Tricky: shell must be true
pipe.wait()
if gitBaseDir: os.chdir(cwd_dir)
class gitmarkRepoManager(object):
def __init__(self):
- print "initalizing a repo manager"
+ logging.info( "initalizing a repo manager")
Oops, something went wrong.

0 comments on commit a30f399

Please sign in to comment.