Skip to content

Commit

Permalink
Merge branch 'master' into buildbot-0.8.0
Browse files Browse the repository at this point in the history
* master:
  migrate recode_changes into OldChangeManager, don't convert to string
  add docs for potential encoding problems
  tweaks to contrib script
  Get fix_changes_pickle_encoding script working and tested...sort of
  Improve exception on unicode decode failure
  Raise an exception if the db can't store unicode data
  formatting
  Test regular ascii data
  Test that trying to import non-utf8 data will raise exceptions
  Make remove_none return u"" for None, and not replace data
  Reduce length of scheduler name, class_name columns so they fit with
  Decode strings from change objects as if they're utf-8.
  • Loading branch information
Dustin J. Mitchell committed May 13, 2010
2 parents e5b6ba7 + b7aa564 commit 6202035
Show file tree
Hide file tree
Showing 10 changed files with 269 additions and 26 deletions.
32 changes: 29 additions & 3 deletions buildbot/changes/changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,16 @@ def __init__(self, who, files, comments, isdir=0, links=None,
links = []
self.links = links

self.revision = util.none_or_str(revision)
def none_or_unicode(x):
if x is None: return x
return unicode(x)

self.revision = none_or_unicode(revision)
if when is None:
when = util.now()
self.when = when
self.branch = util.none_or_str(branch)
self.category = util.none_or_str(category)
self.branch = none_or_unicode(branch)
self.category = none_or_unicode(category)
self.revlink = revlink
self.properties = Properties()
self.properties.update(properties, "Change")
Expand Down Expand Up @@ -189,6 +193,28 @@ def saveYourself(self):
log.msg("unable to save changes")
log.err()

# This method is used by contrib/fix_changes_pickle_encoding.py to recode all
# bytestrings in an old changes.pck into unicode strings
def recode_changes(self, old_encoding, quiet=False):
    """Decode the bytestring attributes of every stored change to unicode.

    For each change in self.changes, the attributes who, comments, revlink,
    category, branch and revision are examined; any value that is a `str`
    is replaced by the unicode object obtained by decoding it with
    old_encoding.  An integer revision is converted to unicode first.  The
    change objects are modified in place and nothing is returned.

    old_encoding -- name of the codec the existing bytestrings are encoded in
    quiet -- when true, do not print the number of converted strings

    Raises UnicodeError (naming the attribute, change number and encoding)
    if a value cannot be decoded using old_encoding.
    """
    attrs = ("who", "comments", "revlink", "category", "branch", "revision")
    nconvert = 0
    for c in self.changes:
        # give revision special handling, in case it is an integer
        if isinstance(getattr(c, "revision", None), int):
            c.revision = unicode(c.revision)

        for attr in attrs:
            # Default to None so that a change object lacking one of these
            # attributes is skipped instead of aborting the whole recode.
            a = getattr(c, attr, None)
            if not isinstance(a, str):
                continue
            try:
                decoded = a.decode(old_encoding)
            except UnicodeDecodeError:
                raise UnicodeError("Error decoding %s of change #%s as %s:\n%r" %
                                   (attr, c.number, old_encoding, a))
            setattr(c, attr, decoded)
            nconvert += 1
    if not quiet:
        print("converted %d strings" % nconvert)

class OldChangeMaster(ChangeMaster):
    # this is a reminder that the ChangeMaster class is old
    # The subclass adds no behaviour of its own; everything is inherited
    # from ChangeMaster, and only the name differs.
    pass
Expand Down
2 changes: 1 addition & 1 deletion buildbot/db/dbspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def from_url(cls, url, basedir=None):
if 'max_idle' in args:
args['max_idle'] = int(args['max_idle'])

return cls("MySQLdb", **args)
return cls("MySQLdb", use_unicode=True, charset="utf8", **args)
else:
raise ValueError("Unsupported dbapi %s" % driver)

Expand Down
40 changes: 32 additions & 8 deletions buildbot/db/schema/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
textwrap.dedent("""
CREATE TABLE schedulers (
`schedulerid` INTEGER PRIMARY KEY, -- joins to other tables
`name` VARCHAR(256) UNIQUE NOT NULL,
`name` VARCHAR(127) UNIQUE NOT NULL,
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
"""),
Expand Down Expand Up @@ -241,10 +241,27 @@

class Upgrader(base.Upgrader):
def upgrade(self):
self.test_unicode()
self.add_tables()
self.migrate_changes()
self.set_version()

def test_unicode(self):
    """Check that the database stores and returns unicode text unchanged.

    Creates a scratch table named test_unicode, inserts a string containing
    U+2603 SNOWMAN, reads it back and compares it with the original.

    Raises UnicodeError if the value read back differs from the value
    written.  The scratch table is dropped whether or not the check passes.
    """
    # first, create a test table
    c = self.conn.cursor()
    c.execute("CREATE TABLE test_unicode (`name` VARCHAR(100))")
    try:
        # everything after the CREATE lives inside the try so that the
        # scratch table never outlives this method
        q = util.sql_insert(self.dbapi, 'test_unicode', ["name"])
        val = u"Frosty the \N{SNOWMAN}"
        c.execute(q, [val])
        c.execute("SELECT * FROM test_unicode")
        row = c.fetchall()[0]
        if row[0] != val:
            raise UnicodeError("Your database doesn't support unicode data; for MySQL, set the default collation to utf8_general_ci.")
    finally:
        c.execute("DROP TABLE test_unicode")

def add_tables(self):
# first, add all of the tables
c = self.conn.cursor()
Expand All @@ -258,13 +275,20 @@ def add_tables(self):
def _addChangeToDatabase(self, change, cursor):
# strip None from any of these values, just in case
def remove_none(x):
if x is None: return ""
return x
values = tuple(remove_none(x) for x in
(change.number, change.who,
change.comments, change.isdir,
change.branch, change.revision, change.revlink,
change.when, change.category))
if x is None: return u""
elif isinstance(x, str):
return x.decode("utf8")
else:
return x
try:
values = tuple(remove_none(x) for x in
(change.number, change.who,
change.comments, change.isdir,
change.branch, change.revision, change.revlink,
change.when, change.category))
except UnicodeDecodeError, e:
raise UnicodeError("Trying to import change data as UTF-8 failed. Please look at contrib/fix_changes_pickle_encoding.py: %s" % str(e))

q = util.sql_insert(self.dbapi, 'changes',
"""changeid author comments is_dir branch revision
revlink when_timestamp category""".split())
Expand Down
4 changes: 2 additions & 2 deletions buildbot/db/schema/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def migrate_schedulers(self):
cursor.execute("""
CREATE TABLE schedulers (
`schedulerid` INTEGER PRIMARY KEY, -- joins to other tables
`name` VARCHAR(256) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(256) NOT NULL, -- the scheduler's class
`name` VARCHAR(127) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(127) NOT NULL, -- the scheduler's class
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
""")
Expand Down
4 changes: 2 additions & 2 deletions buildbot/db/schema/v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def migrate_schedulers(self):
schema = """
CREATE TABLE schedulers (
%(schedulerid_col)s, -- joins to other tables
`name` VARCHAR(256) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(256) NOT NULL, -- the scheduler's class
`name` VARCHAR(100) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(100) NOT NULL, -- the scheduler's class
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
""" % locals()
Expand Down
125 changes: 125 additions & 0 deletions buildbot/test/regressions/test_import_unicode_changes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import os
import shutil
import cPickle

from twisted.trial import unittest

from buildbot.changes.changes import Change, OldChangeMaster

from buildbot.db.schema import manager
from buildbot.db.dbspec import DBSpec
from buildbot.db.connector import DBConnector

import buildbot

class Thing:
    """Plain attribute holder: each keyword argument becomes an attribute."""

    def __init__(self, **kwargs):
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)


class TestUnicodeChanges(unittest.TestCase):
    """Regression tests for importing changes.pck during a schema upgrade.

    Each test writes a changes.pck into a scratch base directory, runs the
    schema upgrade against the database described by self.spec, and checks
    either the imported change or the exception raised.
    """

    def setUp(self):
        self.basedir = "UnicodeChanges"
        if os.path.exists(self.basedir):
            shutil.rmtree(self.basedir)
        os.makedirs(self.basedir)

        # The database that the upgrade process will import the old
        # changes into.
        self.spec = DBSpec.from_url("sqlite:///state.sqlite", self.basedir)

        self.db = DBConnector(self.spec)
        self.db.start()

    def tearDown(self):
        if self.db:
            self.db.stop()

    def _writeChangesPickle(self, obj):
        """Pickle obj to changes.pck in the base directory.

        The file is opened in binary mode and closed before returning, so
        the data is fully on disk by the time the upgrade reads it.
        """
        f = open(os.path.join(self.basedir, "changes.pck"), "wb")
        try:
            cPickle.dump(obj, f)
        finally:
            f.close()

    def testUnicodeChange(self):
        # Create changes.pck
        changes = [Change(who=u"Frosty the \N{SNOWMAN}".encode("utf8"),
            files=["foo"], comments=u"Frosty the \N{SNOWMAN}".encode("utf8"),
            branch="b1", revision=12345)]
        self._writeChangesPickle(Thing(changes=changes))

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade(quiet=True)

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, u"Frosty the \N{SNOWMAN}")
        self.assertEquals(c.comments, u"Frosty the \N{SNOWMAN}")

    def testNonUnicodeChange(self):
        # Create changes.pck
        changes = [Change(who="\xff\xff\x00", files=["foo"],
            comments="\xff\xff\x00", branch="b1", revision=12345)]
        self._writeChangesPickle(Thing(changes=changes))

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        # bytes that are not valid UTF-8 must make the upgrade fail loudly
        self.assertRaises(UnicodeError, sm.upgrade, quiet=True)

    def testAsciiChange(self):
        # Create changes.pck
        changes = [Change(who="Frosty the Snowman",
            files=["foo"], comments="Frosty the Snowman", branch="b1",
            revision=12345)]
        self._writeChangesPickle(Thing(changes=changes))

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade(quiet=True)

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, "Frosty the Snowman")
        self.assertEquals(c.comments, "Frosty the Snowman")

    def testUTF16Change(self):
        # Create changes.pck
        cm = OldChangeMaster()
        cm.changes = [Change(who=u"Frosty the \N{SNOWMAN}".encode("utf16"),
            files=["foo"], comments=u"Frosty the \N{SNOWMAN}".encode("utf16"),
            branch="b1", revision=12345)]

        # instead of running contrib/fix_changes_pickle_encoding.py, we just call
        # the changemanager's recode_changes directly - it's the function at the
        # heart of the script anyway.
        cm.recode_changes('utf16', quiet=True)

        # and dump the recoded changemanager to changes.pck before trying a schema upgrade
        self._writeChangesPickle(cm)

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade(quiet=True)

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, u"Frosty the \N{SNOWMAN}")
        self.assertEquals(c.comments, u"Frosty the \N{SNOWMAN}")

class TestMySQLDBUnicodeChanges(TestUnicodeChanges):
    """Run the TestUnicodeChanges tests against a local MySQL database.

    Uses the buildbot_test database and account on localhost, and drops
    every table found there before each test.
    """

    def setUp(self):
        self.basedir = "MySQLDBUnicodeChanges"
        if os.path.exists(self.basedir):
            shutil.rmtree(self.basedir)
        os.makedirs(self.basedir)

        # The database that the upgrade process will import the old
        # changes into.
        self.spec = DBSpec.from_url(
                "mysql://buildbot_test:buildbot_test@localhost/buildbot_test", self.basedir)

        self.db = DBConnector(self.spec)
        self.db.start()

        # start from an empty database: drop whatever tables are present
        result = self.db.runQueryNow("SHOW TABLES")
        for row in result:
            self.db.runQueryNow("DROP TABLE %s" % row[0])
        self.db.runQueryNow("COMMIT")

# Probe once at import time: if MySQLdb is missing or the test database is
# unreachable, mark the whole class as skipped instead of failing each test.
try:
    import MySQLdb
    conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test",
            passwd="buildbot_test", use_unicode=True, charset='utf8')
    # the connection is only a reachability check; don't leave it open
    conn.close()
except Exception:
    TestMySQLDBUnicodeChanges.skip = "MySQLdb or the buildbot_test database is not available"
16 changes: 8 additions & 8 deletions buildbot/test/unit/test_db_dbspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,43 +88,43 @@ def test_fromURL_mysql(self):
basedir = "/foo/bar"
d = dbspec.DBSpec.from_url("mysql://somehost.com/dbname", basedir=basedir)
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname'))
connkw=dict(host='somehost.com', db='dbname', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlNoBasedir(self):
d = dbspec.DBSpec.from_url("mysql://somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname'))
connkw=dict(host='somehost.com', db='dbname', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlPort(self):
d = dbspec.DBSpec.from_url("mysql://somehost.com:9000/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', port=9000))
connkw=dict(host='somehost.com', db='dbname', port=9000, use_unicode=True, charset='utf8'))

def test_fromURL_mysqlLocal(self):
d = dbspec.DBSpec.from_url("mysql:///database_name")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host=None, db='database_name'))
connkw=dict(host=None, db='database_name', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuth(self):
d = dbspec.DBSpec.from_url("mysql://user:pass@somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user", passwd="pass"))
connkw=dict(host='somehost.com', db='dbname', user="user", passwd="pass", use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPass(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user"))
connkw=dict(host='somehost.com', db='dbname', user="user", use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPassPort(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com:8000/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user", port=8000))
connkw=dict(host='somehost.com', db='dbname', user="user", port=8000, use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPassPortArgs(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com:8000/dbname?foo=moo")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user",
port=8000, foo="moo"))
port=8000, foo="moo", use_unicode=True, charset='utf8'))

class DBSpec_methods(unittest.TestCase):

Expand Down
4 changes: 2 additions & 2 deletions buildbot/test/unit/test_db_schema_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def setUp(self):
shutil.rmtree(self.basedir)
os.makedirs(self.basedir)

self.conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test")
self.conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test", use_unicode=True, charset='utf8')
# Drop all previous tables
cur = self.conn.cursor()
cur.execute("SHOW TABLES")
Expand All @@ -233,6 +233,6 @@ def setUp(self):

try:
import MySQLdb
conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test")
conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test", use_unicode=True, charset='utf8')
except:
MySQLDBSchemaManager.skip = True
45 changes: 45 additions & 0 deletions contrib/fix_changes_pickle_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/python
"""%prog [options] [changes.pck] old_encoding
Re-encodes changes in a pickle file to UTF-8 from the given encoding
"""

if __name__ == '__main__':
    import sys
    import os
    from cPickle import load, dump
    from optparse import OptionParser

    parser = OptionParser(__doc__)

    options, args = parser.parse_args()

    # Accepted forms: "<changes.pck> <old_encoding>" or just "<old_encoding>",
    # in which case the pickle defaults to ./changes.pck.
    if len(args) == 2:
        changes_file, old_encoding = args
    elif len(args) == 1:
        changes_file = "changes.pck"
        old_encoding = args[0]
    else:
        parser.error("Need at least one argument")

    print("opening %s" % (changes_file,))
    try:
        # pickles are binary data; text mode can corrupt them on platforms
        # that translate line endings
        fp = open(changes_file, "rb")
    except IOError:
        e = sys.exc_info()[1]
        parser.error("Couldn't open %s: %s" % (changes_file, str(e)))

    # NOTE: unpickling can execute arbitrary code; only run this script on a
    # changes.pck that you trust.
    try:
        changemgr = load(fp)
    finally:
        fp.close()

    print("decoding bytestrings in %s using %s" % (changes_file, old_encoding))
    changemgr.recode_changes(old_encoding)

    # pick the first backup name that does not exist yet: .old, .old.1, ...
    changes_backup = changes_file + ".old"
    i = 0
    while os.path.exists(changes_backup):
        i += 1
        changes_backup = changes_file + ".old.%i" % i
    print("backing up %s to %s" % (changes_file, changes_backup))
    os.rename(changes_file, changes_backup)

    # write the recoded pickle under the original name, making sure the file
    # is flushed and closed before the script exits
    out = open(changes_file, "wb")
    try:
        dump(changemgr, out)
    finally:
        out.close()

0 comments on commit 6202035

Please sign in to comment.