Skip to content

Commit

Permalink
Merge branch 'ticket820'
Browse files Browse the repository at this point in the history
* ticket820:
  add docs for potential encoding problems
  tweaks to contrib script
  Get fix_changes_pickle_encoding script working and tested...sort of
  Improve exception on unicode decode failure
  Raise an exception if the db can't store unicode data
  formatting
  Test regular ascii data
  Test that trying to import non-utf8 data will raise exceptions
  Make remove_none return u"" for None, and not replace data
  Reduce length of scheduler name, class_name columns so they fit with
  Decode strings from change objects as if they're utf-8.
  • Loading branch information
Dustin J. Mitchell committed May 13, 2010
2 parents bcd0353 + fbdfb87 commit 12b0be6
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 23 deletions.
2 changes: 1 addition & 1 deletion buildbot/db/dbspec.py
Expand Up @@ -157,7 +157,7 @@ def from_url(cls, url, basedir=None):
if 'max_idle' in args:
args['max_idle'] = int(args['max_idle'])

return cls("MySQLdb", **args)
return cls("MySQLdb", use_unicode=True, charset="utf8", **args)
else:
raise ValueError("Unsupported dbapi %s" % driver)

Expand Down
40 changes: 32 additions & 8 deletions buildbot/db/schema/v1.py
Expand Up @@ -116,7 +116,7 @@
textwrap.dedent("""
CREATE TABLE schedulers (
`schedulerid` INTEGER PRIMARY KEY, -- joins to other tables
`name` VARCHAR(256) UNIQUE NOT NULL,
`name` VARCHAR(127) UNIQUE NOT NULL,
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
"""),
Expand Down Expand Up @@ -241,10 +241,27 @@

class Upgrader(base.Upgrader):
def upgrade(self):
    """Run the steps of this schema upgrade, in order.

    Calls test_unicode(), add_tables(), migrate_changes() and
    set_version().  test_unicode() raises UnicodeError when the database
    does not return the unicode test value intact, so that check happens
    before any of the other steps run.
    """
    self.test_unicode()
    self.add_tables()
    self.migrate_changes()
    self.set_version()

def test_unicode(self):
    """Check that the database stores and returns non-ASCII unicode text.

    Creates a scratch table named ``test_unicode``, inserts a unicode
    string containing the SNOWMAN character, and reads the row back.

    Raises UnicodeError if the value read back is not equal to the value
    that was inserted.  The scratch table is dropped whether or not the
    check succeeds.
    """
    # first, create a test table
    c = self.conn.cursor()
    c.execute("CREATE TABLE test_unicode (`name` VARCHAR(100))")
    q = util.sql_insert(self.dbapi, 'test_unicode', ["name"])
    try:
        val = u"Frosty the \N{SNOWMAN}"
        c.execute(q, [val])
        c.execute("SELECT * FROM test_unicode")
        row = c.fetchall()[0]
        if row[0] != val:
            raise UnicodeError("Your database doesn't support unicode data; for MySQL, set the default collation to utf8_general_ci.")
    finally:
        # Always remove the scratch table: if it were left behind after a
        # failed check, the CREATE TABLE above would fail on the next
        # upgrade attempt.
        c.execute("DROP TABLE test_unicode")

def add_tables(self):
# first, add all of the tables
c = self.conn.cursor()
Expand All @@ -258,13 +275,20 @@ def add_tables(self):
def _addChangeToDatabase(self, change, cursor):
# strip None from any of these values, just in case
def remove_none(x):
if x is None: return ""
return x
values = tuple(remove_none(x) for x in
(change.number, change.who,
change.comments, change.isdir,
change.branch, change.revision, change.revlink,
change.when, change.category))
if x is None: return u""
elif isinstance(x, str):
return x.decode("utf8")
else:
return x
try:
values = tuple(remove_none(x) for x in
(change.number, change.who,
change.comments, change.isdir,
change.branch, change.revision, change.revlink,
change.when, change.category))
except UnicodeDecodeError, e:
raise UnicodeError("Trying to import change data as UTF-8 failed. Please look at contrib/fix_changes_pickle_encoding.py: %s" % str(e))

q = util.sql_insert(self.dbapi, 'changes',
"""changeid author comments is_dir branch revision
revlink when_timestamp category""".split())
Expand Down
4 changes: 2 additions & 2 deletions buildbot/db/schema/v3.py
Expand Up @@ -18,8 +18,8 @@ def migrate_schedulers(self):
cursor.execute("""
CREATE TABLE schedulers (
`schedulerid` INTEGER PRIMARY KEY, -- joins to other tables
`name` VARCHAR(256) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(256) NOT NULL, -- the scheduler's class
`name` VARCHAR(127) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(127) NOT NULL, -- the scheduler's class
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
""")
Expand Down
4 changes: 2 additions & 2 deletions buildbot/db/schema/v4.py
Expand Up @@ -66,8 +66,8 @@ def migrate_schedulers(self):
schema = """
CREATE TABLE schedulers (
%(schedulerid_col)s, -- joins to other tables
`name` VARCHAR(256) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(256) NOT NULL, -- the scheduler's class
`name` VARCHAR(100) NOT NULL, -- the scheduler's name according to master.cfg
`class_name` VARCHAR(100) NOT NULL, -- the scheduler's class
`state` VARCHAR(1024) NOT NULL -- JSON-encoded state dictionary
);
""" % locals()
Expand Down
123 changes: 123 additions & 0 deletions buildbot/test/regressions/test_import_unicode_changes.py
@@ -0,0 +1,123 @@
import os
import shutil
import cPickle

from twisted.trial import unittest

from buildbot.changes.changes import Change

from buildbot.db.schema import manager
from buildbot.db.dbspec import DBSpec
from buildbot.db.connector import DBConnector

import buildbot

class Thing:
    """Plain attribute holder: each keyword argument becomes an attribute.

    The tests in this file pickle an instance carrying a ``changes``
    attribute into ``changes.pck``.
    """

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            self.__dict__[key] = value


class TestUnicodeChanges(unittest.TestCase):
    """Regression tests for importing a pickled change list during upgrade.

    Each test writes a ``changes.pck`` into a scratch basedir, runs the
    schema upgrade, and checks either the imported change or the error
    raised.  setUp() points the tests at a sqlite database.
    """

    def setUp(self):
        self.basedir = "UnicodeChanges"
        if os.path.exists(self.basedir):
            shutil.rmtree(self.basedir)
        os.makedirs(self.basedir)

        # Now try the upgrade process, which will import the old changes.
        self.spec = DBSpec.from_url("sqlite:///state.sqlite", self.basedir)

        self.db = DBConnector(self.spec)
        self.db.start()

    def tearDown(self):
        if self.db:
            self.db.stop()

    def _write_changes(self, changes):
        """Pickle ``Thing(changes=changes)`` to ``changes.pck`` in basedir.

        Returns the path of the file written.  The file is closed
        explicitly so its contents are on disk before anything else
        (the upgrade, or the external contrib script) reads it.
        """
        path = os.path.join(self.basedir, "changes.pck")
        fp = open(path, "w")
        try:
            cPickle.dump(Thing(changes=changes), fp)
        finally:
            fp.close()
        return path

    def testUnicodeChange(self):
        """UTF-8 encoded bytestrings are imported and read back as unicode."""
        # Create changes.pck
        changes = [Change(who=u"Frosty the \N{SNOWMAN}".encode("utf8"),
            files=["foo"], comments=u"Frosty the \N{SNOWMAN}".encode("utf8"),
            branch="b1", revision=12345)]
        self._write_changes(changes)

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade()

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, u"Frosty the \N{SNOWMAN}")
        self.assertEquals(c.comments, u"Frosty the \N{SNOWMAN}")

    def testNonUnicodeChange(self):
        """Bytestrings that are not valid UTF-8 make the upgrade raise UnicodeError."""
        # Create changes.pck
        changes = [Change(who="\xff\xff\x00", files=["foo"],
            comments="\xff\xff\x00", branch="b1", revision=12345)]
        self._write_changes(changes)

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        self.assertRaises(UnicodeError, sm.upgrade)

    def testAsciiChange(self):
        """Plain ASCII bytestrings are imported unchanged."""
        # Create changes.pck
        changes = [Change(who="Frosty the Snowman",
            files=["foo"], comments="Frosty the Snowman", branch="b1", revision=12345)]
        self._write_changes(changes)

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade()

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, "Frosty the Snowman")
        self.assertEquals(c.comments, "Frosty the Snowman")

    def testUTF16Change(self):
        """UTF-16 bytestrings import correctly after running the contrib fix script."""
        # Create changes.pck
        changes = [Change(who=u"Frosty the \N{SNOWMAN}".encode("utf16"),
            files=["foo"], comments=u"Frosty the \N{SNOWMAN}".encode("utf16"),
            branch="b1", revision=12345)]
        changes_path = self._write_changes(changes)

        # Run fix_changes_pickle_encoding.py
        contrib_dir = os.path.join(os.path.dirname(buildbot.__file__), "../contrib")
        retval = os.system("python %s/fix_changes_pickle_encoding.py %s utf16" % (contrib_dir, changes_path))
        self.assertEquals(retval, 0)

        sm = manager.DBSchemaManager(self.spec, self.basedir)
        sm.upgrade()

        c = self.db.getChangeNumberedNow(1)

        self.assertEquals(c.who, u"Frosty the \N{SNOWMAN}")
        self.assertEquals(c.comments, u"Frosty the \N{SNOWMAN}")

class TestMySQLDBUnicodeChanges(TestUnicodeChanges):
    """Runs the inherited change-import tests against a MySQL database URL."""

    def setUp(self):
        basedir = "MySQLDBUnicodeChanges"
        self.basedir = basedir
        if os.path.exists(basedir):
            shutil.rmtree(basedir)
        os.makedirs(basedir)

        # Now try the upgrade process, which will import the old changes.
        url = "mysql://buildbot_test:buildbot_test@localhost/buildbot_test"
        self.spec = DBSpec.from_url(url, basedir)

        connector = DBConnector(self.spec)
        self.db = connector
        connector.start()

        # Drop every table currently in the test database, then commit,
        # so each test begins with an empty schema.
        tables = connector.runQueryNow("SHOW TABLES")
        for table_row in tables:
            connector.runQueryNow("DROP TABLE %s" % table_row[0])
        connector.runQueryNow("COMMIT")

# Probe for a usable MySQL test database at import time; when MySQLdb cannot
# be imported or the connection fails, mark the MySQL test class as skipped.
try:
    import MySQLdb
    conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test",
            passwd="buildbot_test", use_unicode=True, charset='utf8')
except Exception:
    # Not a bare "except:", which would also hide things like a
    # KeyboardInterrupt raised while the connection attempt is hanging.
    TestMySQLDBUnicodeChanges.skip = True
else:
    # The connection was only needed as a probe; don't leave it open.
    conn.close()
16 changes: 8 additions & 8 deletions buildbot/test/unit/test_db_dbspec.py
Expand Up @@ -88,43 +88,43 @@ def test_fromURL_mysql(self):
basedir = "/foo/bar"
d = dbspec.DBSpec.from_url("mysql://somehost.com/dbname", basedir=basedir)
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname'))
connkw=dict(host='somehost.com', db='dbname', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlNoBasedir(self):
d = dbspec.DBSpec.from_url("mysql://somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname'))
connkw=dict(host='somehost.com', db='dbname', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlPort(self):
d = dbspec.DBSpec.from_url("mysql://somehost.com:9000/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', port=9000))
connkw=dict(host='somehost.com', db='dbname', port=9000, use_unicode=True, charset='utf8'))

def test_fromURL_mysqlLocal(self):
d = dbspec.DBSpec.from_url("mysql:///database_name")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host=None, db='database_name'))
connkw=dict(host=None, db='database_name', use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuth(self):
d = dbspec.DBSpec.from_url("mysql://user:pass@somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user", passwd="pass"))
connkw=dict(host='somehost.com', db='dbname', user="user", passwd="pass", use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPass(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user"))
connkw=dict(host='somehost.com', db='dbname', user="user", use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPassPort(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com:8000/dbname")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user", port=8000))
connkw=dict(host='somehost.com', db='dbname', user="user", port=8000, use_unicode=True, charset='utf8'))

def test_fromURL_mysqlAuthNoPassPortArgs(self):
d = dbspec.DBSpec.from_url("mysql://user@somehost.com:8000/dbname?foo=moo")
self.failUnlessConnection(d, 'MySQLdb',
connkw=dict(host='somehost.com', db='dbname', user="user",
port=8000, foo="moo"))
port=8000, foo="moo", use_unicode=True, charset='utf8'))

class DBSpec_methods(unittest.TestCase):

Expand Down
4 changes: 2 additions & 2 deletions buildbot/test/unit/test_db_schema_master.py
Expand Up @@ -218,7 +218,7 @@ def setUp(self):
shutil.rmtree(self.basedir)
os.makedirs(self.basedir)

self.conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test")
self.conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test", use_unicode=True, charset='utf8')
# Drop all previous tables
cur = self.conn.cursor()
cur.execute("SHOW TABLES")
Expand All @@ -233,6 +233,6 @@ def setUp(self):

try:
import MySQLdb
conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test")
conn = MySQLdb.connect(user="buildbot_test", db="buildbot_test", passwd="buildbot_test", use_unicode=True, charset='utf8')
except:
MySQLDBSchemaManager.skip = True
63 changes: 63 additions & 0 deletions contrib/fix_changes_pickle_encoding.py
@@ -0,0 +1,63 @@
#!/usr/bin/python
"""%prog [options] [changes.pck] old_encoding
Re-encodes changes in a pickle file to UTF-8 from the given encoding
"""

def recode_changes(changes, old_encoding):
    """Decode the bytestring attributes of each change from old_encoding.

    For every change, each of the ``who``, ``comments``, ``revlink``,
    ``category``, ``branch`` and ``revision`` attributes that is a ``str``
    instance is replaced, on the change object itself, with the result of
    decoding it from ``old_encoding``.  Other values are left alone.  The
    number of strings converted is printed.

    Returns a new list containing the same (modified in place) change
    objects, in their original order.

    Raises UnicodeError, naming the attribute and the change number, when
    a value cannot be decoded with ``old_encoding``.
    """
    text_attrs = ("who", "comments", "revlink", "category", "branch", "revision")
    converted = 0
    recoded = []
    for change in changes:
        for name in text_attrs:
            value = getattr(change, name)
            if not isinstance(value, str):
                continue
            try:
                setattr(change, name, value.decode(old_encoding))
            except UnicodeDecodeError:
                raise UnicodeError("Error decoding %s of change #%s as %s:\n%s" % (name, change.number, old_encoding, value))
            converted += 1
        recoded.append(change)
    print("converted %d strings" % converted)
    return recoded

if __name__ == '__main__':
    # Command-line entry point: load the changes pickle, decode its
    # bytestrings with recode_changes(), back up the original file and
    # write the converted pickle in its place.
    import sys
    import os
    from cPickle import load, dump
    from optparse import OptionParser

    parser = OptionParser(__doc__)

    options, args = parser.parse_args()

    if len(args) == 2:
        changes_file, old_encoding = args
    elif len(args) == 1:
        # only the encoding was given; use the default pickle name
        changes_file = "changes.pck"
        old_encoding = args[0]
    elif not args:
        parser.error("Need at least one argument")
    else:
        parser.error("Too many arguments")

    print("opening %s" % (changes_file,))
    try:
        fp = open(changes_file)
    except IOError:
        # sys.exc_info() fetches the exception without relying on
        # version-specific "except ..., e" / "except ... as e" syntax.
        parser.error("Couldn't open %s: %s" % (changes_file, str(sys.exc_info()[1])))

    # NOTE: unpickling can execute code embedded in the file; only run this
    # script on a changes pickle that you trust.
    try:
        changes = load(fp)
    finally:
        fp.close()

    print("decoding bytestrings in %s using %s" % (changes_file, old_encoding))
    changes.changes = recode_changes(changes.changes, old_encoding)

    # Pick a backup name that does not exist yet: .old, .old.1, .old.2, ...
    changes_backup = changes_file + ".old"
    i = 0
    while os.path.exists(changes_backup):
        i += 1
        changes_backup = changes_file + ".old.%i" % i

    print("backing up %s to %s" % (changes_file, changes_backup))
    os.rename(changes_file, changes_backup)
    # Close the new pickle explicitly so it is completely written out
    # before the script exits.
    out = open(changes_file, "w")
    try:
        dump(changes, out)
    finally:
        out.close()
23 changes: 23 additions & 0 deletions docs/installation.texinfo
Expand Up @@ -234,6 +234,29 @@ The upgrade process renames the Changes pickle (@code{$basedir/changes.pck}) to
simply downgrade Buildbot and move this file back to its original name. You
may also wish to delete the state database (@code{state.sqlite}).

@heading Change Encoding Issues

The upgrade process assumes that strings in your Changes pickle are encoded in
UTF-8 (or plain ASCII). If this is not the case, and if there are non-UTF-8
characters in the pickle, the upgrade will fail with a suitable error message.
If this occurs, you have two options. If the change history is not important
to your purpose, you can simply delete @code{changes.pck}.

If you would like to keep the change history, then you will need to figure out
which encoding is in use, and use @code{contrib/fix_changes_pickle_encoding.py}
to rewrite the changes pickle into Unicode before upgrading the master. A
typical invocation (with Mac-Roman encoding) might look like:

@example
$ python $buildbot/contrib/fix_changes_pickle_encoding.py changes.pck macroman
opening changes.pck
decoding bytestrings in changes.pck using macroman
converted 11392 strings
backing up changes.pck to changes.pck.old
@end example

If your Changes pickle uses multiple encodings, you're on your own, but the
script in contrib may provide a good starting point for the fix.

@node Creating a buildslave
@section Creating a buildslave

Expand Down

0 comments on commit 12b0be6

Please sign in to comment.