Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'jsha/sqlite'
Browse files Browse the repository at this point in the history
Conflicts:
	src/chrome/content/code/HTTPSRules.js
  • Loading branch information
pde committed Jan 22, 2014
2 parents 5fba110 + ba496d7 commit 414c3e9
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 43 deletions.
16 changes: 7 additions & 9 deletions makexpi.sh
Expand Up @@ -15,6 +15,7 @@ APP_NAME=https-everywhere
# ./makexpi.sh 0.2.3.development.2

cd "`dirname $0`"
RULESETS_SQLITE="$PWD/src/defaults/rulesets.sqlite"

[ -d pkg ] || mkdir pkg

Expand Down Expand Up @@ -97,6 +98,11 @@ if [ "$1" != "--fast" ] ; then
fi
# =============== END VALIDATION ================

# Regenerate the ruleset sqlite DB unless we are in --fast mode AND a DB
# already exists. Note: POSIX marks the `-o` operand of test(1) as
# obsolescent/ambiguous, so join two separate tests with `||` instead.
if [ "$1" != "--fast" ] || [ ! -f "$RULESETS_SQLITE" ] ; then
	echo "Generating sqlite DB"
	./utils/make-sqlite.py src/chrome/content/rules
fi

# The name/version of the XPI we're building comes from src/install.rdf
XPI_NAME="pkg/$APP_NAME-`grep em:version src/install.rdf | sed -e 's/[<>]/ /g' | cut -f3`"
if [ "$1" ] && [ "$1" != "--fast" ] ; then
Expand All @@ -114,14 +120,6 @@ if [ -e "$GIT_OBJECT_FILE" ]; then
export GIT_COMMIT_ID=$(cat "$GIT_OBJECT_FILE")
fi

# Unless we're in a hurry and there's already a ruleset library, build it from
# the ruleset .xml files

if [ "$1" = "--fast" ] ; then
FAST="--fast"
fi
python ./utils/merge-rulesets.py $FAST

cd src

# Build the XPI!
Expand All @@ -135,7 +133,7 @@ if [ "$ret" != 0 ]; then
rm -f "../$XPI_NAME"
exit "$?"
else
echo >&2 "Total included rules: `find chrome/content/rules -name "*.xml" | wc -l`"
echo >&2 "Total included rules: `sqlite3 $RULESETS_SQLITE 'select count(*) from rulesets'`"
echo >&2 "Rules disabled by default: `find chrome/content/rules -name "*.xml" | xargs grep -F default_off | wc -l`"
echo >&2 "Created $XPI_NAME"
if [ -n "$BRANCH" ]; then
Expand Down
112 changes: 78 additions & 34 deletions src/chrome/content/code/HTTPSRules.js
Expand Up @@ -280,6 +280,12 @@ const RuleWriter = {

sstream.close();
fstream.close();
return this.readFromString(data, rule_store, file);
},

readFromString: function(data, rule_store, file) {
if (typeof file === 'undefined') file = {path: 'fromString'};

// XXX: With DOMParser, we probably do not need to throw away the XML
// declaration anymore nowadays.
data = data.replace(/<\?xml[^>]*\?>/, "");
Expand Down Expand Up @@ -414,30 +420,29 @@ const HTTPSRules = {
this.rulesetsByName = {};
var t1 = new Date().getTime();
this.checkMixedContentHandling();
var rulefiles = RuleWriter.enumerate(RuleWriter.getCustomRuleDir());
this.scanRulefiles(rulefiles);
rulefiles = RuleWriter.enumerate(RuleWriter.getRuleDir());
this.scanRulefiles(rulefiles);
var t,i;
for (t in this.targets) {
for (i = 0 ; i < this.targets[t].length ; i++) {
this.log(INFO, t + " -> " + this.targets[t][i].name);
}
}

// for any rulesets with <target host="*">
// every URI needs to be checked against these rulesets
// (though currently we don't ship any)
this.global_rulesets = this.targets["*"] ? this.targets["*"] : [];

this.rulesets.sort(
function(r1,r2) {
if (r1.name.toLowerCase() < r2.name.toLowerCase()) return -1;
else return 1;
}
);
// Initialize database connection.
var dbFile = FileUtils.getFile("ProfD",
["extensions", "https-everywhere@eff.org", "defaults", "rulesets.sqlite"]);
var rulesetDBConn = Services.storage.openDatabase(dbFile);
this.queryForRuleset = rulesetDBConn.createStatement(
"select contents from rulesets where id = :id");

// Preload the list of which targets are available in the DB.
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
// but is faster than loading all of the rulesets. If this becomes a
// bottleneck, change it to load in a background webworker, or load
// a smaller bloom filter instead.
this.targetsAvailable = {};
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
this.log(DBUG, "Adding targets...");
while (targetsQuery.executeStep()) {
var host = targetsQuery.row.host;
this.targetsAvailable[host] = targetsQuery.row.ruleset_id;
}
this.log(DBUG, "Done adding targets.");
} catch(e) {
this.log(WARN,"Rules Failed: "+e);
this.log(DBUG,"Rules Failed: "+e);
}
var t2 = new Date().getTime();
this.log(NOTE,"Loading rulesets took " + (t2 - t1) / 1000.0 + " seconds");
Expand Down Expand Up @@ -498,6 +503,8 @@ const HTTPSRules = {
}
},

httpMatch: /^http/i,

rewrittenURI: function(alist, input_uri) {
// This function oversees the task of working out if a uri should be
// rewritten, what it should be rewritten to, and recordkeeping of which
Expand All @@ -518,7 +525,7 @@ const HTTPSRules = {
try {
var rs = this.potentiallyApplicableRulesets(uri.host);
} catch(e) {
this.log(WARN, 'Could not check applicable rules for '+uri.spec);
this.log(WARN, 'Could not check applicable rules for '+uri.spec + '\n'+e);
return null;
}

Expand Down Expand Up @@ -602,17 +609,54 @@ const HTTPSRules = {
intoList.push(fromList[i]);
},

// Try to find a ruleset in the SQLite database for a given target (e.g.
// '*.openssl.org')
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
// there any way to fix that, given that we need to run blocking in the request
// flow? Perhaps we can preload all targets from the DB into memory at startup
// so we only hit the DB when we know there is something to be had.
queryTarget: function(target) {
this.log(DBUG, "Querying DB for " + target);
var output = [];

this.queryForRuleset.params.id = this.targetsAvailable[target];

try {
while (this.queryForRuleset.executeStep())
output.push(this.queryForRuleset.row.contents);
} finally {
this.queryForRuleset.reset();
}
return output;
},

potentiallyApplicableRulesets: function(host) {
// Return a list of rulesets that declare targets matching this host
var i, tmp, t;
var results = this.global_rulesets.slice(0); // copy global_rulesets
try {
if (this.targets[host])
results = results.concat(this.targets[host]);
} catch(e) {
this.log(DBUG,"Couldn't check for ApplicableRulesets: " + e);
return [];
}
var results = [];

var attempt = function(target) {
// First try the in-memory rulesets
if (this.targets[target] &&
this.targets[target].length > 0) {
this.setInsert(results, this.targets[target]);
} else if (this.targetsAvailable[target]) {
// If not found there, check the DB and load the ruleset as appropriate
var rulesets = this.queryTarget(target);
if (rulesets.length > 0) {
for (var i = 0; i < rulesets.length; i++) {
var ruleset = rulesets[i];
this.log(INFO, "Found ruleset in DB for " + host + ": " + ruleset);
RuleWriter.readFromString(ruleset, this);
this.setInsert(results, this.targets[target]);
}
} else {
this.nonTargets[target] = 1;
}
}
}.bind(this);

attempt(host);

// replace each portion of the domain with a * in turn
var segmented = host.split(".");
Expand All @@ -621,13 +665,13 @@ const HTTPSRules = {
segmented[i] = "*";
t = segmented.join(".");
segmented[i] = tmp;
this.setInsert(results, this.targets[t]);
attempt(t);
}
// now eat away from the left, with *, so that for x.y.z.google.com we
// check *.z.google.com and *.google.com (we did *.y.z.google.com above)
for (i = 1; i <= segmented.length - 2; ++i) {
for (i = 2; i <= segmented.length - 2; ++i) {
t = "*." + segmented.slice(i,segmented.length).join(".");
this.setInsert(results, this.targets[t]);
attempt(t);
}
this.log(DBUG,"Potentially applicable rules for " + host + ":");
for (i = 0; i < results.length; ++i)
Expand Down
3 changes: 3 additions & 0 deletions src/components/https-everywhere.js
Expand Up @@ -31,6 +31,9 @@ const Cc = Components.classes;
const Cu = Components.utils;
const Cr = Components.results;

Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/FileUtils.jsm");

const CP_SHOULDPROCESS = 4;

const SERVICE_CTRID = "@eff.org/https-everywhere;1";
Expand Down
72 changes: 72 additions & 0 deletions utils/make-sqlite.py
@@ -0,0 +1,72 @@
#!/usr/bin/python2.7
#
# Builds an sqlite DB containing all the rulesets, indexed by target.
#
# The DB has two tables:
#   rulesets(id, name, contents) -- the XML text of each ruleset
#   targets(host, ruleset_id)    -- one row per <target host="..."> entry

import sqlite3
import argparse
import sys, re, os

from lxml import etree

# Keep a distinct name for the CLI parser so it is not shadowed by the
# lxml XML parser created below.
arg_parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Build the ruleset sqlite database.")
# With nargs="*" the default must be a list, not a bare string: a string
# default would later be iterated character-by-character.
arg_parser.add_argument('ruleset', metavar='XML directory', type=str, nargs="*",
    default=["src/chrome/content/rules"],
    help='Directories (or files) of ruleset XML files to include.')

args = arg_parser.parse_args()

def nomes_all(where=None):
    """Returns generator to extract all files from a list of files/dirs.

    Defaults to the parsed command-line arguments (previously this read
    sys.argv directly, bypassing argparse).
    """
    if where is None:
        where = args.ruleset
    if not where:
        where = ['.']
    for i in where:
        if os.path.isfile(i):
            yield i
        elif os.path.isdir(i):
            for r, d, f in os.walk(i):
                for fi in f:
                    yield os.path.join(r, fi)


conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), '../src/defaults/rulesets.sqlite'))
c = conn.cursor()
c.execute('''DROP TABLE IF EXISTS rulesets''')
c.execute('''CREATE TABLE rulesets
             (id INTEGER PRIMARY KEY,
              name TEXT,
              contents TEXT)''')
c.execute('''DROP TABLE IF EXISTS targets''')
c.execute('''CREATE TABLE targets
             (host TEXT,
              ruleset_id INTEGER)''')

xml_parser = etree.XMLParser(remove_blank_text=True)

for fi in nomes_all():
    try:
        tree = etree.parse(fi, xml_parser)
    except Exception as oops:
        # Only report parse failures for files that look like rulesets;
        # either way we must skip this file. (Previously execution fell
        # through here and reused a stale/undefined `tree`.)
        if fi[-4:] == ".xml":
            print("%s failed XML validity: %s\n" % (fi, oops))
        continue
    if not tree.xpath("/ruleset"):
        continue

    # Remove comments to save space.
    etree.strip_tags(tree, etree.Comment)

    targets = tree.xpath("/ruleset/target/@host")
    # TODO: Strip target tags too. Right now the JS code requires there be a
    # target tag.
    #etree.strip_tags(tree,'target')

    # TODO: filter out comments and targets to save storage bytes
    ruleset_name = tree.xpath("/ruleset/@name")[0]
    c.execute('''INSERT INTO rulesets (name, contents) VALUES(?, ?)''',
              (ruleset_name, etree.tostring(tree)))
    ruleset_id = c.lastrowid
    for target in targets:
        c.execute('''INSERT INTO targets (host, ruleset_id) VALUES(?, ?)''',
                  (target, ruleset_id))

conn.commit()
conn.close()

0 comments on commit 414c3e9

Please sign in to comment.