diff --git a/DesignNotes.txt b/DesignNotes.txt new file mode 100644 index 0000000..1ccbde8 --- /dev/null +++ b/DesignNotes.txt @@ -0,0 +1,32 @@ +An infinite number of heuristic/statistical tests could be added; I +will be much more likely to consider pull requests that add more if +you can point to somebody else's code on github that screws up +pseudorandom number generation in a way that your code catches. + +The first rough implementation of the duplication detection used a +really big bloom filter with a very small false positive rate. That +was fun to code, but it added a lot of complication and was only three +or four times smaller on disk than the more straightforward solution +using AppEngine's key/value store. + +The performance bottleneck is datastore reads for the duplication +detection. Testing a 64-byte byte array is 48 reads (and 2 writes). +If this service becomes very popular and AppEngine costs become an +issue that is the first place to optimize. + +I've done some preliminary testing and benchmarking of an algorithm +that uses 6 reads and 3 writes but sacrifices detection if the byte +arrays overlap in fewer than 32 bytes. + + +Possible future work, if there is demand: + +Public keys should be globally unique and look random. A tool that +finds all the public keys on a system and submits them would be +useful. It would be even more useful if you could safely run the +tool twice and not get false-positive reports of non-uniqueness +(if the query was tagged with the name or IP of the machine the +server could ignore duplicates with the same tag). 
+ + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5cf2b18 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Gavin Andresen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index b0b14ea..b72f338 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,6 @@ -# rngsanity -Random Number Generator sanity project +Server-based, random number generator testing service + +AppEngine-based service to sanity test what should be +cryptographically secure random number bytestreams. + +See http://www.randomsanity.org/ for documentation and information. 
diff --git a/app.yaml b/app.yaml new file mode 100644 index 0000000..51946a9 --- /dev/null +++ b/app.yaml @@ -0,0 +1,6 @@ +runtime: go +api_version: go1 + +handlers: +- url: /.* + script: _go_app diff --git a/code_of_conduct.md b/code_of_conduct.md new file mode 100644 index 0000000..cba0108 --- /dev/null +++ b/code_of_conduct.md @@ -0,0 +1,51 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. 
+ +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting a project maintainer (see below). All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +You may send reports to [our Conduct email](mailto:gavinandresen@gmail.com). + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ diff --git a/entropyheader.go b/entropyheader.go new file mode 100644 index 0000000..9f51db4 --- /dev/null +++ b/entropyheader.go @@ -0,0 +1,19 @@ +package randomsanity + +import ( + "crypto/rand" + "encoding/hex" + "net/http" +) + +func addEntropyHeader(w http.ResponseWriter) { + // This assumes server has a good crypto/rand + // implementation. We could memcache an array + // that is initialized to crypto/rand but updated + // with every request that comes in with random data. 
+ var b [32]byte + n, err := rand.Read(b[:]) + if err == nil && n == len(b) { + w.Header().Add("X-Entropy", hex.EncodeToString(b[:])) + } +} diff --git a/index.yaml b/index.yaml new file mode 100644 index 0000000..94ada7f --- /dev/null +++ b/index.yaml @@ -0,0 +1,6 @@ +indexes: + +- kind: NotifyViaEmail + properties: + - name: UserId + - name: Address diff --git a/notify.go b/notify.go new file mode 100644 index 0000000..0767aa2 --- /dev/null +++ b/notify.go @@ -0,0 +1,221 @@ +package randomsanity + +import ( + "appengine" + "appengine/datastore" + "appengine/mail" + "crypto/rand" + "encoding/hex" + "fmt" + "log" + "net/http" + "strings" + "time" +) + +import netmail "net/mail" + +// Code to notify customer when a rng failure is detected + +type NotifyViaEmail struct { + UserID string + Address string +} + +// Return userID associated with request (or empty string) +func userID(ctx appengine.Context, id string) (*datastore.Key, error) { + // Only pay attention to ?id=123456 if they've done an authentication loop + // and are already in the database + if len(id) == 0 { + return nil, nil + } + q := datastore.NewQuery("NotifyViaEmail").Filter("UserID =", id).Limit(1).KeysOnly() + keys, err := q.GetAll(ctx, nil) + if err != nil || len(keys) == 0 { + return nil, err + } + return keys[0], nil +} + +// Register an email address. To authenticate ownership of the +// address, the server assigns a random user id and emails it. 
+// To mitigate abuse, this method is heavily rate-limited per +// IP and email address +func registerEmailHandler(w http.ResponseWriter, r *http.Request) { + // Requests generated by web browsers are not allowed: + if r.Header.Get("Origin") != "" { + http.Error(w, "CORS requests are not allowed", http.StatusForbidden) + return + } + ua := r.Header.Get("User-Agent") + if len(ua) < 4 || (!strings.EqualFold(ua[0:4], "curl") && !strings.EqualFold(ua[0:4], "wget")) { + http.Error(w, "Email registration must be done via curl or wget", http.StatusForbidden) + return + } + + w.Header().Add("Content-Type", "text/plain") + parts := strings.Split(r.URL.Path, "/") + if len(parts) < 4 { + http.Error(w, "Missing email", http.StatusBadRequest) + return + } + if len(parts) > 4 { + http.Error(w, "URL path too long", http.StatusBadRequest) + return + } + + addresses, err := netmail.ParseAddressList(parts[len(parts)-1]) + if err != nil || len(addresses) != 1 { + http.Error(w, "Invalid email address", http.StatusBadRequest) + return + } + address := addresses[0] + + ctx := appengine.NewContext(r) + + // 2 registrations per IP per day + limited, err := RateLimitResponse(ctx, w, IPKey("emailreg", r.RemoteAddr), 2, time.Hour*24) + if err != nil || limited { + return + } + // ... and 1 per email per week + limited, err = RateLimitResponse(ctx, w, "emailreg"+address.Address, 1, time.Hour*24*7) + if err != nil || limited { + return + } + // ... and global 10 signups per hour (so a botnet with lots of IPs cannot + // generate a huge surge of bogus registrations) + limited, err = RateLimitResponse(ctx, w, "emailreg", 10, time.Hour) + if err != nil || limited { + return + } + // Note: the AppEngine dashboard can also be used to set quotas. + // If somebody with a bunch of IP addresses is persistently annoying, + // we'll switch to a web page with a CAPTCHA or require sign-in with + // a Google account to register or require payment to register. 
+ + var notify []NotifyViaEmail + q := datastore.NewQuery("NotifyViaEmail").Filter("Address =", address.Address) + if _, err := q.GetAll(ctx, ¬ify); err != nil { + http.Error(w, "Datastore error", http.StatusInternalServerError) + return + } + if len(notify) > 0 { + sendNewID(ctx, address.Address, notify[0].UserID) + fmt.Fprintf(w, "Check your email, ID sent to %s\n", address.Address) + return + } + bytes := make([]byte, 8) + if _, err := rand.Read(bytes); err != nil { + http.Error(w, "rand.Read error", http.StatusInternalServerError) + return + } + id := hex.EncodeToString(bytes) + n := NotifyViaEmail{id, address.Address} + k := datastore.NewIncompleteKey(ctx, "NotifyViaEmail", nil) + if _, err := datastore.Put(ctx, k, &n); err != nil { + http.Error(w, "Datastore error", http.StatusInternalServerError) + return + } + sendNewID(ctx, address.Address, id) + // HTTP response MUST NOT contain the id + fmt.Fprintf(w, "Check your email, ID sent to %s", address.Address) +} + +// Unregister, given userID +func unRegisterIDHandler(w http.ResponseWriter, r *http.Request) { + if r.Method != "DELETE" { + http.Error(w, "unregister method must be DELETE", http.StatusBadRequest) + return + } + parts := strings.Split(r.URL.Path, "/") + if len(parts) < 4 { + http.Error(w, "Missing userID", http.StatusBadRequest) + return + } + if len(parts) > 4 { + http.Error(w, "URL path too long", http.StatusBadRequest) + return + } + ctx := appengine.NewContext(r) + + uID := parts[len(parts)-1] + dbKey, err := userID(ctx, uID) + if err != nil { + http.Error(w, "datastore error", http.StatusInternalServerError) + return + } + if dbKey == nil { + http.Error(w, "User ID not found", http.StatusNotFound) + return + } + err = datastore.Delete(ctx, dbKey) + if err != nil { + http.Error(w, "Error deleting key", http.StatusInternalServerError) + return + } + fmt.Fprintf(w, "id %s unregistered\n", uID) +} + +func sendNewID(ctx appengine.Context, address string, id string) { + msg := &mail.Message{ + 
Sender: "randomsanityalerts@gmail.com", + To: []string{address}, + Subject: "Random Sanity id request", + } + msg.Body = fmt.Sprintf("Somebody requested an id for this email address (%s)\n"+ + "for the randomsanity.org service.\n"+ + "\n"+ + "id: %s\n"+ + "\n"+ + "Append ?id=%s to API calls to be notified of failures via email.\n"+ + "\n"+ + "If somebody is pretending to be you and you don't use the randomsanity.org\n"+ + "service, please ignore this message.\n", + address, id, id) + if err := mail.Send(ctx, msg); err != nil { + log.Printf("mail.Send failed: %s", err) + } +} + +func sendEmail(ctx appengine.Context, address string, tag string, b []byte, reason string) { + // Don't spam if there are hundreds of failures, limit to + // a handful per day: + limit, err := RateLimit(ctx, address, 5, time.Hour*24) + if err != nil || limit { + return + } + + msg := &mail.Message{ + Sender: "randomsanityalerts@gmail.com", + To: []string{address}, + Subject: "Random Number Generator Failure Detected", + } + msg.Body = fmt.Sprintf("The randomsanity.org service has detected a failure.\n"+ + "\n"+ + "Failure reason: %s\n"+ + "Data: 0x%s\n"+ + "Tag: %s\n", reason, hex.EncodeToString(b), tag) + if err := mail.Send(ctx, msg); err != nil { + log.Printf("mail.Send failed: %s", err) + } +} + +func notify(ctx appengine.Context, uid string, tag string, b []byte, reason string) { + if len(uid) == 0 { + return + } + q := datastore.NewQuery("NotifyViaEmail").Filter("UserID =", uid) + for t := q.Run(ctx); ; { + var d NotifyViaEmail + _, err := t.Next(&d) + if err == datastore.Done { + break + } + if err != nil { + log.Printf("Datastore error: %s", err.Error()) + return + } + sendEmail(ctx, d.Address, tag, b, reason) + } +} diff --git a/randomsanity.go b/randomsanity.go new file mode 100644 index 0000000..e3eb3f4 --- /dev/null +++ b/randomsanity.go @@ -0,0 +1,132 @@ +// AppEngine-based server to sanity check byte arrays +// that are supposed to be random. 
+package randomsanity + +import ( + "appengine" + "encoding/hex" + "fmt" + "net/http" + "strings" + "time" +) + +func init() { + // Main API point, sanity check hex bytes + http.HandleFunc("/v1/q/", submitBytesHandler) + + // Start an email loop to get an id token, to be + // notified via email of failures: + http.HandleFunc("/v1/registeremail/", registerEmailHandler) + + // Remove an id token + http.HandleFunc("/v1/unregister/", unRegisterIDHandler) + + // Development/testing... + http.HandleFunc("/v1/debug", debugHandler) + + // Redirect to www. home page + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + http.Redirect(w, r, "http://www.randomsanity.org/", 301) + }) +} + +func debugHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Add("Content-Type", "text/plain") + + // Code useful for development/testing: + + // fmt.Fprint(w, "***r.Header headers***\n") + // r.Header.Write(w) + + // ctx := appengine.NewContext(r) + // fmt.Fprint(w, "Usage data:\n") + // for _, u := range GetUsage(ctx) { + // fmt.Fprintf(w, "%s,%d\n", u.Key, u.N) + // } +} + +func submitBytesHandler(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(r.URL.Path, "/") + if len(parts) != 4 { + http.Error(w, "Invalid GET", http.StatusBadRequest) + return + } + b, err := hex.DecodeString(parts[len(parts)-1]) + if err != nil { + http.Error(w, "Invalid hex", http.StatusBadRequest) + return + } + // Need at least 16 bytes to hit the 1-in-2^60 false positive rate + if len(b) < 16 { + http.Error(w, "Must provide 16 or more bytes", http.StatusBadRequest) + return + } + + ctx := appengine.NewContext(r) + + // Users that register can append id=....&tag=.... 
so + // they're notified if somebody else submits + // the same random bytes + uID := r.FormValue("id") + dbKey, _ := userID(ctx, uID) + tag := "" + if dbKey == nil { + uID = "" + } else { + tag = r.FormValue("tag") + if len(tag) > 64 { + tag = "" // Tags must be short + } + } + + // Rate-limit by IP address, with a much higher limit for registered users + // If more complicated logic is needed because of abuse a per-user limit + // could be stored in the datastore, but running into the 600-per-hour-per-ip + // limit should be rare (maybe a sysadmin has 200 virtual machines + // behind the same IP address and restarts them more than three times in an hour....) + var ratelimit uint64 = 60 + if len(uID) > 0 { + ratelimit = 600 + } + limited, err := RateLimitResponse(ctx, w, IPKey("q", r.RemoteAddr), ratelimit, time.Hour) + if err != nil || limited { + return + } + + w.Header().Add("Content-Type", "application/json") + + // Returns some randomness caller can use to mix in to + // their PRNG: + addEntropyHeader(w) + + // First, some simple tests for non-random input: + result, reason := LooksRandom(b) + if !result { + RecordUsage(ctx, "Fail_"+reason, 1) + fmt.Fprint(w, "false") + notify(ctx, uID, tag, b, reason) + return + } + + // Try to catch two machines with insufficient starting + // entropy generating identical streams of random bytes. + if len(b) > 64 { + b = b[0:64] // Prevent DoS from excessive datastore lookups + } + unique, err := looksUnique(ctx, w, b, uID, tag) + if err != nil { + return + } + if unique { + RecordUsage(ctx, "Success", 1) + fmt.Fprint(w, "true") + } else { + RecordUsage(ctx, "Fail_Nonunique", 1) + fmt.Fprint(w, "false") + } +} diff --git a/randomsanitystat.go b/randomsanitystat.go new file mode 100644 index 0000000..115bfb8 --- /dev/null +++ b/randomsanitystat.go @@ -0,0 +1,134 @@ +// Fast, simple statistical tests for short (e.g. 
256-bit) bitstreams +// +// These are written for a 1-in-2^60 (one in a quintillion) false +// positive rate, approximately, overall. Since multiple tests are +// run, the false positive rate for each should be even lower; +// individual tests work on 8 byte chunks so have a 1-in-2^64 +// false positive rate. +// +// They are meant to catch catastrophic failures of software or hardware, +// NOT to detect subtle biases. +// +// If you want to detect subtle biases, use one of these extensive +// test suites: +// NIST SP 800-22 +// DieHarder +// TestU01 +// +// If you are a certain type of programmer, you will be tempted to optimize +// the snot out of these; there are lots of clever optimizations that could +// make some of these tests an order or three of magnitude faster. +// Don't. Find something more productive to do. CPU time is really cheap; +// finding somebody willing to spend a half a day reviewing your awesomely +// clever algorithm for detecting stuck bits is expensive. +package randomsanity + +import ( + "encoding/binary" +) + +type decodeF func([]byte) uint64 + +func incrementing(b []byte, bytesPerNum int, fp decodeF) bool { + // Need at least one number plus 64-bits-worth of items + // to be under the 2^60 false positive rate + if len(b) < bytesPerNum+8 { + return false + } + first := fp(b[0:bytesPerNum]) + nNums := len(b) / bytesPerNum + allmatch := true + for i := 1; i < nNums && allmatch; i++ { + n := fp(b[bytesPerNum*i : bytesPerNum*(i+1)]) + if first+uint64(i) != n { + allmatch = false + } + } + return allmatch +} + +// Counting returns true if b contains bytes that can be interpreted +// as incrementing numbers: 8/16/32/64 bits, big or little endian. +// It is meant to catch programming errors where an array index is used +// instead of some source of random bytes. 
+func Counting(b []byte) bool { + if incrementing(b, 1, func(b []byte) uint64 { return uint64(b[0]) }) { + return true + } + if incrementing(b, 2, func(b []byte) uint64 { return uint64(binary.LittleEndian.Uint16(b[0:2])) }) { + return true + } + if incrementing(b, 2, func(b []byte) uint64 { return uint64(binary.BigEndian.Uint16(b[0:2])) }) { + return true + } + if incrementing(b, 4, func(b []byte) uint64 { return uint64(binary.LittleEndian.Uint32(b[0:4])) }) { + return true + } + if incrementing(b, 4, func(b []byte) uint64 { return uint64(binary.BigEndian.Uint32(b[0:4])) }) { + return true + } + if incrementing(b, 8, func(b []byte) uint64 { return uint64(binary.LittleEndian.Uint64(b[0:8])) }) { + return true + } + if incrementing(b, 8, func(b []byte) uint64 { return uint64(binary.BigEndian.Uint64(b[0:8])) }) { + return true + } + return false +} + +// Repeated returns true if b contains long runs of repeated bytes +func Repeated(b []byte) bool { + nBytes := len(b) + nRepeated := 0 + + for i := 1; i <= nBytes; i++ { + if b[i-1] == b[i%nBytes] { + nRepeated += 1 + if nRepeated >= 8 { + return true + } + } else { + nRepeated = 0 + } + } + return false +} + +// BitStuck returns true and which bit is stuck if b contains +// long runs of bytes with the same bit set or unset +func BitStuck(b []byte) (bool, uint) { + if len(b) < 64 { + return false, 0 + } + // Create a new byte array with all the low bits, + // etc. Then use Repeated to look for runs of + // zero or one. + for bit := uint(0); bit < uint(8); bit++ { + bb := make([]byte, (len(b)+7)/8) + for i, v := range b { + bb[i/8] |= ((v >> bit) & 0x01) << uint(i%8) + } + if Repeated(bb) { + return true, bit + } + } + return false, 0 +} + +// LooksRandom returns true and an empty string if b passes all +// the tests; otherwise it returns false and a short string describing +// which test failed. 
+func LooksRandom(b []byte) (bool, string) { + if Repeated(b) { + return false, "Repeated bytes" + } + if Counting(b) { + return false, "Counting" + } + stuck, _ := BitStuck(b) + if stuck { + return false, "Bit stuck" + } + + return true, "" +} diff --git a/randomsanitystat_test.go b/randomsanitystat_test.go new file mode 100644 index 0000000..d6bc1af --- /dev/null +++ b/randomsanitystat_test.go @@ -0,0 +1,132 @@ +package randomsanity + +import ( + "crypto/rand" + "encoding/hex" + "strings" + "testing" +) + +func TestLooksRandom(t *testing.T) { + var tests = []struct { + hexbytes string + want bool + }{ + // Software failure: use counter instead of random source + // (rngstat.Counting tests) + + // 8-bit: start with a random 8-bit value, + // chances that the next 8 bytes (64 bits) happen to look like + // counting up are 1 in 2^64, less than our false-positive rate + {"01 02 03 04 05 06 07 08 09", false}, + {"18 19 1a 1b 1c 1d 1e 1f 20", false}, + + // 16-bit: + {"0000 0001 0002 0003 0004", false}, // big-endian + {"9991 9992 9993 9994 9995", false}, + {"0000 0100 0200 0300 0400", false}, // little-endian + {"9199 9299 9399 9499 9599", false}, + + // 32-bit: + {"00000001 00000002 00000003", false}, // big-endian + {"1111111f 11111120 11111121", false}, + {"01000000 02000000 03000000", false}, // little-endian + {"1f111111 20111111 21111111", false}, + + // 64-bit. Just one 64-bit sequence is enough to be under the + // 2^60 false positive rate. 
+ {"0000000000000001 0000000000000002", false}, // big-endian + {"ac80d400f8cd5946 ac80d400f8cd5947", false}, + {"4edc2837e54241ff 4edc2837e5424200", false}, + {"0100000000000000 0200000000000000", false}, // little-endian + {"ff4132e53728dc4e 004232e53728dc4e", false}, + + // repeated bytes tests + // (rngstat.Repeated tests) + {"00", true}, + {"ff", true}, + {"00000000000000", true}, + {"0000000000000000", false}, + {"ffffffffffffffff", false}, + {"fffffffeffffffff", true}, + {"0100000000000000", true}, + {"ff000000000000000000ff", false}, + {"00ffffffffffffffffff00", false}, + {"aaaaaaaaaaaaaaab", true}, + {"aaaaaaaaaaaaaaaa", false}, + {"ffaaaaaaaaaaaaaaaaaabb", false}, + {"39393939393939ab", true}, + {"3939393939393939", false}, + {"ff393939393939393939bb", false}, + + // stuck bits tests (need 64 bytes or more) + {"136d3d153516244b2a366d7b401131523d453b701f4b7c6d39480710561b5e0a136d3d153516244b2a366d7b401131523d453b701f4b7c6d39480710561b5e0a", false}, // 0x80 bit unset + {"13adbd95b516248baa36ad3b8011b1123d053bb09f0b3c2db9080790961b1e0a13adbd95b516248baa36ad3b8011b1123d053bb09f0b3c2db9080790961b1e0a", false}, // 0x40 bit unset + {"13cd9d95951604cb8a16cd5bc01191521d451bd09f4b5c4d99480790d61b5e0a13cd9d95951604cb8a16cd5bc01191521d451bd09f4b5c4d99480790d61b5e0a", false}, // 0x20 bit unset + {"11edbd95b51424c9a834ed79c011b1503d4539f09d497c6db9480590d4195c0811edbd95b51424c9a834ed79c011b1503d4539f09d497c6db9480590d4195c08", false}, // 0x02 bit unset + {"12ecbc94b41624caaa36ec7ac010b0523c443af09e4a7c6cb8480690d61a5e0a12ecbc94b41624caaa36ec7ac010b0523c443af09e4a7c6cb8480690d61a5e0a", false}, // 0x01 bit unset + {"13efbf97b71626cbaa36ef7bc213b3523f473bf29f4b7e6fbb4a0792d61b5e0a13efbf97b71626cbaa36ef7bc213b3523f473bf29f4b7e6fbb4a0792d61b5e0a", false}, // 0x02 bit set + + // Actual random bitstreams, 1 to 32 bytes + {"8b", true}, + {"6c72", true}, + {"307dd9", true}, + {"69f3171e", true}, + {"64980ad616", true}, + {"bb039395f8de", true}, + {"0eee58c404c82b", true}, + 
{"b45b237eeca0c59d", true}, + {"1d69df683069246282", true}, + {"81a6cefa3675ed6f04b9", true}, + {"143d92cc0ac0c594169967", true}, + {"a3d5be02d5b77a44793dccb4", true}, + {"98aa8d91d6d732d88c39c8ceec", true}, + {"3b1d9551df40c9330541c17a7ed2", true}, + {"356982f3f3a0a48a13df95245a7330", true}, + {"e47d253e45ccfa65f44493677aaf56ae", true}, + {"92f4752dbfcc23da433c9a8759cc67b330", true}, + {"17c7a1fae0f4a2d9efab4e4081f61afc4970", true}, + {"da8445a72b1c80affd49346f36cb63429eae10", true}, + {"be5d96f4a70273c960b3ce27997d6e388aac5e6b", true}, + {"17872e3aadb230cdeec35335fc6d3e4bf4ccc45b29", true}, + {"e9c5f8819c861b6e58af10e77233eac07328a1b51466", true}, + {"48fd3700fea9515416527f5834519ab25ce418e152e7c2", true}, + {"db80540a4bca01e1f218fb3162afe3ed6d4552fea89228bb", true}, + {"c96c862bc74fa6d6d2f026868b7a611e1650ab28500eb161db", true}, + {"44fce84f7a38be9532caf56ad5b8911f5756629e8402778a61f1", true}, + {"8d637674c809bd2ab7b20a6dae939176a4ed7fb54e95e1a4a31db6", true}, + {"4e811093195e9e7236a071c6c386650c374661d50cd802b86cfbe4a3", true}, + {"194d61bdd628f380916746f6804eaa83f7919fa87dffd3bee80c1b4be8", true}, + {"d1d648be784a79b0fde0a2f79562c1576643f0d322ff73163dd960c9a7a0", true}, + {"4724b307af612288395831874016ede4f3ba2d41df40c3884f1ff1b9c05ac3", true}, + {"13edbd95b51624cbaa36ed7bc011b1523d453bf09f4b7c6db9480790d61b5e0a", true}, + } + + for _, test := range tests { + b, err := hex.DecodeString(strings.Replace(test.hexbytes, " ", "", -1)) + if err != nil { + panic(err) + } + if got, which := LooksRandom(b); got != test.want { + if which != "" { + t.Errorf("LooksRandom(%q) = %v (%s)", test.hexbytes, got, which) + } else { + t.Errorf("LooksRandom(%q) = %v", test.hexbytes, got) + } + } + } +} + +func BenchmarkLooksRandom(b *testing.B) { + var rhash [128]byte + for i := 0; i < b.N; i++ { + _, err := rand.Read(rhash[:]) + if err != nil { + panic(err) + } + r, t := LooksRandom(rhash[:]) + if r == false { + b.Errorf("%s failed LooksRandom (%s)", hex.EncodeToString(rhash[:]), 
t) + } + } +} diff --git a/ratelimit.go b/ratelimit.go new file mode 100644 index 0000000..9a9e12b --- /dev/null +++ b/ratelimit.go @@ -0,0 +1,63 @@ +package randomsanity + +import ( + "appengine" + "appengine/memcache" + "fmt" + "net/http" + "strings" + "time" +) + +// Limit something (identified by key) to at most max per timespan +// State stored in the memcache, so this is "best-effort" +// Returns true if rate limit is hit. +func RateLimit(ctx appengine.Context, key string, max uint64, timespan time.Duration) (bool, error) { + value, err := memcache.Increment(ctx, key, -1, max+1) + if err != nil { + return false, err + } + // value 0 : ran into request limit + if value == 0 { + return true, nil + } + // value max means it wasn't set before, so + // rewrite to set correct expiration time: + if value == max { + item, err := memcache.Get(ctx, key) + if err != nil { + return false, err + } + item.Expiration = timespan + // There is a race condition here, but it is mostly harmless + // (extra requests above the rate limit could slip through) + memcache.Set(ctx, item) + } + return false, nil +} + +// Rate limit, and write stuff to w: +func RateLimitResponse(ctx appengine.Context, w http.ResponseWriter, key string, max uint64, timespan time.Duration) (bool, error) { + limit, err := RateLimit(ctx, key, max, timespan) + if err != nil { + http.Error(w, "RateLimit error", http.StatusInternalServerError) + return false, err + } + if limit { + w.Header().Add("Content-Type", "text/plain") + w.WriteHeader(http.StatusTooManyRequests) + fmt.Fprint(w, "Request limit exceeded") + return true, nil + } + return false, nil +} + +// Get a reasonable memcache key from IPv4 or IPv6 address +func IPKey(prefix string, ipaddr string) string { + // If it is a super-long IPv6: use first four parts + ipv6parts := strings.Split(ipaddr, ":") + if len(ipv6parts) > 4 { + return prefix + strings.Join(ipv6parts[0:4], ":") + } + return prefix + ipaddr +} diff --git a/unique.go b/unique.go new file 
mode 100644 index 0000000..08cf45c --- /dev/null +++ b/unique.go @@ -0,0 +1,211 @@ +package randomsanity + +// Best-effort "have we ever seen this array of bytes before?" + +import ( + "appengine" + "appengine/datastore" + "bytes" + "crypto/aes" + "crypto/rand" + "net/http" + "time" +) + +func looksUnique(ctx appengine.Context, w http.ResponseWriter, b []byte, uID string, tag string) (bool, error) { + // Test every 16-byte (128-bit) sequence in the input against our database + + if we get a match, complain! + match, i, err := unique(ctx, b[:], uID, tag) + + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return true, err + } + if match != nil { + notify(ctx, uID, tag, b[i:i+16], "Non Unique") + if len(match.UserID) > 0 && match.UserID != uID { + notify(ctx, match.UserID, match.Tag, b[i:i+16], "Non Unique") + } + return false, nil + } + return true, nil +} + +// +// Entities in the 'RB' datastore; +// storing 16 "random we hope" bytes. +// +// First prefixBytes bytes are used as the key, +// the rest are stored as the value, collisions just +// result in multiple values under one key, oldest +// entries first. +// +// The simplest possible storage scheme would be +// 16-byte keys, but that is HUGELY inefficient. +// +// Why 128 bits? We want a false positive rate under +// 1-in-2^60. 
We're basically running a 'birthday attack' +// so comparing random 128-bit chunks we get +// a chance of collision of any pair of about 1-in-2^64 +// + +const prefixBytes = 4 // Use 4 for production, 1 for development/testing collisions + +type RngUniqueBytesEntry struct { + Trailing []byte `datastore:",noindex"` + Time int64 `datastore:",noindex"` + UserID string `datastore:",noindex"` + Tag string `datastore:",noindex"` +} +type RngUniqueBytes struct { + Hits []RngUniqueBytesEntry `datastore:",noindex"` +} + +type SecretBytes struct { + Secret []byte `datastore:",noindex"` + CreationTime int64 +} + +func secretKey(ctx appengine.Context) ([]byte, error) { + var result []byte + + // Create random secret if it doesn't already exist: + var secrets []SecretBytes + + q := datastore.NewQuery("SecretBytes") + if _, err := q.GetAll(ctx, &secrets); err != nil { + return result, err + } + if len(secrets) == 0 { + var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + return result, err + } + result = b[:] + secret := SecretBytes{result, time.Now().Unix()} + k := datastore.NewIncompleteKey(ctx, "SecretBytes", nil) + if _, err := datastore.Put(ctx, k, &secret); err != nil { + return result, err + } + } else { + result = secrets[0].Secret + } + return result, nil +} + +func i64(b []byte) int64 { + var result int64 + for i := uint(0); i < uint(len(b)) && i < 8; i++ { + result = result | (int64(b[i]) << (i * 8)) + } + return result +} + +func dealWithMultiError(err error) error { + // GetMulti returns either plain errors OR + // an appengine.MultiError that is an array + // of errors. 
We're OK if all the 'errors' + // are ErrNoSuchEntity; otherwise, + // we'll report the first error + switch err.(type) { + case nil: + return nil + case appengine.MultiError: + m := err.(appengine.MultiError) + for _, e := range m { + if e == nil || e == datastore.ErrNoSuchEntity { + continue + } + return e + } + return nil + default: + return err + } + return err +} + +func unique(ctx appengine.Context, b []byte, uID string, tag string) (*RngUniqueBytesEntry, int, error) { + n := len(b) - 15 // Number of queries + keys := make([]*datastore.Key, n) + vals := make([]*RngUniqueBytes, n) + + // Input is first run through AES-128 encryption, to prevent an attacker + // from intentionally causing database entry collisions. + secret, err := secretKey(ctx) + if err != nil { + return nil, 0, err + } + cipher, err := aes.NewCipher(secret) + if err != nil { + return nil, 0, err + } + + chunks := make([][]byte, n) + for i := 0; i < n; i++ { + chunks[i] = make([]byte, 16) + cipher.Encrypt(chunks[i], b[i:i+16]) + + keys[i] = datastore.NewKey(ctx, "RB", "", 1+i64(chunks[i][0:prefixBytes]), nil) + vals[i] = new(RngUniqueBytes) + } + err = datastore.GetMulti(ctx, keys, vals) + err = dealWithMultiError(err) + + if err != nil { + return nil, 0, err + } + for i, hit := range vals { + for _, h := range hit.Hits { + if bytes.Equal(h.Trailing, chunks[i][prefixBytes:]) { + // Rewriting keeps this entry from getting evicted + // and overwriting the userid/tag prevents the + // user from getting too many notifications + write(ctx, chunks[i][:], time.Now().Unix(), "", "") + return &h, i, nil // ... full match! + } + } + } + // If no matches, store the first and last 16 bytes. Any future + // overlapping sequences will trigger a match. 
+ err = write(ctx, chunks[0][:], time.Now().Unix(), uID, tag) + if err == nil && n > 1 { + err = write(ctx, chunks[n-1][:], time.Now().Unix(), uID, tag) + } + if err != nil { + return nil, 0, err + } + return nil, 0, nil +} + +func write(ctx appengine.Context, b []byte, t int64, uID string, tag string) error { + const maxEntriesPerKey = 100 + + key := datastore.NewKey(ctx, "RB", "", 1+i64(b[0:prefixBytes]), nil) + + err := datastore.RunInTransaction(ctx, func(ctx appengine.Context) error { + hit := new(RngUniqueBytes) + err := datastore.Get(ctx, key, hit) + if err != nil && err != datastore.ErrNoSuchEntity { + return err + } + // Find and remove old entry (if any): + hits := hit.Hits[:0] + for _, h := range hit.Hits { + if !bytes.Equal(h.Trailing, b[prefixBytes:]) { + hits = append(hits, h) + } + } + // Append new: + e := RngUniqueBytesEntry{Trailing: b[prefixBytes:], Time: t, UserID: uID, Tag: tag} + hit.Hits = append(hits, e) + // Throw out half the old if bucket overflows: + if len(hit.Hits) > maxEntriesPerKey { + hit.Hits = hit.Hits[len(hit.Hits)/2:] + } + _, err = datastore.Put(ctx, key, hit) + return err + }, nil) + return err +} diff --git a/usage.go b/usage.go new file mode 100644 index 0000000..67128dd --- /dev/null +++ b/usage.go @@ -0,0 +1,51 @@ +package randomsanity + +import ( + "appengine" + "appengine/datastore" + "log" + "math/rand" // don't need cryptographically secure randomness here +) + +// Keep track of usage stats + +// If frequency of database writes becomes a problem, increase SAMPLING_FACTOR +// to only write about every SAMPLING_FACTOR usages. 
+const SAMPLING_FACTOR = 1 + +type UsageRecord struct { + Key string + N int64 `datastore:",noindex"` +} + +func RecordUsage(ctx appengine.Context, k string, n int64) { + if rand.Intn(SAMPLING_FACTOR) != 0 { + return + } + key := datastore.NewKey(ctx, "UsageRecord", k, 0, nil) + + err := datastore.RunInTransaction(ctx, func(ctx appengine.Context) error { + r := UsageRecord{Key: k, N: 0} + err := datastore.Get(ctx, key, &r) + if err != nil && err != datastore.ErrNoSuchEntity { + return err + } + r.N += n * SAMPLING_FACTOR + _, err = datastore.Put(ctx, key, &r) + return err + }, nil) + if err != nil { + log.Printf("Datastore error: %s", err.Error()) + } +} + +func GetUsage(ctx appengine.Context) []UsageRecord { + var results []UsageRecord + + q := datastore.NewQuery("UsageRecord") + _, err := q.GetAll(ctx, &results) + if err != nil { + log.Printf("Datastore error: %s", err.Error()) + } + return results +}