* Combine cassandra clusters into a single one

* Add a new Cassandra CF for comment-sort caching * Add JSONP support * Support non-auto-renewing PayPal IPNs * Gold accounting * Gold features: comments tracking, larger comment limit * Autorenewing gold * Google checkout support * Profile-page sorting for all * Title-text on the logo * Hardcache sharding and profiling * Self serve enhancements * Add /r/foo/faq * Make self-centred reddits allow for more verbose selftext * Much better down-handling of databases * Add the ability to take a thread-dump from a running process * Remove the comscore tracker * Add comments to modqueue (does not back-populate)
reddit-archive · Oct 18, 2010 · 37e2ba9 · 37e2ba9
1 parent 0ae8f2f
commit 37e2ba9
Show file tree

Hide file tree

Showing 96 changed files with 3,044 additions and 893 deletions.
diff --git a/config/cassandra/storage-conf.xml b/config/cassandra/storage-conf.xml
@@ -1,24 +1,154 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~    http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
 <Storage>
   <!--======================================================================-->
   <!-- Basic Configuration                                                  -->
   <!--======================================================================-->
 
+  <!-- 
+   ~ The name of this cluster.  This is mainly used to prevent machines in
+   ~ one logical cluster from joining another.
+  -->
   <ClusterName>reddit</ClusterName>
 
+  <!--
+   ~ Turn on to make new [non-seed] nodes automatically migrate the right data 
+   ~ to themselves.  (If no InitialToken is specified, they will pick one 
+   ~ such that they will get half the range of the most-loaded node.)
+   ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+   ~ so that you can't subsequently accidentally bootstrap a node with
+   ~ data on it.  (You can reset this by wiping your data and commitlog
+   ~ directories.)
+   ~
+   ~ Off by default so that new clusters and upgraders from 0.4 don't
+   ~ bootstrap immediately.  You should turn this on when you start adding
+   ~ new nodes to a cluster that already has data on it.  (If you are upgrading
+   ~ from 0.4, start your cluster with it off once before changing it to true.
+   ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+   ~ I/O before your cluster starts up.)
+  -->
   <AutoBootstrap>false</AutoBootstrap>
+
+  <!--
+   ~ See http://wiki.apache.org/cassandra/HintedHandoff
+  -->
   <HintedHandoffEnabled>true</HintedHandoffEnabled>
 
-  <Keyspaces>
-    <Keyspace Name="permacache">
-      <ColumnFamily CompareWith="BytesType" Name="permacache" RowsCached="3000000" />
+  <!--
+   ~ Keyspaces and ColumnFamilies:
+   ~ A ColumnFamily is the Cassandra concept closest to a relational
+   ~ table.  Keyspaces are separate groups of ColumnFamilies.  Except in
+   ~ very unusual circumstances you will have one Keyspace per application.
 
+   ~ There is an implicit keyspace named 'system' for Cassandra internals.
+  -->
+  <Keyspaces>
+    <Keyspace Name="Keyspace1">
+      <!--
+       ~ ColumnFamily definitions have one required attribute (Name)
+       ~ and several optional ones.
+       ~
+       ~ The CompareWith attribute tells Cassandra how to sort the columns
+       ~ for slicing operations.  The default is BytesType, which is a
+       ~ straightforward lexical comparison of the bytes in each column.
+       ~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
+       ~ and LongType.  You can also specify the fully-qualified class
+       ~ name to a class of your choice extending
+       ~ org.apache.cassandra.db.marshal.AbstractType.
+       ~ 
+       ~ SuperColumns have a similar CompareSubcolumnsWith attribute.
+       ~ 
+       ~ BytesType: Simple sort by byte value.  No validation is performed.
+       ~ AsciiType: Like BytesType, but validates that the input can be 
+       ~            parsed as US-ASCII.
+       ~ UTF8Type: A string encoded as UTF8
+       ~ LongType: A 64bit long
+       ~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
+       ~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
+       ~
+       ~ (To get the closest approximation to 0.3-style supercolumns, you
+       ~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
+       ~
+       ~ An optional `Comment` attribute may be used to attach additional
+       ~ human-readable information about the column family to its definition.
+       ~ 
+       ~ The optional KeysCached attribute specifies
+       ~ the number of keys per sstable whose locations we keep in
+       ~ memory in "mostly LRU" order.  (JUST the key locations, NOT any
+       ~ column values.) Specify a fraction (value less than 1), a percentage
+       ~ (ending in a % sign) or an absolute number of keys to cache.
+       ~ KeysCached defaults to 200000 keys.
+       ~
+       ~ The optional RowsCached attribute specifies the number of rows
+       ~ whose entire contents we cache in memory. Do not use this on
+       ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
+       ~ ratios. Specify a fraction (value less than 1), a percentage (ending in
+       ~ a % sign) or an absolute number of rows to cache. 
+       ~ RowsCached defaults to 0, i.e., row cache is off by default.
+       ~
+       ~ Remember, when using caches as a percentage, they WILL grow with
+       ~ your data set!
+      -->
+      <ColumnFamily Name="Standard1" CompareWith="BytesType"/>
+      <ColumnFamily Name="Standard2" 
+                    CompareWith="UTF8Type"
+                    KeysCached="100%"/>
+      <ColumnFamily Name="StandardByUUID1" CompareWith="TimeUUIDType" />
+      <ColumnFamily Name="Super1"
+                    ColumnType="Super"
+                    CompareWith="BytesType"
+                    CompareSubcolumnsWith="BytesType" />
+      <ColumnFamily Name="Super2"
+                    ColumnType="Super"
+                    CompareWith="UTF8Type"
+                    CompareSubcolumnsWith="UTF8Type"
+                    RowsCached="10000"
+                    KeysCached="50%"
+                    Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/>
+
+      <!--
+       ~ Strategy: Setting this to the class that implements
+       ~ IReplicaPlacementStrategy will change the way the node picker works.
+       ~ Out of the box, Cassandra provides
+       ~ org.apache.cassandra.locator.RackUnawareStrategy and
+       ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
+       ~ a different datacenter, and the others on different racks in the same
+       ~ one.)
+      -->
       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
-      <ReplicationFactor>3</ReplicationFactor>
-      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
-    </Keyspace>
 
-    <Keyspace Name="urls">
-      <ColumnFamily CompareWith="UTF8Type" Name="urls" />
+      <!-- Number of replicas of the data -->
+      <ReplicationFactor>1</ReplicationFactor>
+
+      <!--
+       ~ EndPointSnitch: Set this to the class that implements
+       ~ AbstractEndpointSnitch, which lets Cassandra know enough
+       ~ about your network topology to route requests efficiently.
+       ~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
+       ~ and PropertyFileEndPointSnitch is available in contrib/.
+      -->
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+
+    <Keyspace Name="permacache">
+      <ColumnFamily CompareWith="BytesType" Name="permacache" />
+      <ColumnFamily CompareWith="BytesType" Name="urls" RowsCached="100000" />
 
       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
       <ReplicationFactor>3</ReplicationFactor>
@@ -32,6 +162,7 @@
 
       <!-- Views -->
       <ColumnFamily CompareWith="UTF8Type" Name="VotesByLink" />
+      <ColumnFamily CompareWith="UTF8Type" Name="CommentSortsCache" RowsCached="100000" />
 
       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
       <ReplicationFactor>3</ReplicationFactor>
@@ -40,24 +171,76 @@
 
   </Keyspaces>
 
+  <!--
+   ~ Authenticator: any IAuthenticator may be used, including your own as long
+   ~ as it is on the classpath.  Out of the box, Cassandra provides
+   ~ org.apache.cassandra.auth.AllowAllAuthenticator and
+   ~ org.apache.cassandra.auth.SimpleAuthenticator 
+   ~ (SimpleAuthenticator uses access.properties and passwd.properties by
+   ~ default).
+   ~
+   ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
+  -->
   <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
 
+  <!--
+   ~ Partitioner: any IPartitioner may be used, including your own as long
+   ~ as it is on the classpath.  Out of the box, Cassandra provides
+   ~ org.apache.cassandra.dht.RandomPartitioner,
+   ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+   ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+   ~ (CollatingOPP collates according to EN,US rules, not naive byte
+   ~ ordering.  Use this as an example if you need locale-aware collation.)
+   ~ Range queries require using an order-preserving partitioner.
+   ~
+   ~ Achtung!  Changing this parameter requires wiping your data
+   ~ directories, since the partitioner can modify the sstable on-disk
+   ~ format.
+  -->
   <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
 
+  <!--
+   ~ If you are using an order-preserving partitioner and you know your key
+   ~ distribution, you can specify the token for this node to use. (Keys
+   ~ are sent to the node with the "closest" token, so distributing your
+   ~ tokens equally along the key distribution space will spread keys
+   ~ evenly across your cluster.)  This setting is only checked the first
+   ~ time a node is started. 
+
+   ~ This can also be useful with RandomPartitioner to force equal spacing
+   ~ of tokens around the hash space, especially for clusters with a small
+   ~ number of nodes.
+  -->
   <InitialToken></InitialToken>
 
+  <!--
+   ~ Directories: Specify where Cassandra should store different data on
+   ~ disk.  Keep the data disks and the CommitLog disks separate for best
+   ~ performance.
+  -->
   <CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
   <DataFileDirectories>
       <DataFileDirectory>/cassandra/data</DataFileDirectory>
   </DataFileDirectories>
 
+  <!--
+   ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+   ~ use this list of hosts to find each other and learn the topology of
+   ~ the ring. You must change this if you are running multiple nodes!
+  -->
   <Seeds>
       <Seed>pmc01</Seed>
       <Seed>pmc02</Seed>
       <Seed>pmc03</Seed>
+      <Seed>pmc04</Seed>
+      <Seed>pmc05</Seed>
       <Seed>pmc06</Seed>
       <Seed>pmc07</Seed>
       <Seed>pmc08</Seed>
+      <Seed>pmc09</Seed>
+      <Seed>pmc10</Seed>
+      <Seed>pmc11</Seed>
+      <Seed>pmc12</Seed>
   </Seeds>
 
   <!-- Miscellaneous -->
@@ -70,16 +253,38 @@
   <!-- Size to allow commitlog to grow to before creating a new segment -->
   <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
 
+
   <!-- Local hosts and ports -->
 
+  <!-- 
+   ~ Address to bind to and tell other nodes to connect to.  You _must_
+   ~ change this if you want multiple nodes to be able to communicate!  
+   ~
+   ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+   ~ will always do the Right Thing *if* the node is properly configured
+   ~ (hostname, name resolution, etc), and the Right Thing is to use the
+   ~ address associated with the hostname (it might not be).
+  -->
   <ListenAddress></ListenAddress>
   <!-- internal communications port -->
   <StoragePort>7000</StoragePort>
 
+  <!--
+   ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+   ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+   ~ all interfaces.
+   ~
+   ~ Leaving this blank has the same effect it does for ListenAddress
+   ~ (i.e. it will be based on the configured hostname of the node).
+  -->
   <ThriftAddress></ThriftAddress>
   <!-- Thrift RPC port (the port clients connect to). -->
   <ThriftPort>9160</ThriftPort>
-
+  <!-- 
+   ~ Whether or not to use a framed transport for Thrift. If this option
+   ~ is set to true then you must also use a framed transport on the 
+   ~ client-side (framed and non-framed transports are not compatible).
+  -->
   <ThriftFramedTransport>false</ThriftFramedTransport>
 
 
@@ -143,7 +348,7 @@
    ~ actual heap memory usage (there is some overhead in indexing the
    ~ columns).
   -->
-  <MemtableThroughputInMB>64</MemtableThroughputInMB>
+  <MemtableThroughputInMB>128</MemtableThroughputInMB>
   <!--
    ~ Throughput setting for Binary Memtables.  Typically these are
    ~ used for bulk load so you want them to be larger.
@@ -161,8 +366,7 @@
    ~ commit log segment, that segment cannot be deleted.)
    ~ This needs to be large enough that it won't cause a flush storm
    ~ of all your memtables flushing at once because none has hit
-   ~ the size or count thresholds yet.  For production, a larger
-   ~ value such as 1440 is recommended.
+   ~ the size or count thresholds yet.
   -->
   <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
 

diff --git a/r2/draw_load.py b/r2/draw_load.py
@@ -26,7 +26,7 @@ def draw_load(row_size = 12, width = 200, out_file = "/tmp/load.png"):
 
     number = (len([x for x in hosts if x.services]) + 
               len([x for x in hosts if x.database]) +
-              sum(len(x.queue.queues) for x in hosts if x.queue)) + 9
+              sum(len(x.queue.queues) for x in hosts if x.queue)) + 14
 
     im = Image.new("RGB", (width, number * row_size + 3))
     draw = ImageDraw.Draw(im)
@@ -43,7 +43,7 @@ def draw_box(label, color, center = False):
 
     draw_box(" ==== DATABASES ==== ", "#BBBBBB", center = True)
     for host in hosts:
-        if host.database:
+        if host.database or host.host.startswith('vote'):
             draw_box("  %s load: %s" % (host.host, host.load()),
                      get_load_level(host))
 

diff --git a/r2/example.ini b/r2/example.ini
@@ -138,6 +138,8 @@ authorize_db =   reddit,   127.0.0.1, reddit,   password
 award_db =       reddit,   127.0.0.1, reddit,   password
 hc_db =          reddit,   127.0.0.1, reddit,   password
 
+hardcache_categories = *:hc
+
 # this setting will prefix all of the table names
 db_app_name = reddit
 # are we allowed to create tables?
@@ -302,9 +304,11 @@ HOT_PAGE_AGE = 1000
 # how long to consider links eligible for the rising page
 rising_period = 12 hours
 # max number of comments (default)
-num_comments = 200
-# max number of comments (if show all is selected)
+num_comments = 100
+# max number of comments (non-gold)
 max_comments = 500
+# max number of comments (gold)
+max_comments_gold = 2500
 # list of reddits to auto-subscribe users to
 automatic_reddits = 
 # special reddit that only reddit gold subscribers can use
@@ -315,6 +319,10 @@ num_default_reddits = 10
 num_serendipity = 250
 sr_dropdown_threshold = 15
 
+# Conflate visits to a comment page that happen within this many
+# seconds of each other
+comment_visits_period = 600
+
 #user-agents to rate-limit
 agents = 
 

diff --git a/r2/r2/config/middleware.py b/r2/r2/config/middleware.py
@@ -40,6 +40,7 @@
 from r2.lib.html_source import HTMLValidationParser
 from cStringIO import StringIO
 import sys, tempfile, urllib, re, os, sha, subprocess
+from httplib import HTTPConnection
 
 #from pylons.middleware import error_mapper
 def error_mapper(code, message, environ, global_conf=None, **kw):
@@ -308,6 +309,21 @@ def __call__(self, environ, start_response):
             # subdomains to disregard completely
             if sd in ('www', 'origin', 'beta', 'pay'):
                 continue
+            elif sd == 'blog':
+                r = Response()
+                try:
+                    conn = HTTPConnection(config['global_conf']['blog_host'])
+                    conn.request("GET", environ['PATH_INFO'], None,
+                                 {"Host": "blog.reddit.com"})
+                    res = conn.getresponse()
+                    r.status_code = res.status
+                    r.content = res.read()
+                    conn.close()
+                except:
+                    r.status_code = 500
+                    environ['HTTP_HOST'] = base_domain
+                    r.content = "failed to load blog"
+                return r(environ, start_response)
             # subdomains which change the extension
             elif sd == 'm':
                 environ['reddit-domain-extension'] = 'mobile'