Shopify · trishume · Aug 30, 2013 · Jul 24, 2013 · Jul 24, 2013 · Jul 24, 2013
diff --git a/README.md b/README.md
@@ -47,4 +47,22 @@ For standard use you can just pass it the content of a file and call render with
 @template.render('name' => 'tobi')                # => "hi tobi"
 ```
 
+### Error Modes
+
+Setting the error mode of Liquid lets you specify how strictly you want your templates to be interpreted.
+Normally the parser is very lax and will accept almost anything without error. Unfortunately this can make
+templates very hard to debug and can lead to unexpected behaviour.
+
+Liquid also comes with a stricter parser that can be used when editing templates to give better error messages
+when templates are invalid. You can enable this new parser by setting the error mode to `:strict` or `:warn`:
+
+```ruby
+Liquid::Template.error_mode = :strict # Raises a SyntaxError when invalid syntax is used
+Liquid::Template.error_mode = :warn # Adds errors to template.errors but continues as normal
+Liquid::Template.error_mode = :lax # The default mode, accepts almost anything.
+```
+
+It is recommended that you enable `:strict` or `:warn` mode on new apps to stop invalid templates from being created.
+It is also recommended that you use one of these modes in the template editors of existing apps to give template authors better error messages.
+
 [![Build Status](https://secure.travis-ci.org/Shopify/liquid.png)](http://travis-ci.org/Shopify/liquid)
diff --git a/Rakefile b/Rakefile
@@ -7,12 +7,25 @@ require 'rubygems/package_task'
 
 task :default => 'test'
 
-Rake::TestTask.new(:test) do |t|
+Rake::TestTask.new(:lax_test) do |t|
   t.libs << '.' << 'lib' << 'test'
   t.test_files = FileList['test/liquid/**/*_test.rb']
+  t.options = 'lax' # only setting that differs from :strict_test; presumably read by the test helper to pick the lax parser — confirm
   t.verbose = false
 end
 
+Rake::TestTask.new(:strict_test) do |t|
+  t.libs << '.' << 'lib' << 'test'
+  t.test_files = FileList['test/liquid/**/*_test.rb']
+  t.verbose = false
+end
+
+desc 'runs test suite with both strict and lax parsers'
+task :test do
+  Rake::Task['lax_test'].invoke
+  Rake::Task['strict_test'].invoke
+end
+
 gemspec = eval(File.read('liquid.gemspec'))
 Gem::PackageTask.new(gemspec) do |pkg|
   pkg.gem_spec = gemspec
@@ -27,9 +40,13 @@ namespace :benchmark do
 
   desc "Run the liquid benchmark"
   task :run do
-    ruby "./performance/benchmark.rb"
+    ruby "./performance/benchmark.rb strict" # passes 'strict' as the script's argument
   end
 
+  desc "Run the liquid benchmark with lax parsing"
+  task :lax do
+    ruby "./performance/benchmark.rb lax" # same script as benchmark:run, with 'lax' as its argument
+  end
 end
 
 

diff --git a/lib/liquid.rb b/lib/liquid.rb
@@ -46,6 +46,8 @@ module Liquid
 end
 
 require "liquid/version"
+require 'liquid/lexer'
+require 'liquid/parser'
 require 'liquid/drop'
 require 'liquid/extensions'
 require 'liquid/errors'

diff --git a/lib/liquid/block.rb b/lib/liquid/block.rb
@@ -28,7 +28,7 @@ def parse(tokens)
 
             # fetch the tag from registered blocks
             if tag = Template.tags[$1]
-              new_tag = tag.new($1, $2, tokens)
+              new_tag = tag.new_with_options($1, $2, tokens, @options || {})
               @blank &&= new_tag.blank?
               @nodelist << new_tag
             else
@@ -80,7 +80,7 @@ def block_name
 
     def create_variable(token)
       token.scan(ContentOfVariable) do |content|
-        return Variable.new(content.first)
+        return Variable.new(content.first, @options) # returns on the first match; @options is forwarded to the Variable
       end
       raise SyntaxError.new("Variable '#{token}' was not properly terminated with regexp: #{VariableEnd.inspect} ")
     end

diff --git a/lib/liquid/document.rb b/lib/liquid/document.rb
@@ -1,7 +1,8 @@
 module Liquid
   class Document < Block
     # we don't need markup to open this block
-    def initialize(tokens)
+    def initialize(tokens, options = {})
+      @options = options # assigned before parse: Block#parse hands @options to every tag it creates
       parse(tokens)
     end
 

diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
@@ -0,0 +1,64 @@
+require "strscan"
+module Liquid
+  class Lexer
+    SPECIALS = {
+      '|' => :pipe,
+      '.' => :dot,
+      ':' => :colon,
+      ',' => :comma,
+      '[' => :open_square,
+      ']' => :close_square,
+      '(' => :open_round,
+      ')' => :close_round
+    }
+    IDENTIFIER = /[\w\-?!]+/
+    SINGLE_STRING_LITERAL = /'[^\']*'/
+    DOUBLE_STRING_LITERAL = /"[^\"]*"/
+    NUMBER_LITERAL = /-?\d+(\.\d+)?/
+    COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains/
+
+    def initialize(input)
+      @ss = StringScanner.new(input)
+    end
+
+    def tokenize
+      @output = []
+
+      loop do
+        @ss.skip(/\s*/)
+
+        tok = case
+        when @ss.eos? then nil
+        when t = @ss.scan(COMPARISON_OPERATOR) then [:comparison, t]
+        when t = @ss.scan(SINGLE_STRING_LITERAL) then [:string, t]
+        when t = @ss.scan(DOUBLE_STRING_LITERAL) then [:string, t]
+        when t = @ss.scan(NUMBER_LITERAL) then [:number, t]
+        when t = @ss.scan(IDENTIFIER) then [:id, t]
+        else
+          c = @ss.getch
+          if s = SPECIALS[c]
+            [s,c]
+          else
+            raise SyntaxError, "Unexpected character #{c}."
+          end
+        end
+
+        unless tok
+          @output << [:end_of_string]
+          return @output
+        end
+        @output << tok
+      end
+    end
+
+    protected
+    def lex_specials
+      c = @ss.getch
+      if s = SPECIALS[c]
+        return Token.new(s,c)
+      end
+
+      raise SyntaxError, "Unexpected character #{c}."
+    end
+  end
+end
diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb
@@ -0,0 +1,95 @@
+module Liquid
+  # This class is used by tags to parse themselves
+  # it provides helpers and encapsulates state
+  class Parser
+    def initialize(input)
+      l = Lexer.new(input)
+      @tokens = l.tokenize
+      @p = 0 # pointer to current location
+    end
+
+    def jump(point)
+      @p = point
+    end
+
+    def consume(type = nil)
+      token = @tokens[@p]
+      if type && token[0] != type
+        raise SyntaxError, "Expected #{type} but found #{@tokens[@p]}"
+      end
+      @p += 1
+      token[1]
+    end
+
+    # Only consumes the token if it matches the type
+    # Returns the token's contents if it was consumed
+    # or false otherwise.
+    def consume?(type)
+      token = @tokens[@p]
+      return false unless token && token[0] == type
+      @p += 1
+      token[1]
+    end
+
+    # Like consume? Except for an :id token of a certain name
+    def id?(str)
+      token = @tokens[@p]
+      return false unless token && token[0] == :id
+      return false unless token[1] == str
+      @p += 1
+      token[1]
+    end
+
+    def look(type, ahead = 0)
+      tok = @tokens[@p + ahead]
+      return false unless tok
+      tok[0] == type
+    end
+
+    # === General Liquid parsing functions ===
+
+    def expression
+      token = @tokens[@p]
+      if token[0] == :id
+        variable_signature
+      elsif [:string, :number].include? token[0]
+        consume
+        token[1]
+      elsif token.first == :open_round
+        consume
+        first = expression
+        consume(:dot)
+        consume(:dot)
+        last = expression
+        consume(:close_round)
+        "(#{first}..#{last})"
+      else
+        raise SyntaxError, "#{token} is not a valid expression."
+      end
+    end
+
+    def argument
+      str = ""
+      # might be a keyword argument (identifier: expression)
+      if look(:id) && look(:colon, 1)
+        str << consume << consume << ' '
+      end
+
+      str << expression
+    end
+
+    def variable_signature
+      str = consume(:id)
+      if look(:open_square)
+        str << consume
+        str << expression
+        str << consume(:close_square)
+      end
+      if look(:dot)
+        str << consume
+        str << variable_signature
+      end
+      str
+    end
+  end
+end
diff --git a/lib/liquid/tag.rb b/lib/liquid/tag.rb
@@ -1,10 +1,26 @@
 module Liquid
   class Tag
-    attr_accessor :nodelist
+    attr_accessor :nodelist, :options
+
+    def self.new_with_options(tag_name, markup, tokens, options)
+      # Forgive me Matz for I have sinned.
+      # I have forsaken the holy idioms of Ruby and used Class#allocate.
+      # I fulfilled my mandate by maintaining API compatibility and performance,
+      # even though it may displease your Lordship.
+      #
+      # In all seriousness though, I can prove to a reasonable degree of certainty
+      # that setting options before calling initialize is required to maintain API compatibility.
+      # I tried doing it without it and not only did I break compatibility, it was much slower.
+      new_tag = self.allocate
+      new_tag.options = options
+      new_tag.send(:initialize, tag_name, markup, tokens)
+      new_tag
+    end
 
     def initialize(tag_name, markup, tokens)
       @tag_name   = tag_name
       @markup     = markup
+      @options    ||= {} # ||= (not =) because new_with_options may have assigned options before initialize runs
       parse(tokens)
     end
 
@@ -22,5 +38,20 @@ def render(context)
     def blank?
       @blank || true # NOTE(review): '|| true' makes this truthy regardless of @blank — confirm that is intended
     end
+
+    def switch_parse(markup)
+      case @options[:error_mode] || Template.error_mode
+      when :strict then strict_parse(markup)
+      when :lax    then lax_parse(markup)
+      when :warn
+        begin
+          return strict_parse(markup)
+        rescue SyntaxError => e
+          @warnings ||= []
+          @warnings << e
+          return lax_parse(markup)
+        end
+      end
+    end
   end # Tag
 end # Liquid
diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb
@@ -47,19 +47,7 @@ class For < Block
     Syntax = /\A(#{VariableSegment}+)\s+in\s+(#{QuotedFragment}+)\s*(reversed)?/o
 
     def initialize(tag_name, markup, tokens)
-      if markup =~ Syntax
-        @variable_name = $1
-        @collection_name = $2
-        @name = "#{$1}-#{$2}"
-        @reversed = $3
-        @attributes = {}
-        markup.scan(TagAttributes) do |key, value|
-          @attributes[key] = value
-        end
-      else
-        raise SyntaxError.new("Syntax Error in 'for loop' - Valid syntax: for [item] in [collection]")
-      end
-
+      switch_parse(markup) # delegates to lax_parse / strict_parse per error mode; note that it runs before super
       @nodelist = @for_block = []
       super
     end
@@ -127,6 +115,43 @@ def render(context)
       result
     end
 
+    protected
+
+    # Regex-based parsing of the for-tag markup (the lax path).
+    #
+    # The captures of Syntax fill @variable_name, @collection_name and
+    # @reversed; every TagAttributes key/value pair found in the markup is
+    # stored in @attributes. Raises SyntaxError when Syntax does not match.
+    def lax_parse(markup)
+      unless markup =~ Syntax
+        raise SyntaxError.new("Syntax Error in 'for loop' - Valid syntax: for [item] in [collection]")
+      end
+
+      @variable_name, @collection_name, @reversed = $1, $2, $3
+      @name = "#{@variable_name}-#{@collection_name}"
+      @attributes = {}
+      markup.scan(TagAttributes) { |key, value| @attributes[key] = value }
+    end
+
+    def strict_parse(markup)
+      p = Parser.new(markup)
+      @variable_name = p.consume(:id)
+      raise SyntaxError, "For loops require an 'in' clause" unless p.id?('in')
+      @collection_name = p.expression
+      @name = "#{@variable_name}-#{@collection_name}"
+      @reversed = p.id?('reversed')
+
+      @attributes = {}
+      while p.look(:id) && p.look(:colon, 1)
+        unless attribute = p.id?('limit') || p.id?('offset')
+          raise SyntaxError, "Invalid attribute in for loop. Valid attributes are limit and offset"
+        end
+        p.consume
+        val = p.expression
+        @attributes[attribute] = val
+      end
+      p.consume(:end_of_string)
+    end
+
     private
 
       def render_else(context)