forked from ipython/ipython
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Created tool for handling interactive input blocks.
Currently it has 100% test coverage, we should try to keep it that way.
- Loading branch information
Showing
1 changed file
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,385 @@ | ||
"""Analysis of text input into executable blocks.

This is a simple example of how an interactive terminal-based client can use
this tool::

    bb = BlockBreaker()
    while not bb.interactive_block_ready():
        bb.push(raw_input('>>> '))
    print 'Input source was:\n', bb.source,
"""
#----------------------------------------------------------------------------- | ||
# Copyright (C) 2010 The IPython Development Team | ||
# | ||
# Distributed under the terms of the BSD License. The full license is in | ||
# the file COPYING, distributed as part of this software. | ||
#----------------------------------------------------------------------------- | ||
|
||
#----------------------------------------------------------------------------- | ||
# Imports | ||
#----------------------------------------------------------------------------- | ||
# stdlib | ||
import codeop | ||
import re | ||
import sys | ||
|
||
#----------------------------------------------------------------------------- | ||
# Utilities | ||
#----------------------------------------------------------------------------- | ||
|
||
# Compiled regexps for autoindent management.
#
# dedent_re matches an indented line that starts with a statement which ends
# the current block (raise, return or pass).  The trailing \b makes sure only
# the keyword itself is matched, so identifiers that merely begin with one of
# these words (e.g. ``passport`` or ``returns``) do not trigger a dedent.
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
# ini_spaces_re captures the run of leading whitespace (newlines excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
|
||
|
||
def num_ini_spaces(s):
    """Count the whitespace characters at the very start of a string.

    Every leading whitespace character matched by ``ini_spaces_re`` counts as
    one, so a tab is worth a single space.  Mixing tabs and spaces in the
    user's input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    Integer length of the leading whitespace run, 0 if there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
|
||
|
||
def remove_comments(src):
    """Strip every Python comment out of a piece of source text.

    Everything from a ``#`` up to (but not including) the end of its line is
    dropped; the newlines themselves are kept.

    Note: string literals are NOT parsed, so a ``#`` inside a string is
    treated as the start of a comment too.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
|
||
|
||
def get_input_encoding():
    """Return the default standard input encoding.

    Returns
    -------
    The value of ``sys.stdin.encoding``, or 'ascii' when that attribute is
    missing or set to None.
    """
    # A getattr() default only covers a *missing* attribute.  There are
    # environments (e.g. stdin redirected from a pipe, or replaced by a
    # file-like object) where the attribute exists but is None, and None is
    # not usable as an argument to str.encode().
    encoding = getattr(sys.stdin, 'encoding', None)
    if encoding is None:
        encoding = 'ascii'
    return encoding
|
||
#----------------------------------------------------------------------------- | ||
# Classes and functions | ||
#----------------------------------------------------------------------------- | ||
|
||
|
||
class BlockBreaker(object):
    """Accumulate lines of input and decide when they form a complete block.

    Input is fed in with :meth:`push`; each call stores the new text, tries to
    compile everything stored so far and updates a running estimate of the
    indentation level.  Line-oriented frontends can then poll
    :meth:`interactive_block_ready` to know when to stop reading input.
    """
    # List of the newline-terminated input chunks stored so far
    buffer = None
    # Command compiler
    compile = None
    # Number of spaces of indentation
    indent_spaces = 0
    # String, indicating the default input encoding
    encoding = ''
    # String where the current full source input is stored, properly encoded
    source = ''
    # Code object corresponding to the current source
    code = None
    # Boolean indicating whether the current block is complete
    is_complete = None

    def __init__(self):
        self.buffer = []
        self.compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        self.buffer[:] = []
        self.source = ''
        # The compilation results describe the source that was just
        # discarded; clear them too, otherwise interactive_block_ready() would
        # keep reporting the previous block as ready on an empty buffer.
        self.code = None
        self.is_complete = None

    def get_source(self, reset=False):
        """Return the input source.

        Parameters
        ----------
        reset : boolean
          If true, all state is reset and prior input forgotten.
        """
        out = self.source
        if reset:
            self.reset()
        return out

    def update_indent(self, lines):
        """Keep track of the indent level.

        Parameters
        ----------
        lines : string
          One or more lines of input; comments are stripped and blank lines
          are ignored before the indentation is examined.
        """
        for line in remove_comments(lines).splitlines():
            # Blank lines never change the indentation level
            if not line or line.isspace():
                continue

            # Only follow an explicit dedent when the source compiles to a
            # complete statement; inside an open multiline expression the
            # indentation of continuation lines is arbitrary.
            if self.code is not None:
                inisp = num_ini_spaces(line)
                if inisp < self.indent_spaces:
                    self.indent_spaces = inisp

            # Ignore trailing whitespace (e.g. what is left after removing
            # 'if x:  # comment') when looking for a block opener.
            if line.rstrip().endswith(':'):
                self.indent_spaces += 4
            elif dedent_re.match(line):
                # Never let the level drop below flush-left
                self.indent_spaces = max(self.indent_spaces - 4, 0)

    def store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""
        if lines.endswith('\n'):
            self.buffer.append(lines)
        else:
            self.buffer.append(lines+'\n')
        self.source = ''.join(self.buffer).encode(self.encoding)

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are allowed to propagate.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as an attribute so it can be queried at any
          time.
        """
        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios
        if not self.buffer and lines[:1] in [' ', '\t']:
            lines = 'if 1:\n%s' % lines

        self.store(lines)
        source = self.source

        # Before calling compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self.is_complete = None, None
        self.code = self.compile(source)
        # Compilation didn't produce any exceptions (though it may not have
        # given a complete code object)
        self.is_complete = self.code is not None
        self.update_indent(lines)
        return self.is_complete

    def interactive_block_ready(self):
        """Return whether a block of interactive input is ready for execution.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The BlockBreaker considers it has a complete
        interactive block when the input compiles to a complete statement
        *and* at least one of the following is true:

        1. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        2. The last line of the input consists only of whitespace.

        Because of condition #2, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.
        """
        if not self.is_complete:
            return False
        if self.indent_spaces == 0:
            return True
        # Still indented: only a trailing blank line terminates the block
        last_line = self.source.splitlines()[-1]
        return not last_line or last_line.isspace()

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks

        Not implemented yet: this is a placeholder that returns None.
        """
|
||
#----------------------------------------------------------------------------- | ||
# Tests | ||
#----------------------------------------------------------------------------- | ||
|
||
import unittest | ||
|
||
import nose.tools as nt | ||
|
||
|
||
def test_spaces():
    """num_ini_spaces() counts each leading whitespace character once."""
    # Each entry is (input string, expected number of initial spaces).  The
    # amount of leading whitespace in every input literal is significant.
    tests = [('', 0),
             (' ', 1),
             ('\n', 0),
             (' \n', 1),
             ('x', 0),
             (' x', 1),
             ('  x', 2),
             ('    x', 4),
             # Note: tabs are counted as a single whitespace!
             ('\tx', 1),
             ('\t x', 2),
             ]

    for s, nsp in tests:
        nt.assert_equal(num_ini_spaces(s), nsp)
|
||
|
||
def test_remove_comments():
    """remove_comments() drops '#...' up to the end of each line."""
    # Pairs of (source text, text expected once comments are stripped)
    cases = (
        ('text', 'text'),
        ('text # comment', 'text '),
        ('text # comment\n', 'text \n'),
        ('text # comment \n', 'text \n'),
        ('line # c \nline\n', 'line \nline\n'),
        ('line # c \nline#c2 \nline\nline #c\n\n',
         'line \nline\nline\nline \n\n'),
    )

    for source, expected in cases:
        stripped = remove_comments(source)
        nt.assert_equal(stripped, expected)
|
||
|
||
def test_get_input_encoding():
    """get_input_encoding() yields a string that names a usable codec."""
    enc = get_input_encoding()
    is_string = isinstance(enc, basestring)
    nt.assert_true(is_string)
    # Simple-minded sanity check: encoding a plain string with whatever codec
    # name we were given must at least work and leave the text unchanged.
    encoded = 'test'.encode(enc)
    nt.assert_equal(encoded, 'test')
|
||
|
||
class BlockBreakerTestCase(unittest.TestCase):
    """Unit tests for BlockBreaker.

    The amount of leading whitespace inside the pushed string literals is
    significant: the asserted indentation levels depend on it.
    """

    def setUp(self):
        # Every test gets a fresh, empty BlockBreaker
        self.bb = BlockBreaker()

    def test_reset(self):
        self.bb.store('hello')
        self.bb.reset()
        self.assertEqual(self.bb.buffer, [])
        self.assertEqual(self.bb.indent_spaces, 0)
        self.assertEqual(self.bb.get_source(), '')

    def test_source(self):
        self.bb.store('1')
        self.bb.store('2')
        out = self.bb.get_source()
        self.assertEqual(out, '1\n2\n')
        out = self.bb.get_source(reset=True)
        self.assertEqual(out, '1\n2\n')
        self.assertEqual(self.bb.buffer, [])
        out = self.bb.get_source()
        self.assertEqual(out, '')

    def test_indent(self):
        bb = self.bb  # shorthand
        bb.push('x=1')
        self.assertEqual(bb.indent_spaces, 0)
        bb.push('if 1:\n    x=1')
        self.assertEqual(bb.indent_spaces, 4)
        bb.push('y=2\n')
        self.assertEqual(bb.indent_spaces, 0)
        bb.push('if 1:')
        self.assertEqual(bb.indent_spaces, 4)
        bb.push('    x=1')
        self.assertEqual(bb.indent_spaces, 4)
        # Blank lines shouldn't change the indent level
        bb.push(' '*2)
        self.assertEqual(bb.indent_spaces, 4)

    def test_indent2(self):
        bb = self.bb
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        bb.push("if 1:")
        bb.push("    x = (1+\n    2)")
        self.assertEqual(bb.indent_spaces, 4)

    def test_dedent(self):
        bb = self.bb  # shorthand
        bb.push('if 1:')
        self.assertEqual(bb.indent_spaces, 4)
        bb.push('    pass')
        self.assertEqual(bb.indent_spaces, 0)

    def test_push(self):
        bb = self.bb
        bb.push('x=1')
        self.assertTrue(bb.is_complete)

    def test_push2(self):
        bb = self.bb
        bb.push('if 1:')
        self.assertFalse(bb.is_complete)
        for line in [' x=1', '# a comment', ' y=2']:
            bb.push(line)
            self.assertTrue(bb.is_complete)

    def test_push3(self):
        """Test input with leading whitespace"""
        bb = self.bb
        bb.push(' x=1')
        bb.push(' y=2')
        self.assertEqual(bb.source, 'if 1:\n x=1\n y=2\n')

    def test_interactive_block_ready(self):
        bb = self.bb
        bb.push('x=1')
        self.assertTrue(bb.interactive_block_ready())

    def test_interactive_block_ready2(self):
        bb = self.bb
        bb.push('if 1:\n x=1')
        self.assertFalse(bb.interactive_block_ready())
        bb.push('')
        self.assertTrue(bb.interactive_block_ready())

    def test_interactive_block_ready3(self):
        bb = self.bb
        bb.push("x = (2+\n3)")
        self.assertTrue(bb.interactive_block_ready())

    def test_interactive_block_ready4(self):
        bb = self.bb
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        # FIXME: we should be able to better handle de-dents in statements like
        # multiline strings and multiline expressions (continued with \ or
        # parens).  Right now we aren't handling the indentation tracking quite
        # correctly with this, though in practice it may not be too much of a
        # problem.  We'll need to see.
        bb.push("if 1:")
        bb.push(" x = (2+")
        bb.push(" 3)")
        self.assertFalse(bb.interactive_block_ready())
        bb.push(" y = 3")
        self.assertFalse(bb.interactive_block_ready())
        bb.push('')
        self.assertTrue(bb.interactive_block_ready())