# 1.1 unit test 可以大大节约手动检查程序的时间

### unit test 是一种手动或自动的程序测试方法：检查函数或模块单元在各种不同的输入下，能否输出正确的结果。

# 1.2 unit test building process

### step1. create file （test_filename 包括函数、模块）
### step2. import pytest 和 test module filename.
### step3. unit test python function:    def test_XX():
### step4. assert
### step5. run unit test in command line : !pytest test_filename.py

# 例如测试模块以下输入是否得到设定输出
<img src='./chapter1.png' >

In [None]:
# Import the pytest package
import pytest

import row_to_list

def test_for_clean_row():
    """Check that a well-formed row is split into its two string fields.

    NOTE(review): this cell does `import row_to_list`, which binds a module;
    the call below needs the function itself in scope -- confirm the import.
    """
    expected = ["2,081", "314,942"]
    actual = row_to_list("2,081\t314,942\n")
    assert actual == expected
    
def test_for_missing_area():
    """Check that a row with nothing before the tab yields None."""
    result = row_to_list("\t293,410\n")
    assert result is None
    
def test_for_missing_tab():
    """Check that a row without any tab separator yields None."""
    result = row_to_list("1,463238,765\n")
    assert result is None
 

# 1.3 查看test result

## F代表failed，有bug， .代表成功

### If you get an AssertionError, this means the function has a bug and you should fix it. If you get another exception, e.g. NameError, this means that something else is wrong with the unit test code and you should fix it so that the assert statement can actually run. 也会在TDD中出现，被测试code还没写完。

To find bugs in functions, you need to follow a four step procedure.

1.Write unit tests.

2. Run them.

3.Read the test result report and spot the bugs.

4.Fix the bugs.

# 2.1 test message, 将test报告修饰可读

### 浮点数不能直接用 == 比较（例如 0.1 + 0.2 == 0.3 为 False），所以 float 的比较要用 pytest.approx() 方法

In [None]:
import pytest
from preprocessing_helpers import convert_to_int

def test_on_string_with_one_comma():
    """Check that convert_to_int() turns the string "2,081" into 2081."""
    test_argument = "2,081"
    expected = 2081
    actual = convert_to_int(test_argument)
    # Build the failure message from the value that was actually returned.
    message = "convert_to_int('2,081') should return the int 2081, but it actually returned {0}".format(actual)
    # Compare by value. `is` tests object identity, which is not guaranteed
    # for two equal ints, so a correct return value could still fail the test.
    assert actual == expected, message

In [None]:
import numpy as np
import pytest
from as_numpy import get_data_as_numpy_array

def test_on_clean_file():
    """Check the array that get_data_as_numpy_array() builds from a clean file.

    Reads "example_clean_data.txt" with num_columns=2 and compares the result
    with the three expected rows, using pytest.approx() for the float values.
    """
    expected = np.array(
        [
            [2081.0, 314942.0],
            [1059.0, 186606.0],
            [1148.0, 206186.0],
        ]
    )
    actual = get_data_as_numpy_array("example_clean_data.txt", num_columns=2)
    message = "Expected return value: {0}, Actual return value: {1}".format(expected, actual)
    # Compare against `expected`: comparing `actual` with itself would pass
    # no matter what the function returned.
    assert actual == pytest.approx(expected), message

In [None]:
def test_on_six_rows():
    """Check the row counts of the two arrays produced from a six-row input.

    The test expects index 0 of the return value to be the training array
    (4 rows) and index 1 to be the testing array (2 rows).
    """
    example_argument = np.array(
        [
            [2081.0, 314942.0], [1059.0, 186606.0],
            [1148.0, 206186.0], [1506.0, 248419.0],
            [1210.0, 214114.0], [1697.0, 277794.0],
        ]
    )
    expected_training_array_num_rows = 4
    expected_testing_array_num_rows = 2
    actual = split_into_training_and_testing_sets(example_argument)
    # Row count of the training array.
    assert actual[0].shape[0] == expected_training_array_num_rows, "The actual number of rows in the training array is not {}".format(expected_training_array_num_rows)
    # Row count of the testing array. Use the named expectation rather than a
    # repeated literal so the check and its message cannot drift apart.
    assert actual[1].shape[0] == expected_testing_array_num_rows, "The actual number of rows in the testing array is not {}".format(expected_testing_array_num_rows)

# 2.2 pytest.raises() 

### pytest.raises() 一种context manager 测试model是否能检测出exception 的bug。
### 如果能检测出exception 的bug，则测试通过。如果不能，则测试返回failed，测试失败，需要修复bug。

In [None]:
 def test_valueerror_on_one_dimensional_argument():
    example_argument = np.array([2081, 314942, 1059, 186606, 1148, 206186])
    #用pytest.raises(ValueError) 上下文管理器
    with pytest.raises(ValueError) as exception_info:    # store the exception
        split_into_training_and_testing_sets(example_argument)
    # Check if ValueError contains correct message，能报ValueError的错并与错误信息匹配则通过，不能则不通过测试
    assert exception_info.match("Argument data array must be two dimensional. "
                                "Got 1 dimensional array instead!"
)

# 2.3 test arguments

## 多少argument需要被test？
### 1.bad arguments 会返回exception的那种
### 2. special arguments (boundary values, 控制函数行为的错误值，例如training set which is not 0.75 of total dataset as we set. Normal 值紧挨着的错误值边界值。)
### 3. normal arguments.

例在case row_to_list 的检测中： The boundary values of row_to_list() are now marked in orange. The normal argument is marked in green and the values triggering special behavior are marked in blue.
<img src='./2.3test_arguments.png'  >

In [None]:
'''测试special arguments. (boundary values )'''

import pytest
from preprocessing_helpers import row_to_list

def test_on_no_tab_no_missing_value():
    """Boundary case (0, 0): no tab and no missing value must yield None.

    The neighbouring case (1, 0) is the normal argument.
    """
    actual = row_to_list("123\n")
    message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, message
    
def test_on_two_tabs_no_missing_value():
    """Boundary case (2, 0): two tabs and no missing value must yield None."""
    actual = row_to_list("123\t4,567\t89\n")
    message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, message
    
def test_on_one_tab_with_missing_value():
    """Boundary case (1, 1): one tab with a missing value must yield None."""
    actual = row_to_list("\t4,567\n")
    message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, message

In [None]:
'''测试 bad arguments '''

import pytest
from preprocessing_helpers import row_to_list

def test_on_no_tab_with_missing_value():
    """Bad-argument case (0, 1): a bare newline must yield None."""
    actual = row_to_list('\n')
    message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, message
    
def test_on_two_tabs_with_missing_value():
    """Bad-argument case (2, 1): two tabs with an empty middle field must yield None."""
    actual = row_to_list('123\t\t89\n')
    message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, message

In [None]:
'''测试normal arguments'''

import pytest
from preprocessing_helpers import row_to_list

def test_on_normal_argument_1():
    """Normal case: "123\\t4,567\\n" is split into its two fields."""
    expected = ["123", "4,567"]
    actual = row_to_list("123\t4,567\n")
    message = "Expected: {0}, Actual: {1}".format(expected, actual)
    assert actual == expected, message
    
def test_on_normal_argument_2():
    """Normal case: "1,059\\t186,606\\n" is split into its two fields."""
    expected = ["1,059", "186,606"]
    actual = row_to_list("1,059\t186,606\n")
    message = "Expected: {0}, Actual: {1}".format(expected, actual)
    assert expected == actual, message

# 2.4 TDD (test driven development)

## 在软件开发的时候，先编写test 代码，再写软件模块程序（根据bug写程序），再对模块程序进行测试

In TDD, the first run of the tests always fails with a NameError or ImportError because the function does not exist yet. 

# 3.1 test directory, test class

## test class 就是一个test function的container

In [None]:
import pytest
import numpy as np

from models.train import split_into_training_and_testing_sets

# Declare the test class
class TestSplitIntoTrainingAndTestingSets(object):
    """Container for the tests of split_into_training_and_testing_sets()."""

    def test_on_one_row(self):
        """A single-row array must raise ValueError with the expected message."""
        single_row = np.array([[1382.0, 390167.0]])
        expected_error_msg = "Argument data_array must have at least 2 rows, it actually has just 1"
        # The exception raised inside the block is captured for inspection.
        with pytest.raises(ValueError) as exc_info:
            split_into_training_and_testing_sets(single_row)
        assert exc_info.match(expected_error_msg)

# 3.2 shell command for all test file

#### 1. 测试当前文件夹下所有的test文件，!pytest; or !pytest -x 整个test包(碰到报错代码就停止测试）。 In real life, the !pytest or !pytest -x command is often used in CI servers. It can also be useful if there is a major update to the code base, which changes many application modules at the same time. Running all tests is the only way to check if anything was broken due to the update.
#### 2. 测试class， path+" :: " + classname。 !pytest models/test_train.py::TestSplitIntoTrainingAndTestingSets
#### 3.测试method，path+" :: " + classname+" :: " + methodname  。 !pytest models/test_train.py::TestSplitIntoTrainingAndTestingSets::test_on_six_rows
#### 4.关键字查找!pytest -k。!pytest -k "SplitInto"。 The -k flag is really useful, because it helps you select tests and test classes by typing only a unique part of their names. This saves a lot of typing, especially for long names such as TestSplitIntoTrainingAndTestingSets.
#### 5.

# 3.3 Mark a test class as expected to fail

#### 使用@pytest.mark.xfail(reason="Using TDD, model_test() has not yet been implemented")，隐藏module还没建好报NameError的错误，使其余部分通过测试。==> 查看报告原因，shell command: !pytest -rx
#### 使用@pytest.mark.skipif(sys.version_info > (2, 7), reason="Works only on Python 2.7 or lower")，跳过版本问题的错误.==> shell command: !pytest -rs

In [None]:
# Add a reason for the expected failure
@pytest.mark.xfail(reason="Using TDD, model_test() has not yet been implemented")
class TestModelTest(object):
    """Tests for model_test(), marked as expected failures for now."""

    def test_on_linear_data(self):
        """Check model_test() on points that lie exactly on y = 2x + 1.

        NOTE(review): the original method had no body, which is a SyntaxError.
        This perfect-fit check is a stand-in so the class parses and still
        exercises model_test(); replace it with the intended assertions.
        """
        test_argument = np.array([[1.0, 3.0], [2.0, 5.0], [3.0, 7.0]])
        expected = 1.0
        actual = model_test(test_argument, slope=2.0, intercept=1.0)
        assert actual == pytest.approx(expected), "Expected: {0}, Actual: {1}".format(expected, actual)

In [None]:
# Import the sys module
import sys

class TestGetDataAsNumpyArray(object):
    """Container for the tests of get_data_as_numpy_array()."""

    # Skip on interpreters newer than 2.7. Only (major, minor) is compared:
    # the full version_info tuple of a 2.7.x interpreter is longer than
    # (2, 7) and therefore compares greater, so the unsliced comparison
    # would skip there too and contradict the stated reason.
    @pytest.mark.skipif(sys.version_info[:2] > (2, 7), reason="Works only on Python 2.7 or lower")
    def test_on_clean_file(self):
        """Compare the array read from the clean data file with the expected rows."""
        expected = np.array(
            [
                [2081.0, 314942.0],
                [1059.0, 186606.0],
                [1148.0, 206186.0],
            ]
        )
        actual = get_data_as_numpy_array("example_clean_data.txt", num_columns=2)
        message = "Expected return value: {0}, Actual return value: {1}".format(expected, actual)
        # pytest.approx() makes the float comparison tolerant of rounding.
        assert actual == pytest.approx(expected), message

# 3.4 Travis CI 的安装与使用，上传GitHub检测代码

# 4.1 test preprocessing functions --fixture
### fixture. pytest keeps the fixtures separate from the tests as this encourages reusing fixtures for tests that need the same/similar setup and teardown code.

#### Write a fixture for an empty data file
When a function takes a data file as an argument, you need to write a fixture that takes care of creating and deleting that data file. 

1. Creates an empty data file empty.txt relative to the current working directory in setup.

2. Yields the path to the empty data file.

3. Deletes the empty data file in teardown.

In [None]:
@pytest.fixture
def empty_file():
    """Create an empty "empty.txt", yield its path, then delete the file.

    Yields:
        str: the relative path "empty.txt", resolved against the current
        working directory.
    """
    # Local import: the visible cells never import os, so without it the
    # teardown below would raise NameError.
    import os

    file_path = 'empty.txt'
    # Setup: opening in "w" mode and closing straight away leaves an empty file.
    with open(file_path, "w"):
        pass
    yield file_path
    # Teardown: remove the file created during setup.
    os.remove(file_path)
    
def test_on_empty_file(self, empty_file):
    """Check that an empty data file produces an empty array with 2 columns.

    NOTE(review): takes `self` although it sits at top level, so it is
    presumably a method lifted out of a test class -- confirm its placement.
    """
    expected = np.empty((0, 2))
    actual = get_data_as_numpy_array(empty_file, 2)
    message = "Expected: {0}, Actual: {1}".format(expected, actual)
    assert actual == pytest.approx(expected), message

### Fixture chaining using tmpdir
The built-in tmpdir fixture is very useful when dealing with files in setup and teardown. tmpdir combines seamlessly with user defined fixture via fixture chaining.

In [None]:
import pytest

@pytest.fixture
def empty_file(tmpdir):
    """Create an empty "empty.txt" inside tmpdir and yield its path.

    Requesting `tmpdir` as an argument chains this fixture with the
    built-in tmpdir fixture. No explicit teardown code follows the yield.
    """
    file_path = tmpdir.join("empty.txt")
    # Opening in "w" mode and closing straight away leaves an empty file.
    handle = open(file_path, "w")
    handle.close()
    yield file_path

# 4.2 mocking 
### 屏蔽test通过不了的部分（函数或模块），使其成为bug-free。暂时通过test

In [None]:
'''Program a bug-free dependency'''

# Define a function convert_to_int_bug_free
def convert_to_int_bug_free(comma_separated_integer_string):
    """Return the known-correct conversion result for a fixed set of inputs.

    The answers are looked up in a hard-coded table rather than computed.
    Two of the listed inputs map to None.

    Raises:
        KeyError: if the argument is not one of the tabulated strings.
    """
    known_results = {
        "1,801": 1801,
        "201,411": 201411,
        "2,002": 2002,
        "333,209": 333209,
        "1990": None,
        "782,911": 782911,
        "1,285": 1285,
        "389129": None,
    }
    return known_results[comma_separated_integer_string]

In [None]:
'''Mock a dependency'''

# Add the correct argument to use the mocking fixture in this test
def test_on_raw_data(self, raw_and_clean_data_file, mocker):
    """Check preprocess() with convert_to_int replaced by a bug-free mock.

    Verifies the arguments preprocess() passed to the dependency, in order,
    and then the first two lines it wrote to the clean file.

    NOTE(review): takes `self` although it sits at top level, so it is
    presumably a method lifted out of a test class -- confirm its placement.
    """
    # Local import: `call` is not imported anywhere in the visible cells.
    from unittest.mock import call

    raw_path, clean_path = raw_and_clean_data_file
    # Replace the dependency with the bug-free stand-in.
    convert_to_int_mock = mocker.patch(
        "data.preprocessing_helpers.convert_to_int",
        side_effect=convert_to_int_bug_free,
    )
    preprocess(raw_path, clean_path)
    # Check that preprocess() called the dependency with these arguments, in order.
    expected_calls = [
        call("1,801"), call("201,411"), call("2,002"), call("333,209"),
        call("1990"), call("782,911"), call("1,285"), call("389129"),
    ]
    assert convert_to_int_mock.call_args_list == expected_calls
    with open(clean_path, "r") as f:
        lines = f.readlines()
    # Compare with a real tab and newline. The doubled backslashes used before
    # spelled literal "\t" and "\n" text, which a line returned by readlines()
    # (and followed by another line) can never equal.
    first_line = lines[0]
    assert first_line == "1801\t201411\n"
    second_line = lines[1]
    assert second_line == "2002\t333209\n"

# 4.3 testing models

### 检测模型是否能运行
#### The model_test() function, which measures how well the model fits unseen data, returns a quantity called $r^2$, which is very difficult to compute in the general case. Therefore, you need to find special testing sets where computing $r^2$ is easy.

In [None]:
'''case. 1. leaner model'''

import numpy as np
import pytest
from models.train import model_test

def test_on_perfect_fit():
    """Check model_test() on points that lie exactly on y = 2x + 1.

    The test expects a return value of 1.0 for a perfect fit.
    """
    expected = 1.0
    linear_points = np.array([[1.0, 3.0], [2.0, 5.0], [3.0, 7.0]])
    actual = model_test(linear_points, slope=2.0, intercept=1.0)
    message = "Expected: {0}, Actual: {1}".format(expected, actual)
    assert actual == pytest.approx(expected), message

In [None]:
'''case. 2 '''

def test_on_circular_data(self):
    """Check model_test() on eight points spaced 45 degrees apart on the unit circle.

    With slope=0.0 and intercept=0.0 the test expects a result of about 0.0.

    NOTE(review): takes `self` although it sits at top level, so it is
    presumably a method lifted out of a test class -- confirm its placement.
    """
    # Local import: pi, cos and sin are not imported in the visible cells.
    from math import cos, pi, sin

    theta = pi/4.0
    # Points at angles 0, theta, 2*theta, ..., 7*theta. The axis-aligned
    # points are written out exactly instead of going through cos/sin.
    test_argument = np.array(
        [
            [1.0, 0.0],
            [cos(theta), sin(theta)],
            [0.0, 1.0],
            [cos(3 * theta), sin(3 * theta)],
            [-1.0, 0.0],
            [cos(5 * theta), sin(5 * theta)],
            [0.0, -1.0],
            [cos(7 * theta), sin(7 * theta)],
        ]
    )
    actual = model_test(test_argument, slope=0.0, intercept=0.0)
    assert actual == pytest.approx(0.0)

# 4.4 plot test image