# Test get_frequently_mutated

**TODO:** add tests for invalid input.

**Open question:** should importing `cptac`, creating the cancer object, and importing `cptac.algorithms` happen inside each test?

In [1]:
import pandas as pd
import cptac
import cptac.algorithms as al

In [12]:
def print_test_result(PASS):
    """Report the outcome of a test on stdout.

    Parameters:
    PASS (bool): Truthy when the test passed, falsy when it failed.
    """
    # The failure label carries an extra newline so failures stand apart in the log.
    outcome = '\tPASS' if PASS else '\tFAIL\n'
    print(outcome)

def check_returned_is_df(returned):
    """Verify that the object handed back by a function under test is a pandas dataframe.

    When it is not, a diagnostic is printed: one dedicated message for None,
    and a generic message naming the actual type for anything else.

    Parameters:
    returned: The object to inspect.

    Returns:
    bool: True if the object is a dataframe, False otherwise.
    """
    if isinstance(returned, pd.core.frame.DataFrame):
        return True

    # Not a dataframe: explain why before reporting the failure.
    if returned is None:
        print("Function under test returned None.")
    else:
        print("Returned object was not a dataframe. Type of object: {}".format(type(returned)))
    return False

def check_df_shape(df, exp_shape):
    """Compare a dataframe's shape against the expected one.

    Prints both the expected and the actual shape when they differ.

    Parameters:
    df (pandas.core.frame.DataFrame): The dataframe to inspect.
    exp_shape (tuple): Expected shape as (number of rows, number of columns).

    Returns:
    bool: True when the shapes are equal, False otherwise.
    """
    observed = df.shape
    if exp_shape == observed:
        return True

    report = "Dataframe dimensions did not match expected values.\n\tExpected: {}\n\tActual: {}\n"
    print(report.format(exp_shape, observed))
    return False

In [13]:
def _values_match(actual, expected):
    """Decide whether a dataframe cell holds the expected value.

    Real numbers are compared with a tight tolerance so that an expected
    fraction such as 27/95 is not rejected over floating-point rounding;
    every other type is compared with ==.
    """
    import math
    import numbers

    if isinstance(actual, numbers.Real) and isinstance(expected, numbers.Real):
        return math.isclose(actual, expected, rel_tol=1e-9, abs_tol=1e-12)
    return actual == expected


def check_getter(df, exp_dim, exp_headers, coordinates, values):
    """Test a dataframe's dimensions, headers, and a set of test values.

    Every problem found is printed; the checks keep going after a failure so
    that one call reports as much as possible.

    Parameters
    df: the dataframe gotten by the getter we are testing
    exp_dim: a tuple containing the expected dimensions of the dataframe, in the format (rows, columns)
    exp_headers: if the dataframe has up to 20 columns, all of the headers for the dataframe, in order. If it has more than 20 columns, then a list containing the first ten and last ten headers, in order.
    coordinates: a tuple of (row, column) pairs of int positions, one per test value (callers in this notebook pass three)
    values: a tuple with the expected value for each pair in coordinates, in the same order

    Returns
    bool indicating if the dataframe had the correct data.
    """
    PASS = True

    # Check that df is a dataframe, not None or something else.
    if not check_returned_is_df(df):
        return False # End test, because other tests will be useless.

    # Check dimensions
    if not check_df_shape(df, exp_dim):
        PASS = False

    # Check headers. Wide dataframes are only compared on their first and last ten columns.
    act_headers_all = list(df.columns.values)
    if len(act_headers_all) <= 20:
        act_headers = act_headers_all
    else:
        act_headers = act_headers_all[:10] + act_headers_all[-10:]

    if len(exp_headers) != len(act_headers):
        print("Unexpected number of test headers in dataframe. Expected number of headers: {}. You passed {} headers.\n".format(len(act_headers), len(exp_headers)))
        PASS = False
    else:
        for exp_header, act_header in zip(exp_headers, act_headers):
            if exp_header != act_header:
                print("Dataframe header did not match expected value.\n\tExpected: {}\n\tActual: {}\n".format(exp_header, act_header))
                PASS = False

    # Check test values
    if len(coordinates) != len(values):
        # A mismatch means some expectation would silently go unchecked.
        print("Number of test coordinates ({}) did not match number of expected values ({}).\n".format(len(coordinates), len(values)))
        PASS = False

    for (row, col), expected in zip(coordinates, values):
        try:
            actual = df.iloc[row, col]
        except IndexError:
            # Happens when the dataframe is smaller than expected; report it as a
            # failure rather than letting the whole test run crash.
            print("Test coordinate ({}, {}) is outside the dataframe, which has shape {}.\n".format(row, col, df.shape))
            PASS = False
            continue

        if not _values_match(actual, expected):
            print("Dataframe value did not match expected value.\n\tColumn: {}\n\tIndex: {}\n\tExpected: {}\n\tActual: {}\n".format(df.columns.values[col], df.index.values[row], expected, actual))
            PASS = False

    # Return whether the dataframe passed the test
    return PASS

In [18]:
import cptac.algorithms as al # should I put this in the test? 
def test_get_frequently_mutated_en():
    """Test get_frequently_mutated on the Endometrial dataset, without an explicit cutoff.

    Checks the returned dataframe's shape and headers, plus gene names and
    mutation fractions for several genes, then prints PASS only if every
    group of checks passed.
    """
    en = cptac.Endometrial()

    print('Running get_frequently_mutated...')

    df = al.get_frequently_mutated(en)
    dimensions = (232, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missence_Mut', 'Truncation_Mut']

    # Denominator of every expected fraction; presumably the number of tumor samples - TODO confirm.
    total_samples = 95

    # test gene names
    test_coord_names = ((53, 0), (32, 0), (227, 0))
    test_vals_names = ('CTCF', 'CCDC168', 'ZNF536')

    # test when missense and truncation don't add up to the fraction mutated
    # (missense and truncation in the same sample)
    test_coord_CTCF = ((53, 1), (53, 2), (53, 3))
    test_vals_CTCF = (27/total_samples, 9/total_samples, 23/total_samples)

    # test when missense and truncation values are the same
    test_coord_CCDC168 = ((32, 1), (32, 2), (32, 3))
    test_vals_CCDC168 = (16/total_samples, 11/total_samples, 11/total_samples)

    # test when there are no truncation type mutations
    test_coord_ZNF536 = ((227, 1), (227, 2), (227, 3))
    test_vals_ZNF536 = (12/total_samples, 12/total_samples, 0/total_samples)

    test_coord_TP53 = ((205, 1), (205, 2), (205, 3))
    test_vals_TP53 = (21/total_samples, 15/total_samples, 7/total_samples)

    # The gene-name group was defined but never checked before; it is now included.
    test_coord_vals = [(test_coord_names, test_vals_names), (test_coord_CTCF, test_vals_CTCF),
                       (test_coord_CCDC168, test_vals_CCDC168), (test_coord_ZNF536, test_vals_ZNF536),
                       (test_coord_TP53, test_vals_TP53)]

    # Collect every result in a list (no short-circuit) so each group runs and
    # prints its own mismatches, and a failure in an early group is not
    # overwritten by a later group that passes.
    results = [check_getter(df, dimensions, headers, coord, val) for coord, val in test_coord_vals]

    print_test_result(all(results))

In [19]:
# Run the Endometrial test; it prints a PASS or FAIL line.
test_get_frequently_mutated_en()

Running get_frequently_mutated...   
	PASS


In [20]:
def test_get_frequently_mutated_co_cutoff_15():
    """Test get_frequently_mutated on the Colon dataset with a cutoff of 0.15.

    Checks the returned dataframe's shape and headers, plus mutation fractions
    for several genes, then prints PASS only if every group of checks passed.
    """
    co = cptac.Colon()
    print('Running get_frequently_mutated...')

    df = al.get_frequently_mutated(co, 0.15)
    dimensions = (138, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missence_Mut', 'Truncation_Mut']

    # Denominator of every expected fraction; presumably the number of tumor samples - TODO confirm.
    total_samples = 97

    # test when there are no missense type mutations
    test_coord_CASP5 = ((15, 1), (15, 2), (15, 3))
    test_vals_CASP5 = (19/total_samples, 0/total_samples, 19/total_samples)

    # test when there are no truncation type mutations
    test_coord_KRAS = ((66, 1), (66, 2), (66, 3))
    test_vals_KRAS = (35/total_samples, 35/total_samples, 0/total_samples)

    test_coord_PIK3CA = ((92, 1), (92, 2), (92, 3))
    test_vals_PIK3CA = (24/total_samples, 23/total_samples, 1/total_samples)

    # test when missense and truncation don't add up to the fraction mutated
    # (missense and truncation in the same sample)
    test_coord_RYR2 = ((102, 1), (102, 2), (102, 3))
    test_vals_RYR2 = (21/total_samples, 19/total_samples, 7/total_samples)

    test_coord_vals = [(test_coord_CASP5, test_vals_CASP5), (test_coord_KRAS, test_vals_KRAS),
                       (test_coord_PIK3CA, test_vals_PIK3CA), (test_coord_RYR2, test_vals_RYR2)]

    # Collect every result in a list (no short-circuit) so each group runs and
    # prints its own mismatches, and a failure in an early group is not
    # overwritten by a later group that passes.
    results = [check_getter(df, dimensions, headers, coord, val) for coord, val in test_coord_vals]

    print_test_result(all(results))

In [21]:
# Run the Colon test (cutoff 0.15); it prints a PASS or FAIL line.
test_get_frequently_mutated_co_cutoff_15()

Running get_frequently_mutated...   
	PASS


In [24]:
def test_get_frequently_mutated_ov_default_cutoff():
    """Test get_frequently_mutated on the Ovarian dataset, without an explicit cutoff.

    Checks the returned dataframe's shape and headers, plus gene names and
    mutation fractions for several genes, then prints PASS only if every
    group of checks passed.
    """
    ov = cptac.Ovarian()
    print('Running get_frequently_mutated...')

    df = al.get_frequently_mutated(ov)
    dimensions = (16, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missence_Mut', 'Truncation_Mut']

    # Denominator of every expected fraction; presumably the number of tumor samples - TODO confirm.
    total_samples = 83

    # test gene names
    test_coord_names = ((15, 0), (13, 0), (2, 0))
    test_vals_names = ('WDFY4', 'TP53', 'MT-CO1')

    # test when missense and truncation don't add up to the fraction mutated
    # (missense and truncation in the same sample)
    test_coord_WDFY4 = ((15, 1), (15, 2), (15, 3))
    test_vals_WDFY4 = (10/total_samples, 8/total_samples, 3/total_samples)

    # test highest count
    test_coord_TP53 = ((13, 1), (13, 2), (13, 3))
    test_vals_TP53 = (77/total_samples, 50/total_samples, 27/total_samples)

    # test when there are no truncation mutations
    test_coord_MTCO1 = ((2, 1), (2, 2), (2, 3))
    test_vals_MTCO1 = (10/total_samples, 10/total_samples, 0/total_samples)

    test_coord_vals = [(test_coord_names, test_vals_names), (test_coord_WDFY4, test_vals_WDFY4),
                       (test_coord_TP53, test_vals_TP53), (test_coord_MTCO1, test_vals_MTCO1)]

    # Collect every result in a list (no short-circuit) so each group runs and
    # prints its own mismatches, and a failure in an early group is not
    # overwritten by a later group that passes.
    results = [check_getter(df, dimensions, headers, coord, val) for coord, val in test_coord_vals]

    print_test_result(all(results))

In [25]:
# Run the Ovarian test; it prints a PASS or FAIL line.
test_get_frequently_mutated_ov_default_cutoff()

Running get_frequently_mutated...   
	PASS


In [26]:
import pandas as pd
def test_get_frequently_mutated_renal():
    """Test get_frequently_mutated on the RenalCcrcc dataset, without an explicit cutoff.

    Checks the returned dataframe's shape and headers, plus gene names and
    mutation fractions for several genes, then prints PASS only if every
    group of checks passed.
    """
    rc = cptac.RenalCcrcc()
    print('Running get_frequently_mutated...')

    df = al.get_frequently_mutated(rc)
    dimensions = (6, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missence_Mut', 'Truncation_Mut']

    # test gene names
    test_coord_names = ((0, 0), (2, 0), (4, 0))
    test_vals_names = ('BAP1', 'PBRM1', 'TTN')

    # Denominator of every expected fraction below.
    total_tumors = 110

    test_coord_BAP1 = ((0, 1), (0, 2), (0, 3))
    test_vals_BAP1 = (17/total_tumors, 7/total_tumors, 10/total_tumors)

    # test high count
    test_coord_PBRM1 = ((2, 1), (2, 2), (2, 3))
    test_vals_PBRM1 = (44/total_tumors, 8/total_tumors, 37/total_tumors)

    # check that silent mutations are not counted (TTN has many silent mutations)
    test_coord_TTN = ((4, 1), (4, 2), (4, 3))
    test_vals_TTN = (13/total_tumors, 10/total_tumors, 4/total_tumors)

    test_coord_vals = [(test_coord_names, test_vals_names), (test_coord_BAP1, test_vals_BAP1),
                       (test_coord_PBRM1, test_vals_PBRM1), (test_coord_TTN, test_vals_TTN)]

    # Collect every result in a list (no short-circuit) so each group runs and
    # prints its own mismatches, and a failure in an early group is not
    # overwritten by a later group that passes.
    results = [check_getter(df, dimensions, headers, coord, val) for coord, val in test_coord_vals]

    print_test_result(all(results))

In [27]:
# Run the RenalCcrcc test; it prints a PASS or FAIL line.
test_get_frequently_mutated_renal()

Running get_frequently_mutated...   
	PASS


In [28]:
# Run every get_frequently_mutated test in sequence; each one prints its own PASS or FAIL line.
print("\nTesting get_frequently_mutated from the algorithms...")
import cptac.algorithms as al # Also imported in the first code cell; repeated here. TODO: decide on a single place for this import.
test_get_frequently_mutated_en()
test_get_frequently_mutated_co_cutoff_15()
test_get_frequently_mutated_ov_default_cutoff()
test_get_frequently_mutated_renal()


Testing get_frequently_mutated from the algorithms...
Running get_frequently_mutated...   
	PASS
Running get_frequently_mutated...   
	PASS
Running get_frequently_mutated...   
	PASS
Running get_frequently_mutated...   
	PASS
