# Test Util Functions

In [207]:
import datetime
import numpy as np
import pandas as pd
import pytest
from mock import Mock

# UTILS

In [4]:
def counter(f):
    """Decorate ``f`` so that calls are tallied on the wrapper's ``count`` attribute.

    Args:
        f: Any callable.

    Returns:
        A wrapper that forwards all positional and keyword arguments to ``f``
        and returns its result. The wrapper exposes ``count``, which starts
        at 0 and is incremented once per call.
    """
    # Imported locally so the decorator does not depend on a module-level import.
    import functools

    @functools.wraps(f)  # preserve f's __name__/__doc__ on the wrapper
    def wrapped(*args, **kwargs):
        # Incremented before delegating, so a call that raises is still counted.
        wrapped.count += 1
        return f(*args, **kwargs)

    # Set after wraps() so a ``count`` attribute copied from ``f`` cannot leak in.
    wrapped.count = 0
    return wrapped

In [87]:
def fill_na(column):
    """Fill missing values in a column with a default chosen by its dtype.

    Args:
        column: A pandas Series (for example, one column handed over by
            ``DataFrame.apply(fill_na, axis=0)``).

    Returns:
        A Series with missing values replaced by ``0.0`` when the dtype is
        float64, or by ``"N/A"`` when the dtype is object. Columns of any
        other dtype are returned unchanged.
    """
    if column.dtypes == np.float64:
        return column.fillna(0.0)
    if column.dtypes == "O":
        return column.fillna("N/A")
    # Neither numeric nor object (e.g. datetime): leave the column untouched.
    return column

In [193]:
def parse_ride_data(data):
    """Split a ride's product description into type, distance and duration.

    Recognised layouts:
        * merchant "Uber": ``"<type> <distance> | <duration>"``
        * merchant "Lyft": ``"<type> (<distance>, <duration>)"``

    Any other merchant, a description without the expected delimiter, or a
    description that contains the delimiter but cannot be unpacked into the
    expected parts falls back to the whole description as the ride type with
    ``"N/A"`` for distance and duration.

    Args:
        data: A mapping-like record (a dict or a DataFrame row) with string
            values under ``'merchant_name'`` and ``'product_description'``.
            Non-string descriptions are not handled and will raise.

    Returns:
        The same ``data`` object, mutated in place with three added keys:
        ``'ride_type'`` (a 1-tuple holding the stripped ride type),
        ``'distance_data'`` and ``'duration_data'`` (stripped strings).
    """
    merchant, ride_data = data['merchant_name'], data['product_description']

    # Fallback used whenever no trip details can be extracted.
    ride_type, distance_data, duration_data = ride_data, "N/A", "N/A"
    try:
        if merchant == "Uber" and "|" in ride_data:
            ride_info, duration = ride_data.split("|")
            kind, distance = ride_info.split(" ", 1)
            # Commit only after every unpack succeeded, so a failure part-way
            # through leaves the fallback values intact.
            ride_type, distance_data, duration_data = kind, distance, duration
        elif merchant == "Lyft" and "(" in ride_data:
            kind, travel_data = ride_data.split("(")
            # Drop the last character (the closing parenthesis in the
            # expected layout) before splitting distance from duration.
            distance, duration = travel_data[:-1].split(",")
            ride_type, distance_data, duration_data = kind, distance, duration
    except ValueError:
        # Abnormal record: it contains the initial delimiter ("|" or "(") but
        # does not follow the rest of the convention (additional delimiters,
        # spacing issues, too many/few values to unpack). Keep the fallback.
        pass

    # NOTE(review): ride_type is stored as a 1-tuple. This mirrors the trailing
    # comma in the original assignment and the tuples in expected_parse_df;
    # confirm whether a plain string was intended before changing it.
    data['ride_type'] = (ride_type.strip(),)
    data['distance_data'] = distance_data.strip()
    data['duration_data'] = duration_data.strip()
    return data

In [153]:
def remap_dict(dictionary):
    """Turn a dict into a list with one small dict per top-level entry.

    An entry whose value is itself a dict becomes that dict with the entry's
    key added under ``'name'`` (a ``'name'`` key already present in the value
    takes precedence). Any other entry becomes a single-pair ``{key: value}``
    dict. Order follows the input's iteration order.
    """
    def _as_record(label, payload):
        if not isinstance(payload, dict):
            return {label: payload}
        return {'name': label, **payload}

    return [_as_record(label, payload) for label, payload in dictionary.items()]

# Test Data

In [154]:
# fill_na fixtures: single-cell frames, one per dtype case.
# Float column holding a missing value, and the frame expected after filling.
test_num = pd.DataFrame({'number': np.nan}, index=['number'])
expected_num = pd.DataFrame({'number': 0.0}, index=['number'])
# Object column holding a missing value, and the frame expected after filling.
test_obj = pd.DataFrame({'object': None}, index=['object'])
expected_obj = pd.DataFrame({'object': 'N/A'}, index=['object'])
# Datetime column: neither float nor object.
test_not_num_obj = pd.DataFrame(
    {'date': datetime.datetime(2020, 1, 1)},
    index=['date'],
)

# parse_ride_data fixtures: an Uber ride, a Lyft ride and a cancelled Uber ride.
test_parse_df = pd.DataFrame({
    'merchant_name': ['Uber', 'Lyft', 'Uber'],
    'product_description': [
        "UberX 3.16 miles | 22 min",
        "Lyft fare (1.18mi, 8m 57s)",
        "UberX SCL Ride cancelled",
    ],
    'nothing': [1, 1, 1],
})
# Same rows plus the three parsed columns; ride_type values are 1-tuples.
expected_parse_df = pd.DataFrame({
    'merchant_name': ['Uber', 'Lyft', 'Uber'],
    'product_description': [
        "UberX 3.16 miles | 22 min",
        "Lyft fare (1.18mi, 8m 57s)",
        "UberX SCL Ride cancelled",
    ],
    'nothing': [1, 1, 1],
    'ride_type': [
        ('UberX',),
        ('Lyft fare',),
        ('UberX SCL Ride cancelled',),
    ],
    'distance_data': ['3.16 miles', '1.18mi', 'N/A'],
    'duration_data': ['22 min', '8m 57s', 'N/A'],
})

# remap_dict fixtures: a flat summary-statistics dict and its list-of-dicts form.
test_stats_dict = {
    'count': 267045.0,
    'mean': 16.56587470276928,
    'std': 65.22228905493417,
    'min': 0.0,
    '25%': 7.99,
    '50%': 12.06,
    '75%': 19.53,
    'max': 21700.0,
}
expected_stats_dict = [
    {'count': 267045.0},
    {'mean': 16.56587470276928},
    {'std': 65.22228905493417},
    {'min': 0.0},
    {'25%': 7.99},
    {'50%': 12.06},
    {'75%': 19.53},
    {'max': 21700.0},
]


In [157]:
def test_counter():
    """A function wrapped by ``counter`` reports one call after being called once."""
    def mock_func():
        pass

    # Explicit application; equivalent to decorating mock_func with @counter.
    mock_func = counter(mock_func)
    mock_func()
    assert mock_func.count == 1

In [158]:
# One case per dtype branch of fill_na: float64, object, and a dtype that
# must pass through unchanged (the datetime frame is its own expectation).
@pytest.mark.parametrize(
    "test_type, expected_fill",
    [
        (test_num, expected_num),
        (test_obj, expected_obj),
        (test_not_num_obj, test_not_num_obj),
    ],
)
def test_fill_na(test_type, expected_fill):
    """Applying fill_na column-wise yields the expected frame for each case."""
    assert expected_fill.to_dict() == test_type.apply(fill_na, axis=0).to_dict()
    

In [201]:
# Supply the arguments explicitly; without this pytest would look for
# fixtures named test_df / expected_df, and none are defined in this file.
@pytest.mark.parametrize(
    "test_df, expected_df",
    [(test_parse_df, expected_parse_df)],
)
def test_parse_ride_data(test_df, expected_df):
    """Applying parse_ride_data row-wise adds the expected parsed columns."""
    assert expected_df.to_dict() == test_df.apply(parse_ride_data, axis=1).to_dict()
    

In [202]:
# Supply the arguments explicitly; without this pytest would look for
# fixtures named test_dict / expected_dict, and none are defined in this file.
@pytest.mark.parametrize(
    "test_dict, expected_dict",
    [(test_stats_dict, expected_stats_dict)],
)
def test_remap_dict(test_dict, expected_dict):
    """remap_dict converts the flat stats dict into a list of one-pair dicts."""
    # The input is a plain dict (it has no .apply), so call remap_dict directly.
    assert expected_dict == remap_dict(test_dict)