# Code Version 1: A Single Independent Function for Each Field

In [9]:
## Code Version 1: A single independent function for each field data type:
## SKU, description, currency, integer, percent

## Independent Functions

def get_sku(field):
    """
    Clean the SKU column.

    field is the raw SKU string (the first column of a product row).
    Returns the same text with leading and trailing whitespace removed;
    whitespace inside the value is left untouched.
    """
    trimmed = field.strip()
    return trimmed

def get_description(field):
    """
    Clean the description column.

    field is a string that may contain tab or newline characters.
    Returns the string with every tab and every newline replaced by one space
    (consecutive tabs/newlines therefore yield consecutive spaces).
    """
    return field.replace('\t', ' ').replace('\n', ' ')

def get_currency(field):
    """
    Convert a currency string to a number.

    field is a string such as '$1,091.33'.
    The dollar sign and thousands separators are dropped, the remainder is
    parsed as a float, and the value is returned rounded to 2 decimals.
    Raises ValueError if what remains is not a valid float literal.
    """
    digits = field.replace('$', '').replace(',', '')
    amount = float(digits)
    return round(amount, 2)


def get_integer(field):
    """
    Convert a whole-number string to an int.

    field is a string such as '2,585'.
    Commas used as thousands separators are removed before the conversion.
    Raises ValueError if what remains is not a valid integer literal.
    """
    without_commas = field.replace(',', '')
    return int(without_commas)


def get_percent(field):
    """
    Convert a percentage string to a fraction.

    field is a string such as '42.9%'.
    The percent sign is removed, the remainder is parsed as a float, and the
    result is divided by 100 (so '50.0%' becomes 0.5).
    Raises ValueError if what remains is not a valid float literal.
    """
    number_text = field.replace('%', '')
    return float(number_text) / 100

## Source data
# Raw product table: the first row holds the column titles, every following
# row is one product whose fields are still unprocessed strings.
products = [
    ["SKU", "description", "cost", "list_price", "units_in_stock", "target_margin"],
    ["  20776", "Budget Device\tMini\n1000-pack", "$284.43", "$406.563", "673", "42.9%"],
    ["  13508", "Budget Widget\tExtra Large\n1000-pack", "$207.49", "$270.203", "461", "30.2%"],
    ["   1261", "Super Whatsit\tMini\nDozen", "$1,091.33", "$1,485.85", "368", "36.1%"],
    ["  49367", "Super Gizmo\tExtra Large\nGross", "$1,977.36", "$2,589.87", "2,585", "31.0%"],
    ["  39655", "Student Gadget\tMini\nGross", "$437.97", "$656.749", "4,452", "50.0%"],
]


## Clean every product row in place, then display the cleaned table.
## Row 0 holds the column titles, so cleaning starts at row 1.

## One cleaner per column, listed in column order.
column_cleaners = (get_sku, get_description, get_currency,
                   get_currency, get_integer, get_percent)

for row in products[1:]:
    for col, cleaner in enumerate(column_cleaners):
        row[col] = cleaner(row[col])

products

[['SKU',
  'description',
  'cost',
  'list_price',
  'units_in_stock',
  'target_margin'],
 ['20776', 'Budget Device Mini 1000-pack', 284.43, 406.56, 673, 0.429],
 ['13508', 'Budget Widget Extra Large 1000-pack', 207.49, 270.2, 461, 0.302],
 ['1261', 'Super Whatsit Mini Dozen', 1091.33, 1485.85, 368, 0.361],
 ['49367', 'Super Gizmo Extra Large Gross', 1977.36, 2589.87, 2585, 0.31],
 ['39655', 'Student Gadget Mini Gross', 437.97, 656.75, 4452, 0.5]]

# Code Version 2: Cleaning Operations Moved to Their Own Functions

In [11]:
## Code Version 2: Break out the specific cleaning actions into separate functions. 

## Cleaning functions below

def clean_spaces(field):
    """
    field is a string
    clean_spaces returns it without leading or trailing whitespace
    """
    trimmed = field.strip()
    return trimmed

def clean_whitespace(field):
    """
    field is a string
    clean_whitespace returns it with each tab and each newline replaced by a
    single space
    """
    # Map both characters to a space so the string is rewritten in one pass.
    to_space = {ord('\t'): ' ', ord('\n'): ' '}
    return field.translate(to_space)
    
def clean_commas(field):
    """
    field is a string
    clean_commas returns it with every comma removed
    """
    without_commas = field.replace(',', '')
    return without_commas

def clean_currency(field):
    """
    field is a string
    clean_currency returns it with every dollar sign removed
    """
    without_symbol = field.replace('$', '')
    return without_symbol

def clean_percent(field):
    """
    field is a string
    clean_percent returns it with every percent sign removed
    """
    without_sign = field.replace('%', '')
    return without_sign

## Field functions

def get_sku(sku_field):
    """
    sku_field is the raw SKU string
    get_sku returns the result of passing it through clean_spaces
    """
    sku = clean_spaces(sku_field)
    return sku

def get_description(desc_field):
    """
    desc_field is the raw description string
    get_description returns the result of passing it through clean_whitespace
    """
    description = clean_whitespace(desc_field)
    return description

def get_currency(curr_field):
    """
    curr_field is a currency string
    get_currency applies clean_commas, then clean_currency, converts the
    result to a float and returns it rounded to 2 decimals
    """
    amount = float(clean_currency(clean_commas(curr_field)))
    return round(amount, 2)

def get_integer(field):
    """
    field is a string holding a whole number
    get_integer applies clean_commas and converts the result to an int
    """
    digits = clean_commas(field)
    return int(digits)

def get_percent(field):
    """
    field is a percentage string
    get_percent applies clean_percent, converts the result to a float and
    returns it divided by 100
    """
    number_text = clean_percent(field)
    return float(number_text) / 100

## Source data
# Raw product table: the first row holds the column titles, every following
# row is one product whose fields are still unprocessed strings.
products = [
    ["SKU", "description", "cost", "list_price", "units_in_stock", "target_margin"],
    ["  20776", "Budget Device\tMini\n1000-pack", "$284.43", "$406.563", "673", "42.9%"],
    ["  13508", "Budget Widget\tExtra Large\n1000-pack", "$207.49", "$270.203", "461", "30.2%"],
    ["   1261", "Super Whatsit\tMini\nDozen", "$1,091.33", "$1,485.85", "368", "36.1%"],
    ["  49367", "Super Gizmo\tExtra Large\nGross", "$1,977.36", "$2,589.87", "2,585", "31.0%"],
    ["  39655", "Student Gadget\tMini\nGross", "$437.97", "$656.749", "4,452", "50.0%"],
]


## Clean every product row in place, then display the cleaned table.
## Row 0 holds the column titles, so cleaning starts at row 1.

## One field function per column, listed in column order.
column_cleaners = (get_sku, get_description, get_currency,
                   get_currency, get_integer, get_percent)

for row in products[1:]:
    for col, cleaner in enumerate(column_cleaners):
        row[col] = cleaner(row[col])

products

[['SKU',
  'description',
  'cost',
  'list_price',
  'units_in_stock',
  'target_margin'],
 ['20776', 'Budget Device Mini 1000-pack', 284.43, 406.56, 673, 0.429],
 ['13508', 'Budget Widget Extra Large 1000-pack', 207.49, 270.2, 461, 0.302],
 ['1261', 'Super Whatsit Mini Dozen', 1091.33, 1485.85, 368, 0.361],
 ['49367', 'Super Gizmo Extra Large Gross', 1977.36, 2589.87, 2585, 0.31],
 ['39655', 'Student Gadget Mini Gross', 437.97, 656.75, 4452, 0.5]]