In [154]:
from collections import defaultdict, namedtuple, Counter, deque
import csv

#### Select the file to process. Create the namedtuple that will hold each line item. Set the names of the fields in the text file

In [155]:
# Invoice export to process (read as tab-delimited text by get_line_by_job).
invoice_data = "276927.txt"

# Column names applied, in order, to each row of the text file.
fields = ('invoice', 'testtype', 'cost', 'desc')

# One parsed line item: the four file columns plus 'rco' and 'adient',
# which are split out of the comma-separated desc column.
line_item = namedtuple('line_item', fields + ('rco', 'adient'))

#### Function to read in the text file, parse it line by line into line_item objects, and build the dict that groups the items by testtype

In [156]:
def get_line_by_job(data = invoice_data):
    """Read a tab-delimited invoice file and group its line items by testtype.

    Each row is read using the column names in ``fields``. The ``desc`` column
    must hold at least three comma-separated parts; the first three are stored,
    in order, as ``adient``, ``desc`` and ``rco`` on the resulting
    ``line_item``. The parts are kept exactly as they appear in the file,
    including any whitespace that follows the commas.

    Rows that cannot be parsed are skipped rather than aborting the whole
    read: a cost that is missing or not a number, a missing ``desc`` column,
    or a ``desc`` with fewer than three comma-separated parts.

    Args:
        data: Path of the text file to read. Defaults to the value
            ``invoice_data`` had when this function was defined.

    Returns:
        A ``defaultdict(list)`` mapping each testtype to the list of
        ``line_item`` records found for it, in file order.
    """
    items = defaultdict(list)
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to a reader.
    with open(data, encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, fieldnames=fields, delimiter='\t')
        for row in reader:
            try:
                cost = float(row['cost'])
                # Split once; unpacking raises ValueError when fewer than
                # three parts are present, extra parts are ignored.
                adient, desc, job = row['desc'].split(',')[:3]
            except (ValueError, TypeError, AttributeError):
                # ValueError: non-numeric cost or too few desc parts.
                # TypeError / AttributeError: short row, where DictReader
                # fills the missing cost / desc column with None.
                continue
            testtype = row['testtype']
            record = line_item(
                invoice=row['invoice'],
                testtype=testtype,
                cost=cost,
                desc=desc,
                rco=job,
                adient=adient,
            )
            items[testtype].append(record)

    return items

#### Run the function

In [157]:
# Parse the default invoice file; the result maps each testtype to its list of line items.
all_items = get_line_by_job()

#### Display the list of items associated with a testtype

In [158]:
# Display every line item grouped under 'PV Testing'.
# all_items is a defaultdict(list), so looking up a testtype that was not in
# the file returns (and inserts) an empty list instead of raising KeyError.
all_items['PV Testing']

[line_item(invoice='32.12522-TE.002.03.02', testtype='PV Testing', cost=716.17, desc=' S/B Abusive Load', rco=' 929780', adient='10226096'),
 line_item(invoice='32.12522-TE.003.08.02', testtype='PV Testing', cost=1354.78, desc=' General Static Load', rco=' 929838', adient='10233126-1'),
 line_item(invoice='32.12522-TE.003.08.02', testtype='PV Testing', cost=4718.56, desc=' Child Restraint Anchorages', rco=' 929846', adient='10225665'),
 line_item(invoice='32.12522-TE.003.07.02', testtype='PV Testing', cost=638.02, desc=' Seat Anchorage Strength', rco=' 929877', adient='10234831'),
 line_item(invoice='32.12522-TE.003.08.02', testtype='PV Testing', cost=5294.8, desc=' Adjuster Durability', rco=' 929879', adient='10228261'),
 line_item(invoice='32.12522-TE.003.07.02', testtype='PV Testing', cost=2994.6, desc=' Child Restraint Anchorages', rco=' 929880', adient='10226107'),
 line_item(invoice='32.12522-TE.003.07.02', testtype='PV Testing', cost=700.54, desc=' Seat Anchorage Strength', rco=

#### Give counts of the testtypes

In [163]:
# Number of parsed line items for each testtype.
cnt = Counter({testtype: len(jobs) for testtype, jobs in all_items.items()})

# Largest count first.
cnt.most_common()

[('PV Testing', 13), ('Other Product Tests', 9), ('DV Testing', 1)]

#### Give the cost of a single PV Testing line item, selected by index

In [162]:
# Cost of the first line item listed under 'PV Testing'.
first_pv_item = all_items['PV Testing'][0]
onecost = first_pv_item.cost
print(onecost)

716.17


#### Give the total cost of all items in PV Testing

In [161]:
# Add up the cost of every 'PV Testing' line item, one at a time in list order,
# then print the total rounded to two decimal places.
total = 0
for item in all_items['PV Testing']:
    total = total + item.cost
print(round(total, 2))

21814.46
