# 0. Import libraries <br>
`tika` for extracting text from PDFs <br>
`pandas` for data handling <br>
`glob` for catching all PDFs in a folder

In [1]:
from tika import parser
import pandas as pd
import glob

# 1. Gather Data

Write the content of the parsed pdf (`parsedPDF`) in the variable `content`, seperate the lines indicated with '\n' and filter all lines which only contain '' or ' '.

In [2]:
def parse_pdf(file):
    """Return the content of the pdf with seperated lines and without empty lines."""
    parsedPDF = parser.from_file(file)
    content = parsedPDF['content']
    contentlist = content.split('\n')
    contentlist = list(filter(lambda a: a != '', contentlist))
    contentlist = list(filter(lambda a: a != ' ', contentlist))
    return contentlist

# 2. Asses and Clean Data

Define `find_int()` as function which checks wether an entry is of the data type integer or not.

In [3]:
def find_int(entry):
    """Return either True, if the input is an integer or False, if the input is no integer."""
    try:
        out = isinstance(int(entry), int)
    except:
        out = False
    return out

Define `is_float()` as function which checks wether an entry is of the data type integer or not.

In [4]:
def is_float(entry):
    """Return either True, if the input is an float or False, if the input is no float."""
    try:
        out = isinstance(float(entry), float)
    except:
        out = False
    return out

Define the function `position()` which extracts for every position in the receipt the paid price (`paid_price_list`), the original price without discount (`original_price_list`), amount purchased (`amount_list`), item name (`item_list`), packaging (`packaging_list`), if the item was reduced (`reduced_list`), paid price per unit (`paid_price_per_unit_list`), original price per unit (`original_price_per_unit_list`), amount of packaging without dimension (`unit_list`) and the dimension of packaging (`dimension_list`). All exctrated information is stored in a dataframe (`art_df`).

In [5]:
def positions(t, contentlist, art_df):
    """
    Return the all information of a position in a delivery regarding price, reduction, name, amount and packaging.
    
        Parameters:
            t (dataframe): dataframe of all positions in a delivery
            contentlist (list): content of the pdf with seperated lines and without empty lines
            art_df (dataframe): dataframe where the details of every position can be stored with the length of `t`

        Returns:
            art_df (dataframe): dataframe with eleven additional columns for the paid price, the original price without discount,
             amount purchased, item name, packaging, if the item was reduced, paid price per unit, original price per unit,
             amount of packaging without dimension and the dimension of packaging.
    """
    #Initialize list variables
    paid_price_list = []
    original_price_list = []
    amount_list = []
    item_list = []
    packaging_list = []

    #Find end of receipt
    stop = 0
    for idx, entry in enumerate(contentlist):
        if entry.split(' ')[0] == 'Pfand':
            stop = idx
            search_string = 'Pfand'
            break
        elif entry.split(' ')[0] == 'Zwischensumme':
            stop = idx
            search_string = 'Zwischensumme'
            break
        elif entry.split(' ')[0] == 'Gesamtbetrag':
            stop = idx
            search_string = 'Gesamtbetrag'
            break      

    #Loop  for every position
    idx_list = pd.Series(t[t].index)
    idx_list = idx_list[idx_list<stop]

    for position in idx_list:
        
        #get item name
        if position < stop:
            item_list.append(contentlist[position+1])
        
        #get amount
        if position < stop:
            amount_list.append(int(contentlist[position]))

        #get packaging
        if position < stop:
            txt = contentlist[position+2].split(' ')[-1]
            if not is_float(txt):
                text = contentlist[position+2]
            else:
                text = ' '.join(contentlist[position+2].split(' ')[:-1])
            packaging_list.append(text)

        #get paid price and original price
        if position < stop:
            if position > pd.Series(t[t].index)[0]:
                if contentlist[position-1][:7] == 'http://':
                    txt = contentlist[position-2]
                elif contentlist[position-1][:9] == 'Bestellnr':
                    txt = contentlist[position-3]
                else:
                    txt = contentlist[position-1]
                paid_price_list.append(float(txt.split(' ')[-1]))
                if not is_float(contentlist[position-2]):
                    original_price_list.append(float(txt.split(' ')[-1]))
                else:
                    original_price_list.append(float(contentlist[position-2]))

    #get paid price of last article
    last_text = ''.join(contentlist[stop-1:])
    last_end = last_text.find(search_string)
    last_start = last_text[:last_end].rfind(' ')
    paid_price_list.append(float(last_text[last_start+1:last_end]))

    #get original price of last article
    for idx, entry in enumerate(contentlist):
        if entry.split(' ')[0] == search_string:
            check = is_float(contentlist[idx-2])
            if not check:
                original_price_list.append(float(last_text[last_start+1:last_end]))
            else:
                original_price_list.append(float(contentlist[idx-2]))
   
    #calculate reduced list
    reduced_list = [i<j for i, j in zip(paid_price_list, original_price_list)]

    #calculate paid price per unit
    paid_price_per_unit_list = [round(i/j,2) for i, j in zip(paid_price_list, amount_list)]

    #calculate original price per unit
    original_price_per_unit_list = [i / j for i, j in zip(original_price_list, amount_list)]

    #get the amount of packaging without dimension
    unit_list = []
    for package in packaging_list:
        unit = [(i, c) for i,c in enumerate(package) if c.isdigit()]
        if len(unit) > 0:
            unit_list.append(package[[(i, c) for i,c in enumerate(package) if c.isdigit()][-1][0]+1:].strip())
        else:
            unit_list.append('')
    
    #get the dimension of the packaging
    dimension_list = []
    for package in packaging_list:
        package = package.replace(',','.')
        txt = [(i, c) for i,c in enumerate(package) if c.isdigit()]
        if len(txt) > 0:
            pack = package[:[(i, c) for i,c in enumerate(package) if c.isdigit()][-1][0]+1]
        else:
            pack = '1'
        mult =  pack.find(' x ')
        if mult > 0:
            if is_float(pack[:mult]) and is_float(pack[mult+3:]):
                pack = float(float(pack[:mult])*float(pack[mult+3:]))
            else:
                pack = float(pack.split(' ')[-1])
        else:
            if is_float(pack):
                pack = float(pack)
            else:
                pack = float(pack.split(' ')[-1])
        dimension_list.append(pack)
    
    #write all lists in dataframe
    art_df['position'] = range(1,len(item_list)+1)
    art_df['article'] = item_list
    art_df['paid per unit'] = paid_price_per_unit_list
    art_df['amount'] = amount_list
    art_df['paid per position'] = paid_price_list
    art_df['reduced'] = reduced_list    
    art_df['original price per unit'] = original_price_per_unit_list
    art_df['original price per position'] = original_price_list
    art_df['binding'] = packaging_list
    art_df['quantity'] = dimension_list
    art_df['unit'] = unit_list

    return art_df, idx_list

Define the function `delivery()` which extracts the delivery date (`month` and `day`) for every delivery and stores it in the dataframe `art_df`.

In [6]:
def delivery(contentlist, art_df, idx_list):
    """
    Return the day and month of the delivery
    
        Parameters:
            contentlist (list): content of the pdf with seperated lines and without empty lines
            art_df (dataframe): dataframe where the delivery date can be stored with the length of `idx_list`
            idx_list (list): list of positions of a delivery

        Returns:
            art_df (dataframe): Dataframe with two additional columns for day and month of the delivery
    """
    end = ''.join(contentlist[0:idx_list[0]-2]).find('Klick')
    txt = ''.join(contentlist[0:idx_list[0]-2])[:end-2]
    start = txt.find('.')

    month = txt[start+1:].strip()
    day = txt[:start][-2:].strip()

    if month == 'Januar' or month == 'Janua' or month == 'Januar.':
        month = 1
    elif month == 'Februar' or month == 'Februa' or month == 'Februar.':
        month = 2
    elif month == 'März' or month == 'Mär' or month == 'März.':
        month = 3
    elif month == 'April' or month == 'Apri' or month == 'April.':
        month = 4
    elif month == 'Mai' or month == 'Ma' or month == 'Mai.':
        month = 5
    elif month == 'Juni' or month == 'Jun' or month == 'Juni.':
        month = 6
    elif month == 'Juli' or month == 'Jul' or month == 'Juli.':
        month = 7
    elif month == 'August' or month == 'Augus' or month == 'August.':
        month = 8
    elif month == 'September' or month == 'Septembe' or month == 'September.':
        month = 9
    elif month == 'Oktober' or month == 'Oktobe' or month == 'Oktober.':
        month = 10
    elif month == 'November' or month == 'Novembe' or month == 'November.':
        month = 11
    elif month == 'Dezember' or month == 'Dezembe' or month == 'Dezember.':
        month = 12
    
    art_df['delivery_day'] = day
    art_df['delivery_month'] = month

    return art_df

Define the function `deliveryID()` which extracts the delivery ID from the file name and stores it in the dataframe `art_df`.

In [7]:
def deliveryID(art_df, file):
    """Return the delivery ID from the filename."""
    art_df['deliveryID'] = file
    return art_df

Define the function `order_nr()` which extracts the order IDs from the receipts and stores it in the dataframe `art_df`.

In [8]:
def order_nr(t2, contentlist, art_df, idx_list):
    """
    Return the order IDs of the delivery
    
        Parameters:
            t2 (dataframe): dataframe of all orders in a delivery
            contentlist (list): content of the pdf with seperated lines and without empty lines
            art_df (dataframe): dataframe where the order ID can be stored with the length of `idx_list`
            idx_list (list): list of positions of a delivery

        Returns:
            art_df (dataframe): dataframe with one additional column for order ID
            idx_order_nr (series): series of the index of t2
            order_nr_list (list): list of all order IDs
    """
    order_nr_list = []
    for order_nr in pd.Series(t2[t2].index):
        space = contentlist[order_nr].find(' ')
        order_nr_list.append(contentlist[order_nr][space+1:])
    
    idx_item = idx_list
    idx_order_nr = pd.Series(t2[t2].index)
    
    order_list = []
    k = -1
    for i in range(len(idx_item)):
        if k < len(idx_order_nr)-1 and i < len(idx_item)-1:
            if idx_item[i] > idx_order_nr[k+1]:
                k += 1
        order_list.append(order_nr_list[k])
    
    art_df['order-nr'] = order_list

    return art_df, idx_order_nr, order_nr_list
    

Define the function `added()` which extracts the day (`add_day_list`) and month (`add_month_list`) an order was added to the delivery and stores it in the dataframe `art_df`.

In [9]:
def added(idx_order_nr, contentlist, order_nr_list, art_df):
    """
    Return the date when the order was added to the delivery
    
        Parameters:
            idx_order_nr (series): series of the index of all orders 
            contentlist (list): content of the pdf with seperated lines and without empty lines
            order_nr_list (list): list of all order IDs
            art_df (dataframe): dataframe where the order ID can be stored with the length of `idx_list`

        Returns:
            art_df (dataframe): dataframe with two additional columns day and month of the added order
    """
    add_month_list = []
    add_day_list = []

    for idx in idx_order_nr-1:

        dot = contentlist[idx].find('.')
        month = contentlist[idx][dot+2:].strip()
        
        if month == 'Januar' or month == 'Janua' or month == 'Januar.':
            month = 1
        elif month == 'Februar' or month == 'Februa' or month == 'Februar.':
            month = 2
        elif month == 'März' or month == 'Mär' or month == 'März.':
            month = 3
        elif month == 'April' or month == 'Apri' or month == 'April.':
            month = 4
        elif month == 'Mai' or month == 'Ma' or month == 'Mai.':
            month = 5
        elif month == 'Juni' or month == 'Jun' or month == 'Juni.':
            month = 6
        elif month == 'Juli' or month == 'Jul' or month == 'Juli.':
            month = 7
        elif month == 'August' or month == 'Augus' or month == 'August.':
            month = 8
        elif month == 'September' or month == 'Septembe' or month == 'September.':
            month = 9
        elif month == 'Oktober' or month == 'Oktobe' or month == 'Oktober.':
            month = 10
        elif month == 'November' or month == 'Novembe' or month == 'November.':
            month = 11
        elif month == 'Dezember' or month == 'Dezembe' or month == 'Dezember.':
            month = 12
        
        add_month_list.append(month)
        add_day_list.append(contentlist[idx][:dot][-2:].strip())
    
    add_df = pd.DataFrame({'order-nr.': order_nr_list, 'add_day': add_day_list, 'add_month': add_month_list})

    art_df = art_df.join(add_df.set_index('order-nr.'), on=('order-nr'))

    return art_df

Define the function `pfand()` which extracts the deposit (`pfand`) and stores it in the dataframe `art_df`.

In [10]:
def pfand(contentlist, art_df):
    """Return the amount of deposit paid back in the delivery."""
    pfand = 0.0
    for entry in contentlist:
        if entry[:19] == 'Eingereichtes Pfand':
            pfand = float(entry.split(' ')[-1])
    
    art_df['pfand'] = pfand
    
    return art_df

The main routine which executes all defined functions in the right order for all PDFs and stores the resulting dataframe (`table`) in an excel file.

In [11]:
from tqdm.notebook import tqdm

files = glob.glob('*.pdf')
table = pd.DataFrame(columns=['position','article','paid per unit','amount','paid per position','reduced','original price per unit','original price per position','binding','quantity','unit','delivery_day','delivery_month','deliveryID','order-nr','add_day','add_month','pfand'])

for file in tqdm(files):
    print(file)
    art_df = pd.DataFrame()
    contentlist = parse_pdf(file)
    t = pd.DataFrame(contentlist).apply(lambda x: find_int(x), axis=1)
    art_df, idx_list = positions(t,contentlist, art_df)
    art_df = delivery(contentlist, art_df, idx_list)
    art_df = deliveryID(art_df, file)
    t2 = pd.DataFrame(contentlist).apply(lambda x: str(x)[5:14]=='Bestellnr', axis=1)
    art_df, idx_order_nr, order_nr_list = order_nr(t2, contentlist, art_df, idx_list)
    art_df = added(idx_order_nr, contentlist, order_nr_list, art_df)
    art_df = pfand(contentlist, art_df)
    table = pd.concat([table, art_df], ignore_index=True)

table.to_excel('table.xlsx')

  0%|          | 0/194 [00:00<?, ?it/s]

1.pdf


2023-10-23 12:45:16,009 [MainThread  ] [WARNI]  Failed to see startup log message; retrying...
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


10.pdf
100.pdf
101.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


102.pdf
103.pdf
104.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


105.pdf
106.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


107.pdf
108.pdf
109.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


11.pdf
110.pdf
111.pdf
112.pdf
113.pdf
114.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


115.pdf
116.pdf
117.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


118.pdf
119.pdf
12.pdf
120.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


121.pdf
122.pdf
123.pdf
124.pdf
125.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


126.pdf
127.pdf
128.pdf
129.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


13.pdf
130.pdf
131.pdf
132.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


133.pdf
134.pdf
135.pdf
136.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


137.pdf
138.pdf
139.pdf
14.pdf
140.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


141.pdf
142.pdf
143.pdf
144.pdf
145.pdf
146.pdf
147.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


148.pdf
149.pdf
15.pdf
150.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


151.pdf
152.pdf
153.pdf
154.pdf
155.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


156.pdf
157.pdf
158.pdf
159.pdf
16.pdf
160.pdf
161.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


162.pdf
163.pdf
164.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


165.pdf
166.pdf
167.pdf
168.pdf
169.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


17.pdf
170.pdf
171.pdf
172.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


173.pdf
174.pdf
175.pdf
176.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


177.pdf
178.pdf
179.pdf
18.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


180.pdf
181.pdf
182.pdf
183.pdf
184.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


185.pdf
186.pdf
187.pdf
188.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


189.pdf
19.pdf
190.pdf
191.pdf
192.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


193.pdf
194.pdf
2.pdf
20.pdf
21.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


22.pdf
23.pdf
24.pdf
25.pdf
26.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


27.pdf
28.pdf
29.pdf
3.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


30.pdf
31.pdf
32.pdf
33.pdf
34.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


35.pdf
36.pdf
37.pdf
38.pdf
39.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


4.pdf
40.pdf
41.pdf
42.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


43.pdf
44.pdf
45.pdf
46.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


47.pdf
48.pdf
49.pdf
5.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


50.pdf
51.pdf
52.pdf
53.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


54.pdf
55.pdf
56.pdf
57.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


58.pdf
59.pdf
6.pdf
60.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


61.pdf
62.pdf
63.pdf
64.pdf
65.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


66.pdf
67.pdf
68.pdf
69.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


7.pdf
70.pdf
71.pdf
72.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


73.pdf
74.pdf
75.pdf
76.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


77.pdf
78.pdf
79.pdf
8.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


80.pdf
81.pdf
82.pdf
83.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


84.pdf
85.pdf
86.pdf
87.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


88.pdf
89.pdf
9.pdf
90.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


91.pdf
92.pdf
93.pdf
94.pdf
95.pdf


  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)
  out = isinstance(int(entry), int)


96.pdf
97.pdf
98.pdf
99.pdf


  out = isinstance(int(entry), int)


In [12]:
table

Unnamed: 0,position,article,paid per unit,amount,paid per position,reduced,original price per unit,original price per position,binding,quantity,unit,delivery_day,delivery_month,deliveryID,order-nr,add_day,add_month,pfand
0,1,Leimer Backerbsen,1.19,1,1.19,False,1.19,1.19,200g,200.0,g,16,9,1.pdf,806-062-0784,13,9,0.00
1,2,Radieschen,0.59,1,0.59,False,0.59,0.59,1 Bund,1.0,Bund,16,9,1.pdf,806-062-0784,13,9,0.00
2,3,Elinas Joghurt Honig Griechischer Art,1.89,1,1.89,False,1.89,1.89,4 x 150g,600.0,g,16,9,1.pdf,806-062-0784,13,9,0.00
3,4,Knorr Buchstaben Suppe,0.89,2,1.78,False,0.89,1.78,82g,82.0,g,16,9,1.pdf,806-062-0784,13,9,0.00
4,5,Knorr Hühnersuppe Nudeltopf Mix,0.89,2,1.78,False,0.89,1.78,69g,69.0,g,16,9,1.pdf,806-062-0784,13,9,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6967,49,Dahlhoff Aioli Creme,2.19,1,2.19,False,2.19,2.19,250g,250.0,g,10,5,99.pdf,103-773-0415,9,5,-6.03
6968,50,Originale Grana Padano gerieben,1.69,1,1.69,False,1.69,1.69,100g,100.0,g,10,5,99.pdf,105-093-0132,9,5,-6.03
6969,51,"Joghurt 1,5%",0.49,1,0.49,False,0.49,0.49,500g,500.0,g,10,5,99.pdf,105-093-0330,9,5,-6.03
6970,52,Schlagsahne 30%,0.69,2,1.38,False,0.69,1.38,200g,200.0,g,10,5,99.pdf,105-093-0330,9,5,-6.03
