<a href="https://colab.research.google.com/github/HAUCommunityFridge/James_Fareshare_Data/blob/main/scripts/datacleaning.ipynb" target="_parent">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>


In [1]:
# Imports
import pandas as pd
import os
import re
from openpyxl import load_workbook
from datetime import datetime

In [2]:
#@title **Step 1: Clone the GitHub Repository**
# This step clones the required GitHub repository into the Colab environment.

# Define the path where the repository will be cloned
repo_path = '/content/James_Fareshare_Data'

# Check if the repository already exists
if not os.path.exists(repo_path):
    # Clone the repository if it doesn't exist
    !git clone https://github.com/HAUCommunityFridge/James_Fareshare_Data
else:
    # If the repository exists, pull the latest changes
    os.chdir(repo_path)
    !git pull

# Verify cloned repository
print("Repository contents:")
!ls /content/James_Fareshare_Data

Already up to date.
Repository contents:
data  docs  James_Fareshare_Data.Rproj	scripts


In [None]:
#@title **Step 2: Inital loading**

In [5]:
# Define the file path to the Excel file
file_path = "/content/James_Fareshare_Data/data/ORIGINAL Fareshare records from April to 6 Aug plus.xlsx"

# Load the workbook and get the sheet names
workbook = load_workbook(filename=file_path, read_only=True)
sheet_names = workbook.sheetnames

# Specify the sheets to ignore based on their indices
# These sheets may contain metadata or irrelevant information
sheets_to_ignore = [0, 1, 8, 11, 13, 14, 15, 16]

# Exclude the specified sheets from the list of sheets to load
sheets_to_load = [sheet for idx, sheet in enumerate(sheet_names) if idx not in sheets_to_ignore]

In [26]:
#@title **Step 3: Inital data cleaning and merging**

In [6]:
# Define the standard column names to be used across all sheets, including "Stock code"
column_names = ["Date", "Category", "Product.brand..name.and.type", "Barcode", "Quantity", "Unit",
                "Unit.weight", "Total.weight", "Unit.price", "Total.price", "Expiry.date", "Batch.code", "Stock.code", "SheetName"]

# Helper function to standardize column names
def standardize_column_name(name):
    if isinstance(name, str):
        return re.sub(r'[^a-z]', '', name.lower())
    return name

# Function to clean the 'Total.price' column by removing the 'GBP' suffix and converting to a float
def clean_total_price(price):
    if isinstance(price, str) and 'GBP' in price:
        return float(price.replace(' GBP', '').strip())
    return price

# Function to handle orphaned days with user input
def handle_orphaned_days(data):
    last_valid_date = None
    for i, date in enumerate(data['Date']):
        if pd.isna(date):
            continue
        if date.year == 1970:
            # Orphaned day detected
            if last_valid_date:
                # Find the next valid date, ignoring NaT values
                next_valid_date = None
                for j in range(i + 1, len(data)):
                    if pd.notna(data['Date'].iloc[j]):
                        next_valid_date = data['Date'].iloc[j]
                        break

                # Show previous and next date to the user
                print(f"\nOrphaned entry detected at index {i}. Day: {date.day}")
                print(f"Previous date: {last_valid_date}")
                if next_valid_date:
                    print(f"Next date: {next_valid_date}")
                else:
                    print("No next date available.")

                # Prompt the user to confirm or correct the date
                user_input = input(f"Enter the correct date for this entry (YYYY-MM-DD) or press Enter to use {last_valid_date.year}-{last_valid_date.month}-{date.day}: ")
                if user_input:
                    correct_date = pd.to_datetime(user_input)
                else:
                    correct_date = pd.to_datetime(f"{last_valid_date.year}-{last_valid_date.month}-{date.day}")
                data.at[i, 'Date'] = correct_date
            else:
                print(f"No previous valid date to infer from. Entry at index {i} will be skipped.")
        else:
            last_valid_date = date
    return data

# Function to load and clean each sheet
def clean_sheet(sheet_name):
    # Load the sheet into a DataFrame
    data = pd.read_excel(file_path, sheet_name=sheet_name)

    # Print the first few rows of the original sheet for inspection
    print(f"Original data from sheet {sheet_name}:\n", data.head())

    # Standardize the column names
    data.columns = [standardize_column_name(col) for col in data.columns]

    # Print actual column names for debugging
    print(f"Sheet: {sheet_name}, Columns: {data.columns.tolist()}")

    # Limit the columns to only those that are relevant up to and including 'Stock code'
    relevant_columns = column_names[:-1]  # Exclude 'SheetName' for now
    if len(data.columns) > len(relevant_columns):
        data = data.iloc[:, :len(relevant_columns)]

    # Rename columns to the standard column names
    data.columns = relevant_columns[:len(data.columns)]

    # Add a 'SheetName' column
    data['SheetName'] = sheet_name

    # Remove rows with all NaN values
    data = data.dropna(how='all')

    # Process the 'Date' column: convert to datetime and handle missing dates
    if "Date" in data.columns:
        data['Date'] = pd.to_datetime(data['Date'], errors='coerce', dayfirst=True)

        # Handle orphaned day entries with user input
        data = handle_orphaned_days(data)

        # Forward fill any missing dates
        data['Date'] = data['Date'].ffill()

    # Clean the 'Total.price' column
    if 'Total.price' in data.columns:
        data['Total.price'] = data['Total.price'].apply(clean_total_price)
        data['Total.price'] = pd.to_numeric(data['Total.price'], errors='coerce')

    # Remove rows containing 'GBP' for now
    data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)

    # Drop columns containing only NA values
    data = data.dropna(axis=1, how='all')

    # Check for missing values in the cleaned data
    missing_values = data.isna().sum()
    print(f"Missing values in sheet {sheet_name}:\n{missing_values}")

    # Print the first few rows of the cleaned data
    print(f"Cleaned data from sheet {sheet_name}:\n", data.head())

    return data

# Load and clean all relevant sheets, storing each cleaned DataFrame in a list
data_list = [clean_sheet(sheet) for sheet in sheets_to_load]

# Remove NULL elements from the list, if any
data_list = [data for data in data_list if data is not None]

# Combine all cleaned data into a single DataFrame
if data_list:
    consolidated_data = pd.concat(data_list, ignore_index=True)

    # Ensure 'Stock code' is included and not dropped
    if 'Stock.code' not in consolidated_data.columns:
        consolidated_data['Stock.code'] = pd.NA

    # Remove rows where 'Category' is NaN, as they may not be useful
    consolidated_data = consolidated_data[consolidated_data['Category'].notna()]

    # Remove duplicate rows
    consolidated_data = consolidated_data.drop_duplicates()

    # Check for missing values in the consolidated data
    missing_values_consolidated = consolidated_data.isna().sum()
    print(f"Missing values in consolidated data:\n{missing_values_consolidated}")

    # Save the cleaned data to a new Excel file
    output_file_path = "/content/Cleaned_Fareshare_Data.xlsx"
    consolidated_data.to_excel(output_file_path, index=False)

    print(f"Cleaned data saved to {output_file_path}")

    # Display the first few rows of the cleaned DataFrame
    print(consolidated_data.head())

    # Display the structure of the cleaned DataFrame
    print(consolidated_data.info())
else:
    print("No valid data found to consolidate.")


Original data from sheet Hadley:
     Received          Category  \
0 2024-05-01  Fresh vegetables   
1 2024-05-01               NaN   
2 2024-05-01  Fresh vegetables   
3 2024-05-01               NaN   
4 2024-05-01  Fresh vegetables   

                        Product brand, name and type       Barcode  Quantity  \
0             Tesco Mixed Raisins 60G, Fruit and Veg  3.424605e+06       8.0   
1                                                NaN           NaN       NaN   
2  Tesco Sour Cream Chive Flavoured Pulse & Nut M...  5.059697e+12       7.0   
3                                                NaN           NaN       NaN   
4        Tesco Mixed Peppers 420G (C), Fruit and Veg  3.274880e+06       1.0   

  Unit  Unit weight  Total weight Unit price  Total price  ...  \
0   Kg         0.06          0.51       1.15         9.20  ...   
1  NaN          NaN           NaN       GBP           NaN  ...   
2   Kg         0.03          0.18       0.75         5.95  ...   
3  NaN          

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)
  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


    Received          Category  \
0 2024-08-01  Fresh vegetables   
1 2024-08-01               NaN   
2        NaT  Fresh vegetables   
3        NaT               NaN   
4        NaT  Fresh vegetables   

                        Product brand, name and type     Barcode  Quantity  \
0  Tesco Closed Cup Mushrooms 300G (C), Fruit and...  10111420.0       1.0   
1                                                NaN         NaN       NaN   
2                  Tesco Spinach 250G, Fruit and Veg   3285732.0       1.0   
3                                                NaN         NaN       NaN   
4                  Tesco Houmous 200G, Fruit and Veg  10065907.0       4.0   

  Unit  Unit weight  Total weight Unit price Total price  ...  \
0   Kg         0.32          0.32          1    1.00 GBP  ...   
1  NaN          NaN           NaN        GBP         NaN  ...   
2   Kg         0.28          0.28        1.3    1.30 GBP  ...   
3  NaN          NaN           NaN        GBP         NaN  ...   
4

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


Original data from sheet Market Drayton (June):
     Received                              Category  \
0 2024-06-01  Chilled products with dairy and eggs   
1 2024-06-01                                   NaN   
2        NaT  Chilled products with dairy and eggs   
3        NaT                                   NaN   
4        NaT  Chilled products with dairy and eggs   

                        Product brand, name and type       Barcode  Quantity  \
0             Tesco Chicken Drumsticks 600G, Chilled  5.054269e+12       1.0   
1                                                NaN           NaN       NaN   
2    Tesco British Whole Milk 568Ml, 1 Pint, Chilled  5.031021e+12       9.0   
3                                                NaN           NaN       NaN   
4  Tesco Organic British Whole Milk 2.272L/4 Pint...  5.050180e+12       6.0   

  Unit  Unit weight  Total weight Unit price Total price  ...  \
0   Kg         0.70          0.70        2.3   2.70 GBP   ...   
1  NaN         

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


Original data from sheet Market Drayton (May):
               Received          Category  \
0  2024-05-01 00:00:00  Fresh vegetables   
1  2024-05-01 00:00:00               NaN   
2  2024-05-01 00:00:00  Fresh vegetables   
3  2024-05-01 00:00:00               NaN   
4  2024-05-01 00:00:00  Fresh vegetables   

                 Product brand, name and type     Barcode  Quantity Unit  \
0     Tesco Babyleaf Salad 90G, Fruit and Veg   3312957.0       3.0   Kg   
1                                         NaN         NaN       NaN  NaN   
2  Tesco Little Gem 2 Pack (C), Fruit and Veg  10006962.0       2.0   Kg   
3                                         NaN         NaN       NaN  NaN   
4  Tesco Blackberries 150G (C), Fruit and Veg   3257272.0       6.0   Kg   

   Unit weight  Total weight Unit price Total price  ...  2024-05-22 00:00:00  \
0         0.10          0.29        NaN         3.3  ...                22.59   
1          NaN           NaN        NaN         NaN  ...            

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


Missing values in sheet Market Drayton (May):
Date                              0
Category                        720
Product.brand..name.and.type    720
Barcode                         724
Quantity                        722
Unit                            722
Unit.weight                     722
Total.weight                    720
Unit.price                      768
Total.price                     720
Stock.code                      724
SheetName                         0
dtype: int64
Cleaned data from sheet Market Drayton (May):
         Date          Category                Product.brand..name.and.type  \
0 2024-05-01  Fresh vegetables     Tesco Babyleaf Salad 90G, Fruit and Veg   
1 2024-05-01               NaN                                         NaN   
2 2024-05-01  Fresh vegetables  Tesco Little Gem 2 Pack (C), Fruit and Veg   
3 2024-05-01               NaN                                         NaN   
4 2024-05-01  Fresh vegetables  Tesco Blackberries 150G (C), Fruit and V

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)
  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


Original data from sheet Malinslee:
     Received                              Category  \
0 2024-05-02  Chilled products with dairy and eggs   
1 2024-05-02                                   NaN   
2 2024-05-02  Chilled products with dairy and eggs   
3 2024-05-02                                   NaN   
4 2024-05-02                      Fresh vegetables   

                        Product brand, name and type       Barcode  Quantity  \
0  Tesco British Chicken Breast Mini Fillets 400G...  5.057754e+12       1.0   
1                                                NaN           NaN       NaN   
2        Tesco Chicken Breast Portions 580G, Chilled  5.057754e+12       3.0   
3                                                NaN           NaN       NaN   
4    Tesco Sweet & Crunchy Salad 250G, Fruit and Veg  3.336922e+06       2.0   

  Unit  Unit weight  Total weight Unit price  Total price  ...  \
0   Kg         0.44          0.44       3.75         3.75  ...   
1  NaN          NaN      

  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)
  data = data.applymap(lambda x: pd.NA if isinstance(x, str) and 'GBP' in x else x)


Cleaned data saved to /content/Cleaned_Fareshare_Data.xlsx
        Date          Category  \
0 2024-05-01  Fresh vegetables   
2 2024-05-01  Fresh vegetables   
4 2024-05-01  Fresh vegetables   
6 2024-05-01  Fresh vegetables   
8 2024-05-01  Fresh vegetables   

                        Product.brand..name.and.type       Barcode  Quantity  \
0             Tesco Mixed Raisins 60G, Fruit and Veg  3.424605e+06       8.0   
2  Tesco Sour Cream Chive Flavoured Pulse & Nut M...  5.059697e+12       7.0   
4        Tesco Mixed Peppers 420G (C), Fruit and Veg  3.274880e+06       1.0   
6       Tesco Pink Lady Apple Pot 80G, Fruit and Veg  5.057753e+12       2.0   
8  Tesco Nut Bites Mix Baklava Flavour 25G, Fruit...  5.059698e+12      10.0   

  Unit  Unit.weight  Total.weight Unit.price  Total.price  Stock.code  \
0   Kg         0.06          0.51       1.15         9.20  90606230.0   
2   Kg         0.03          0.18       0.75         5.95  91177343.0   
4   Kg         0.57          0.57   

In [None]:
#@title **Step 4: Waitrose data cleaning and merging**

In [7]:
# Load the Waitrose sheet data to inspect it
waitrose_data = pd.read_excel(file_path, sheet_name="Waitrose")

# Display the first few rows of the raw data to understand its structure
print("Raw Waitrose Data:")
print(waitrose_data.head(50))

Raw Waitrose Data:
     Received                              Category  \
0  2024-05-29  Chilled products with dairy and eggs   
1         NaT                          Chilled fish   
2         NaT                          Chilled meat   
3         NaT              Bread and bread products   
4         NaT                      Fresh vegetables   
5         NaT                           Fresh fruit   
6         NaT                           Fresh fruit   
7         NaT                           Fresh fruit   
8  2024-06-08  Chilled products with dairy and eggs   
9         NaT                          Chilled fish   
10        NaT                          Chilled meat   
11        NaT                         Other grocery   
12        NaT              Bread and bread products   
13        NaT                      Fresh vegetables   
14        NaT                           Fresh fruit   
15 2024-06-15                           Fresh fruit   
16        NaT              Bread and bread pro

In [8]:
# Helper function to standardize column names
def standardize_column_name(name):
    if isinstance(name, str):
        return re.sub(r'[^a-z]', '', name.lower())
    return name

# Function to clean the Waitrose sheet
def clean_waitrose_sheet(data):
    # Standardize the column names
    data.columns = [standardize_column_name(col) for col in data.columns]

    # Convert empty strings to NaN
    data.replace("", pd.NA, inplace=True)

    # Fill missing Date values with the last valid observation forward
    data['received'] = data['received'].replace('nan', pd.NA)  # Treat 'nan' as NA
    data['received'].fillna(method='ffill', inplace=True)  # Forward fill the date

    # Ensure the 'received' column is treated as a string before applying string operations
    data['received'] = data['received'].astype(str)

    # Remove duplicate headers
    data = data[~data['received'].str.contains('category', na=False)]

    # Drop completely empty rows
    data.dropna(how='all', inplace=True)

    # Replace remaining NaN values with "N/A"
    data.fillna(value="N/A", inplace=True)

    # Standardize column names to match the other datasets
    column_mappings = {
        'received': 'Date',
        'category': 'Category',
        'productbrandnameandtype': 'Product.brand..name.and.type',
        'quantity': 'Quantity',
        'unit': 'Unit',
        'kgperunit': 'Unit.weight',
        'totalweightkg': 'Total.weight'  # Adjust for total weight column
    }

    data.rename(columns=column_mappings, inplace=True)

    # Check and create missing columns with default values
    required_columns = ['Date', 'Category', 'Product.brand..name.and.type', 'Barcode', 'Quantity',
                        'Unit', 'Unit.weight', 'Total.weight', 'Unit.price', 'Total.price', 'SheetName']

    for col in required_columns:
        if col not in data.columns:
            data[col] = pd.NA

    # Calculate Total Weight if not present
    if data['Total.weight'].isna().all() and 'Unit.weight' in data.columns and 'Quantity' in data.columns:
        data['Total.weight'] = pd.to_numeric(data['Quantity'], errors='coerce') * pd.to_numeric(data['Unit.weight'], errors='coerce')

    # Format Date to remove time
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce').dt.date

    # Add a column for the sheet name
    data['SheetName'] = 'Waitrose'

    # Rearrange columns to match the desired output
    data = data[required_columns]

    # Print the cleaned data for debugging
    print(f"Cleaned Waitrose Data:\n{data.head()}")

    return data

# Load the Waitrose sheet with only the required columns
file_path = "/content/James_Fareshare_Data/data/ORIGINAL Fareshare records from April to 6 Aug plus.xlsx"
waitrose_data = pd.read_excel(file_path, sheet_name="Waitrose", usecols="A:I")  # Adjust the range to include only the necessary columns

# Apply the custom cleaning function to the Waitrose data
cleaned_waitrose_data = clean_waitrose_sheet(waitrose_data)

# Save the cleaned Waitrose data to an Excel file
waitrose_output_file_path = "/content/Cleaned_Waitrose_Data.xlsx"
cleaned_waitrose_data.to_excel(waitrose_output_file_path, index=False)

print(f"Cleaned Waitrose data saved to {waitrose_output_file_path}")

Cleaned Waitrose Data:
         Date                              Category  \
0  2024-05-29  Chilled products with dairy and eggs   
1  2024-05-29                          Chilled fish   
2  2024-05-29                          Chilled meat   
3  2024-05-29              Bread and bread products   
4  2024-05-29                      Fresh vegetables   

  Product.brand..name.and.type Barcode  Quantity   Unit  Unit.weight  \
0                          N/A    <NA>       2.5  crate          6.5   
1                          N/A    <NA>       0.5  crate          6.5   
2                          N/A    <NA>       1.0  crate          6.5   
3                          N/A    <NA>       1.5  crate          6.5   
4     Lettuce and spring onion    <NA>       3.0  crate          6.5   

   Total.weight Unit.price Total.price SheetName  
0         16.25       <NA>        <NA>  Waitrose  
1          3.25       <NA>        <NA>  Waitrose  
2          6.50       <NA>        <NA>  Waitrose  
3        

  data['received'].fillna(method='ffill', inplace=True)  # Forward fill the date
  data.fillna(value="N/A", inplace=True)


In [9]:
# Load the previously cleaned Fareshare data
fareshare_data = pd.read_excel("/content/Cleaned_Fareshare_Data.xlsx")

# Append the cleaned Waitrose data
merged_data = pd.concat([fareshare_data, cleaned_waitrose_data], ignore_index=True)

# Save the merged dataset including Waitrose data
merged_output_file_path = "/content/Merged_Fareshare_With_Waitrose_Data.xlsx"
merged_data.to_excel(merged_output_file_path, index=False)

print(f"Merged data including Waitrose saved to {merged_output_file_path}")

# Load the merged dataset to verify
final_merged_data = pd.read_excel(merged_output_file_path)

# Display the first few rows to ensure the data is correctly combined
print("Final Merged Data:")
print(final_merged_data.head())

  merged_data = pd.concat([fareshare_data, cleaned_waitrose_data], ignore_index=True)


Merged data including Waitrose saved to /content/Merged_Fareshare_With_Waitrose_Data.xlsx
Final Merged Data:
        Date          Category  \
0 2024-05-01  Fresh vegetables   
1 2024-05-01  Fresh vegetables   
2 2024-05-01  Fresh vegetables   
3 2024-05-01  Fresh vegetables   
4 2024-05-01  Fresh vegetables   

                        Product.brand..name.and.type       Barcode  Quantity  \
0             Tesco Mixed Raisins 60G, Fruit and Veg  3.424605e+06       8.0   
1  Tesco Sour Cream Chive Flavoured Pulse & Nut M...  5.059697e+12       7.0   
2        Tesco Mixed Peppers 420G (C), Fruit and Veg  3.274880e+06       1.0   
3       Tesco Pink Lady Apple Pot 80G, Fruit and Veg  5.057753e+12       2.0   
4  Tesco Nut Bites Mix Baklava Flavour 25G, Fruit...  5.059698e+12      10.0   

  Unit  Unit.weight  Total.weight  Unit.price  Total.price  Stock.code  \
0   Kg         0.06          0.51        1.15         9.20  90606230.0   
1   Kg         0.03          0.18        0.75         5.9

In [12]:

# Load the merged data
file_path = "/content/Merged_Fareshare_With_Waitrose_Data.xlsx"
merged_data = pd.read_excel(file_path)

# Rename the columns as per your request
merged_data.rename(columns={
    'Product.brand..name.and.type': 'Product',
    'Unit.weight': 'Unit_Wt_Kg',
    'Total.weight': 'Total_Wt_Kg',
    'Unit.price': 'Unit_Price_GBP',
    'Total.price': 'Total_Price_GBP',
    'Stock.code': 'Stock_Code',
    'SheetName': 'Store'
}, inplace=True)

# Convert dates to ISO format (YYYY-MM-DD)
merged_data['Date'] = pd.to_datetime(merged_data['Date'], errors='coerce').dt.strftime('%Y-%m-%d')

# Define a function to remove anything in parentheses
def remove_parentheses(text):
    return re.sub(r'\s*\(.*?\)\s*', '', str(text))

# Apply the function to remove parentheses across all columns
merged_data = merged_data.applymap(remove_parentheses)

# Replace NA, n/a, N/A with blank cells
merged_data.replace(["NA", "n/a", "N/A"], "", inplace=True)

# Update the 'Unit' column: change "Kg" to "Item", leave "Crate" as is
merged_data['Unit'] = merged_data['Unit'].astype(str).apply(lambda x: 'Item' if x.lower() == 'kg' else x)

# Sort the dataset by Date
merged_data = merged_data.sort_values(by='Date')

# Save the cleaned and ordered dataset to a new Excel file
final_output_path = "/content/Cleaned_Ordered_Fareshare_Data.xlsx"
merged_data.to_excel(final_output_path, index=False)

print(f"Final cleaned and ordered data saved to {final_output_path}")

# Display the first few rows of the cleaned dataframe to verify changes
print(merged_data.head())

  merged_data = merged_data.applymap(remove_parentheses)


Final cleaned and ordered data saved to /content/Cleaned_Ordered_Fareshare_Data.xlsx
            Date                  Category  \
2734  2024-04-22  Bread and bread products   
2724  2024-04-22          Fresh vegetables   
2725  2024-04-22          Fresh vegetables   
2726  2024-04-22          Fresh vegetables   
2727  2024-04-22          Fresh vegetables   

                                                Product     Barcode Quantity  \
2734          Tesco Large Wholemeal Baps 4 Pack, Bakery   3012369.0      4.0   
2724         Jaffa Orange Minimum 4 Pack, Fruit and Veg  10004241.0      3.0   
2725                        Lemons 4Pack, Fruit and Veg  10098752.0      9.0   
2726     Tesco Granny Smith Apple 6 Pack, Fruit and Veg   3249499.0      3.0   
2727  Tesco Tenderstem & Mixed Vegetable 160G, Fruit...   3285541.0     10.0   

      Unit Unit_Wt_Kg Total_Wt_Kg Unit_Price_GBP Total_Price_GBP  Stock_Code  \
2734  Item       0.37         1.5            0.9             3.6  51345211.0 