In [3]:
import pandas as pd

# Load dataset (assuming a CSV file with relevant data)
df = pd.read_csv('../data/items_cp_stats.csv')

# Set the first row as the column headers
df.columns = df.iloc[0]

# Drop the first row, which is now the header
df = df[1:]

# Reset the index if desired
df.reset_index(drop=True, inplace=True)

df.to_csv('../data/items_cp_stats.csv')

df.head()

Unnamed: 0,FOOD COMMODITY GROUP,FOOD COMMODITY TYPOLOGY,FOOD COMMODITY sub-TYPOLOGY,FOOD COMMODITY ITEM,n,mean,median
0,AGRICULTURAL PROCESSED,BEER,-,BEER IN CAN,5,0.68988,0.693
1,AGRICULTURAL PROCESSED,BEER,-,BEER IN GLASS,9,1.033266667,0.9605
2,AGRICULTURAL PROCESSED,BEER,-,BEER MODULAR CAN,5,0.54414,0.5306
3,AGRICULTURAL PROCESSED,BISCUITS,-,CHOCOLATE OR CREAM FILLED COOKIES**,9,1.691277778,1.533
4,AGRICULTURAL PROCESSED,BISCUITS,-,SIMPLE COOKIES**,12,1.496333333,1.39075


In [4]:
df.columns

Index(['FOOD COMMODITY GROUP', 'FOOD COMMODITY TYPOLOGY',
       'FOOD COMMODITY sub-TYPOLOGY', 'FOOD COMMODITY ITEM', 'n', 'mean',
       'median'],
      dtype='object', name=0)

In [6]:
# Read user input from a text file
def read_user_input(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return [line.strip() for line in lines]

In [7]:
def extract_item_quantity(line):
    words = line.split()
    quantity = None
    for i, word in enumerate(words):
        if word.lower().endswith('kg'):
            try:
                quantity = float(word.lower().replace('kg', ''))
            except ValueError:
                continue
            item = ' '.join(words[:i])
            return item, quantity
    return line, quantity

In [8]:
def process_user_input(inputs, data):
    items = data['FOOD COMMODITY ITEM'].str.lower().unique()
    commodity_typologies = data['FOOD COMMODITY TYPOLOGY'].str.lower().unique()
    result = {}

    for line in inputs:
        item, quantity = extract_item_quantity(line)
        item = item.lower()

        # Check if the item exists in the dataset
        if item in items:
            if quantity is None:
                quantity = input(f"Enter the quantity in KG for {item}: ").strip()
            result[item] = quantity
        else:
            # Check if it's a general category like 'beer'
            matched_typology = [typology for typology in commodity_typologies if typology in item]
            if matched_typology:
                specific_items = data[data['FOOD COMMODITY TYPOLOGY'].str.lower() == matched_typology[0]]['FOOD COMMODITY ITEM']
                specific_item = input(f"Please specify the type for {item}: {list(specific_items)}: ").strip()
                if specific_item.lower() in items:
                    if quantity is None:
                        quantity = input(f"Enter the quantity in KG for {specific_item}: ").strip()
                    result[specific_item] = quantity
                else:
                    print(f"Specific item '{specific_item}' not found in the dataset.")
            else:
                print(f"Item '{item}' not found in the dataset.")

    return result

In [11]:
# Main logic
if __name__ == "__main__":
    user_inputs = read_user_input("../data/extracted_text/extracted.txt")
    processed_data = process_user_input(user_inputs, df)

    print(f"Processed Data: {processed_data}")

Item 'chicken' not found in the dataset.
Item 'egg pasta' not found in the dataset.
Processed Data: {'BEER IN CAN': '2 KG', 'chocolate': '5', 'ketchup': '4'}
