# In [None]:
#Imports
import re  # for cleaning the data
import sys
import time
from datetime import datetime
from enum import Enum
from os import path

import pandas as pd

class Products(Enum):
    # Every product the cleaner knows about.
    # To register another one, add a line of the form
    #     ProductName = (next index, [], [])
    # Both lists start out empty; they get filled in through the ProductsLoader class.
    Bacon = (0, [], [])
    Eggs = (1, [], [])
    HeirloomTomatoes = (2, [], [])


class DataCleaner():
    """Turn one raw scraped product row into a cleaned, flat record.

    ``cleanUp`` is the entry point. It lays out the columns for the requested
    product, derives the weight/amount columns, stamps the store location,
    classifies the brand as local or not, and converts the price text to
    floats. The remaining public methods are the individual steps and work on
    ``self.Data``.
    """

    # Record built by the most recent cleanUp call. cleanUp rebinds this on the
    # instance every time, so the class-level dict is only an empty placeholder.
    Data = {}

    def cleanUp(self, input, inputType, url):
        """Build and clean the record for one scraped row.

        Args:
            input: Raw row. Indexes 0-3 are read as product name, current
                price, original price and brand. Index 4 is the listed weight
                (bacon), the listed amount (eggs) or the by-weight text
                (tomatoes); tomatoes additionally read index 5 as the size.
            inputType: Product to clean as; compared against the names of the
                ``Products`` members.
            url: Page the row came from; stored unchanged.

        Returns:
            The record's values as a list in column order, or None when
            inputType is not one of the known products.
        """
        self.productType = inputType
        if self.productType == Products.Bacon.name:
            self.Data = self._newRecord(input, url, 'Weight in lbs', 'True Weight', input[4])
            # No listed weight: fall back to clues in the name and price text.
            if self.Data['True Weight'] is None:
                self.Data['True Weight'] = self.findWeight()
            if self.Data['True Weight'] is not None:
                self.Data['Weight in lbs'] = self.ozToLb(self.Data['True Weight'])
        elif self.productType == Products.Eggs.name:
            self.Data = self._newRecord(input, url, 'Amount in dz', 'True Amount', input[4])
            self.eggConverter()
        elif self.productType == Products.HeirloomTomatoes.name:
            self.Data = self._newRecord(input, url, 'Weight in lbs', 'True Weight', None, organic=True)
            self.tomatoModification(input[4], input[5])
        else:
            return None
        self.setLocationalData()
        self.determineLocality()
        self.cleanPricing()
        return list(self.Data.values())

    def _newRecord(self, input, url, measureKey, trueMeasureKey, trueMeasure, organic=False):
        """Lay out the columns of a fresh record in output order.

        The dict's insertion order is the column order of the list that
        cleanUp returns, so the key order here matters.
        """
        record = {
            'Product Type': input[0],
            'Current Price': input[1],
            'Orignal Price': input[2],
            measureKey: None,
            trueMeasureKey: trueMeasure,
            'Brand': input[3],
        }
        if organic:
            record['Organic'] = None
        record.update({
            'Local': None,
            'Address': None,
            'State': None,
            'City': None,
            'Zip Code': None,
            # A single clock read, so the date cannot straddle midnight.
            'Date Collected': datetime.today().date().isoformat(),
            'Url': url,
        })
        return record

    @staticmethod
    def _normalize(text):
        """Lowercase text and remove its spaces; None becomes ''."""
        if text is None:
            return ''
        return str(text).lower().replace(' ', '')

    @staticmethod
    def _parsePrice(text):
        """Return the digits and dots of text as a float, or None if unusable."""
        if text is None:
            return None
        digits = ''.join(c for c in text if c.isdigit() or c == '.')
        try:
            return float(digits)
        except ValueError:
            # Empty, or not a single well-formed number (e.g. '9.99.').
            return None

    def cleanPricing(self):
        """Convert the two price columns from scraped text to floats.

        If the current price cannot be parsed both columns are left as they
        are. A missing or unparseable original price is set to the current
        price.
        """
        current = self._parsePrice(self.Data['Current Price'])
        if current is None:
            return
        self.Data['Current Price'] = current
        original = self._parsePrice(self.Data['Orignal Price'])
        self.Data['Orignal Price'] = current if original is None else original

    def ozToLb(self, input):
        """Convert a weight description to pounds.

        Returns:
            A float number of pounds for text containing 'oz', '/lb' or 'lb'
            ('/lb' with no number counts as 1.0); None when the unit is
            present but no number precedes it; otherwise the lowercased text
            itself, unchanged from the original behaviour.
        """
        weight = str(input).lower()
        if 'oz' in weight:
            ounces = self.stringValueExtraction(weight, 'oz')
            # Guard the division: there may be no number in front of the unit.
            return None if ounces is None else ounces / 16.0
        if '/lb' in weight:
            pounds = self.stringValueExtraction(weight, '/lb')
            return 1.0 if pounds is None else pounds
        if 'lb' in weight:
            return self.stringValueExtraction(weight, 'lb')
        return weight

    #Tomatoes are tricky so we have a function that does this part
    def tomatoModification(self, byWeight, size):
        """Fill in the 'Organic', 'True Weight' and 'Weight in lbs' columns.

        Args:
            byWeight: Scraped by-weight text, or None.
            size: Scraped size text, or None. Takes precedence over byWeight.
        """
        # The organic flag can be read off the product name.
        if self.Data['Organic'] is None:
            if 'organic' in self._normalize(self.Data['Product Type']):
                self.Data['Organic'] = 'Organic'
        if size is not None:
            self.Data['True Weight'] = size
        elif byWeight is not None:
            self.Data['True Weight'] = byWeight
        elif self.Data['True Weight'] is None:
            # Nothing listed: look for a per-each or per-pound marker instead.
            for field in ('Current Price', 'Product Type', 'Orignal Price'):
                text = self.Data[field]
                if text is None:
                    # The original price is frequently missing.
                    continue
                if '/ea' in text:
                    self.Data['True Weight'] = f"{self.stringValueExtraction(text, '/ea')}/ea"
                    return
                if '/lb' in text:
                    weight = self.stringValueExtraction(text, '/lb')
                    self.Data['True Weight'] = f"{weight}/lb"
                    self.Data['Weight in lbs'] = 1.0
                    return
            return
        if '/lb' in str(self.Data['True Weight']):
            self.Data['Weight in lbs'] = self.ozToLb(self.Data['True Weight'])

    #Helper to reduce code. Splits the string and returns the float value
    def stringValueExtraction(self, string, stringType):
        """Return the number formed by the digits and dots before stringType.

        Everything in front of the first occurrence of stringType is scanned
        and its digits and dots are joined into one number.

        Returns:
            The float value, or None when there are no digits or the joined
            characters are not a valid number (e.g. '.1.5.2').
        """
        prefix = string.split(stringType)[0]
        value = ''.join(ch for ch in prefix if ch.isdigit() or ch == '.')
        try:
            return float(value)
        except ValueError:
            return None

    #If no weight is given we look at other places that could have what we need
    def findWeight(self):
        """Search the price and name text for a weight in pounds or ounces.

        Returns:
            Text such as '12.0 oz' or '1.0 lb', or None when no field holds a
            unit with a number. A '/lb' marker is reported as '1.0 lb'.
        """
        # (token searched for, unit written to the result), in priority order.
        unitTokens = (('pound', 'lb'), ('ounce', 'oz'), ('/lb', 'lb'), ('lb', 'lb'), ('oz', 'oz'))
        for field in ('Current Price', 'Product Type', 'Orignal Price'):
            if self.Data[field] is None:
                continue
            text = self._normalize(self.Data[field])
            for token, unit in unitTokens:
                if token not in text:
                    continue
                if token == '/lb':
                    return f"{1.0} lb"
                value = self.stringValueExtraction(text, token)
                # A unit word without a number is not a weight; keep looking
                # rather than returning the text 'None lb'.
                if value is not None:
                    return f"{value} {unit}"
        return None

    @staticmethod
    def _quantityBefore(text, unit):
        """Return the number written directly before unit in text, or None."""
        match = re.search(r'(\d+(?:\.\d+)?)' + re.escape(unit), text)
        return float(match.group(1)) if match else None

    def _parseEggAmount(self, text):
        """Read an egg amount from text.

        Returns:
            A ``(label, dozens)`` pair such as ('18.0 ct', 1.5), or None when
            text holds no amount. 'dozen' or 'dz' with no number directly in
            front means one dozen. 'ct' only counts when a number sits
            directly in front of it, so that words merely containing 'ct'
            (e.g. 'Product Price') are not mistaken for a count.
        """
        text = self._normalize(text)
        for unit in ('dozen', 'dz'):
            if unit in text:
                dozens = self._quantityBefore(text, unit)
                if dozens is None:
                    return f"{1} dz", 1.0
                return f"{dozens} dz", dozens
        count = self._quantityBefore(text, 'ct')
        if count is None:
            return None
        return f"{count} ct", count / 12

    #Eggs don't have weight so we use amount
    def eggConverter(self):
        """Fill in the 'True Amount' and 'Amount in dz' columns.

        A listed 'True Amount' is kept as scraped and only converted to
        dozens. Otherwise the name and price text are searched, and both
        columns stay None when nothing is found.
        """
        listed = self.Data['True Amount']
        if listed is not None:
            parsed = self._parseEggAmount(listed)
            if parsed is not None:
                self.Data['Amount in dz'] = parsed[1]
            return
        for field in ('Product Type', 'Current Price', 'Orignal Price'):
            text = self.Data[field]
            if text is None:
                continue
            parsed = self._parseEggAmount(text)
            if parsed is not None:
                self.Data['True Amount'], self.Data['Amount in dz'] = parsed
                return

    def determineLocality(self):
        """Set 'Local' to 'Local', 'Non-local' or 'None Listed'.

        Brand names are compared lowercased and without spaces. When the row
        has no brand, the product name is searched for any known brand
        instead. Tomatoes whose name contains 'local' are marked 'Local'
        outright. Unknown product types are left untouched.
        """
        if self.productType == Products.Bacon.name:
            # Readable forms: DM Bacon Co, Des Moines Bacon Co., Webster City,
            # Berkwood Farms, De Bruin Ranch / Niman Ranch, Jolly Posh, Nueske
            localBrands = {'dmbaconco', 'desmoinesbaconco.', 'webstercity', 'berkwoodfarms', 'debruinranch'}
            nonlocalBrands = {'nimanranch', 'jollyposh', 'nueske'}
        elif self.productType == Products.Eggs.name:
            localBrands = set()
            nonlocalBrands = set()
        elif self.productType == Products.HeirloomTomatoes.name:
            localBrands = set()
            nonlocalBrands = set()
            # Sometimes the name itself says so.
            if 'local' in self._normalize(self.Data['Product Type']):
                self.Data['Local'] = 'Local'
                return
        #Add product brands here
        else:
            # No brand lists exist for this product.
            return

        brand = self.Data['Brand']
        if brand is None:
            # The name holds more than the brand, so search inside it.
            productName = self._normalize(self.Data['Product Type'])
            isLocal = any(known in productName for known in localBrands)
            isNonlocal = any(known in productName for known in nonlocalBrands)
        else:
            key = self._normalize(brand)
            isLocal = key in localBrands
            isNonlocal = key in nonlocalBrands

        if isLocal:
            self.Data['Local'] = "Local"
        elif isNonlocal:
            self.Data['Local'] = "Non-local"
        else:
            self.Data['Local'] = "None Listed"

    def setLocationalData(self):
        """Stamp the record with the hard-coded store address."""
        self.Data['Address'] = '2002 Woodland Avenue'
        self.Data['State'] = 'IA'
        self.Data['City'] = 'Des Moines'
        self.Data['Zip Code'] = '50312'


# print(len(baconFrameColumns))

# Hand-written scraped rows for smoke-testing the cleaner.
# Layout: [product name, current price, original price, brand, weight]
test1 = ['HARDWOOD UNCURED BACON  99/lb', '$10.99/ea Product Price', None, 'DM BACON CO', '5 lb']
test2 = ['SLICED SLAB BACON', '$7.99 Sale Price', '$9.99/lb', 'WEBSTER CITY', "20 OZ"]

# Empty output frames, one per product. Apart from the first column name,
# the columns follow the key order of the records DataCleaner.cleanUp builds.
baconFrame = pd.DataFrame(columns=[
    'Bacon', 'Current Price', 'Orignal Price', 'Weight in lbs', 'True Weight',
    'Brand', 'Local', 'Address', 'State', 'City', 'Zip Code', 'Date Collected', 'Url',
])
eggFrame = pd.DataFrame(columns=[
    'Egg', 'Current Price', 'Orignal Price', 'Amount in dz', 'True Amount',
    'Brand', 'Local', 'Address', 'State', 'City', 'Zip Code', 'Date Collected', 'Url',
])
tomatoFrame = pd.DataFrame(columns=[
    'Heirloom Tomatoes', 'Current Price', 'Orignal Price', 'Weight in lbs', 'True Weight',
    'Brand', 'Organic', 'Local', 'Address', 'State', 'City', 'Zip Code', 'Date Collected', 'Url',
])

cleaner = DataCleaner()

# Both bacon rows are cleaned against the same listing URL.
for baconRow in (test1, test2):
    x = cleaner.cleanUp(
        baconRow,
        Products.Bacon.name,
        'https://gatewaymarket.storebyweb.com/s/1000-1/i/INV-1000-18483',
    )
    print(x, len(x))

# Egg row; the amount is given as the bare word 'Dozen'.
test1 = ['EGGS ORGANIC', '$5.49/ea Product Price', None, 'STATELINE', 'Dozen']
x = cleaner.cleanUp(
    test1,
    Products.Eggs.name,
    'https://gatewaymarket.storebyweb.com/s/1000-1/i/INV-1000-18483',
)
print(x, len(x))

# Tomato row; carries two extra trailing fields (by-weight text and size), both empty here.
testT = ['TOMATOES HEIRLOOM CHERRY MIX LOCAL ORGANIC', '$4.99/ea Product Price', None, 'DEL CABO', None, None]
x = cleaner.cleanUp(
    testT,
    Products.HeirloomTomatoes.name,
    'https://gatewaymarket.storebyweb.com/s/1000-1/i/INV-1000-11820',
)
print(x)
# x = cleaner.cleanUp(testT, Products.HeirloomTomatoes.name, '')
# print(x)




# currentDate = str(datetime(datetime.today().year, datetime.today().month, datetime.today().day))[:-8]
