In [None]:
import numpy as np

class QLearningAgent:
    """Tabular Q-learning agent for picking grocery products within a budget.

    A state is a ``(store, category, remaining_budget)`` tuple and there are
    two actions: ``0`` (skip) and ``1`` (try to buy a product of that category
    at that store). Q-values are kept in ``self.q_values``, a dict keyed by
    ``(state, action)``; missing entries count as ``0``.

    Product data is a sequence of ``[store, product, price, category]`` rows.
    """

    def __init__(self, category_list, budget, learning_rate=0.1, discount_factor=0.9, epsilon=0.3):
        """Store the hyper-parameters and start with an empty Q-table.

        Args:
            category_list: Categories the agent may shop for.
            budget: Amount available at the start of every episode.
            learning_rate: Step size of the Q-value update.
            discount_factor: Weight given to the best next-state Q-value.
            epsilon: Probability of taking a random (exploratory) action.
        """
        self.category_list = category_list
        self.budget = budget
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_values = {}
        # Rows seen by find_optimal_products(); lets get_optimal_products()
        # work without depending on a module-level ``data`` variable.
        self.data = None

    def update_q_value(self, state, action, next_state, reward):
        """Apply one Q-learning update to the entry for ``(state, action)``."""
        current_q = self.q_values.get((state, action), 0)
        max_next_q = max(self.q_values.get((next_state, a), 0) for a in [0, 1])
        td_error = reward + self.discount_factor * max_next_q - current_q
        self.q_values[(state, action)] = current_q + self.learning_rate * td_error

    def choose_action(self, state):
        """Return 0 or 1 using an epsilon-greedy policy (ties favour 0)."""
        if np.random.rand() < self.epsilon:
            return int(np.random.choice([0, 1]))
        return int(np.argmax([self.q_values.get((state, a), 0) for a in [0, 1]]))

    def find_optimal_products(self, data, num_episodes=1000):
        """Train the Q-table by simulating random shopping episodes.

        Args:
            data: Sequence of ``[store, product, price, category]`` rows.
            num_episodes: Number of episodes to simulate.

        Raises:
            ValueError: From numpy if ``data`` or the category list is empty.
        """
        self.data = data
        # Loop-invariant: one entry per row, so stores with more rows are
        # proportionally more likely to be drawn (as before).
        stores = [entry[0] for entry in data]

        for _ in range(num_episodes):
            state = (
                np.random.choice(stores),
                np.random.choice(self.category_list),
                self.budget
            )
            # Product names already drawn in this episode (bought or not).
            seen_products = set()

            while True:
                action = self.choose_action(state)
                store, category, remaining_budget = state

                if action == 1:
                    candidates = [
                        entry for entry in data
                        if entry[0] == store and entry[3] == category and entry[1] not in seen_products
                    ]
                    if candidates:
                        # Pick a row directly. Picking a price and then the
                        # first row with that price could never select a
                        # product sharing its price with an earlier row.
                        product = candidates[np.random.randint(len(candidates))]
                        seen_products.add(product[1])
                        if remaining_budget >= product[2]:
                            remaining_budget -= product[2]

                next_state = (
                    np.random.choice(stores),
                    np.random.choice(self.category_list),
                    remaining_budget
                )
                reward = 1 if remaining_budget > 0 else -1

                self.update_q_value(state, action, next_state, reward)
                state = next_state

                # Episodes end when the budget is spent or, randomly, with
                # probability 0.1 per step.
                if remaining_budget == 0 or np.random.rand() < 0.1:
                    break

    def get_optimal_products(self, data=None):
        """Recommend at most one product per category without exceeding the budget.

        States are visited in Q-table insertion order. A category is filled
        from the first state whose "buy" Q-value beats its "skip" Q-value and
        for which the store still offers an affordable product.

        Args:
            data: Product rows to choose from. Defaults to the rows last
                passed to :meth:`find_optimal_products`.

        Returns:
            List of ``(category, store, price, product)`` tuples whose prices
            sum to at most ``self.budget``.

        Raises:
            ValueError: If the Q-table is non-empty and no data is available.
        """
        if not self.q_values:
            return []
        if data is None:
            data = self.data
        if data is None:
            raise ValueError(
                "No product data available: call find_optimal_products() first or pass data."
            )

        optimal_products = []
        chosen_categories = set()
        remaining_budget = self.budget

        # Each state can appear under both actions; visit it once.
        for state in dict.fromkeys(key[0] for key in self.q_values):
            store, category, state_budget = state
            q_value_purchase = self.q_values.get((state, 1), 0)
            q_value_no_purchase = self.q_values.get((state, 0), 0)
            if (
                state_budget <= 0
                or q_value_purchase <= q_value_no_purchase
                or category in chosen_categories
            ):
                continue

            # Only offer what still fits, so the basket never exceeds the budget.
            affordable = [
                entry for entry in data
                if entry[0] == store and entry[3] == category and entry[2] <= remaining_budget
            ]
            if not affordable:
                continue

            product = affordable[np.random.randint(len(affordable))]
            chosen_categories.add(category)
            optimal_products.append((category, store, product[2], product[1]))
            remaining_budget -= product[2]
        return optimal_products

    def get_category_product_price(self, data, store, category):
        """Return a random price among ``store``'s rows whose product name
        belongs to ``category`` anywhere in ``data``, or 0 if there is none."""
        category_products = {product for _, product, _, c in data if c == category}
        prices = [
            entry[2] for entry in data
            if entry[0] == store and entry[1] in category_products
        ]
        if prices:
            return np.random.choice(prices)
        return 0



# Example usage
# Sample product rows in the form [store, product name, price, category].
# NOTE(review): the categories look like the output of a keyword-based
# classifier (e.g. 'HAMI MELON' is tagged 'Protein') - confirm before relying
# on them.
data = [
    ['Walmart', 'Corn', 0.67, 'Fresh Produce'],
    ['Walmart', 'Green Kiwi', 0.87, 'Fresh Produce'],
    ['Safeway', 'Long English Cucumbers', 0.88, 'Fresh Produce'],
    ['Safeway', 'Corn product of USA', 0.88, 'Fresh Produce'],
    ['Walmart', 'Iceberg Lettuce', 0.94, 'Fresh Produce'],
    ['Walmart', 'Kraft Dinner', 0.97, 'Dairy'],
    ['Walmart', 'Great Value Rotini Pasta', 0.97, 'Carbs'],
    ['Safeway', 'Garlic', 0.99, 'Fresh Produce'],
    ['Save-on Foods', 'Cut Watermelon', 0.99, 'Fresh Produce'],
    ['Save-on Foods', 'Sourdough Bun', 1.0, 'Carbs'],
    ['Buy-Low Foods', 'Kraft BBQ Sauce', 1.0, 'Protein'],
    ['No Frills', 'KRAFT DINNER ORIGINAL MACARONI & CHEESE, 225 G', 1.25, 'Dairy'],
    ['Safeway', 'Fresh Choice Oysters', 1.25, 'Protein'],
    ['T&T', 'NAPA', 1.27, 'Fresh Produce'],
    ['Safeway', 'MILK 2 GO', 1.29, 'Dairy'],
    ['Walmart', 'Avocados', 1.47, 'Fresh Produce'],
    ['Walmart', 'Ataulfo Mangoes', 1.47, 'Fresh Produce'],
    ['Walmart', 'Red Mangoes', 1.47, 'Fresh Produce'],
    ['T&T', 'LOTUS ROOT', 1.48, 'Fresh Produce'],
    ['T&T', 'TOMATOES', 1.48, 'Fresh Produce'],
    ['T&T', 'HAMI MELON', 1.48, 'Protein'],
    ['T&T', 'CHINESE GOLDEN PEAR', 1.48, 'Fresh Produce'],
    ['T&T', 'PURPLE YAM', 1.48, 'Fresh Produce'],
    ['Save-on Foods', 'Tomatoes on the Vine', 1.49, 'Fresh Produce'],
    ['Safeway', 'Whole Wild Alaska Sockeye Salmon', 1.49, 'Protein'],
    ['No Frills', 'BEATRICE 1%, 946 ML/1 L OR DAIRYLAND, 946 ML CHOCOLATE MILK', 1.69, 'Dairy'],
    ['T&T', 'SUPER Q GOLDEN BIHON CORNSTARCH STICKS, 454g', 1.78, 'Fresh Produce'],
    ['Save-on Foods', 'Backyard BBQ Coleslaw', 1.79, 'Protein'],
    ['Save-on Foods', 'Mardi Gras Potato Salad', 1.79, 'Fresh Produce'],
    ['Save-on Foods', 'Walnuts', 1.79, 'Carbs'],
    ['No Frills', 'KOSA RICE FLOUR or GLUTINOUS RICE FLOUR, 400 g', 1.88, 'Carbs'],
    ['Save-on Foods', 'Western Family Chewy Granola Bars', 1.89, 'Carbs'],
    ['No Frills', 'BARILLA PASTA, 410 g', 1.97, 'Carbs'],
    ['Walmart', 'Bulk Ginger Root', 1.97, 'Fresh Produce']
]

# Shopping configuration for the demo run.
category_list = ['Carbs', 'Dairy', 'Fresh Produce', 'Protein']
budget = 4
num_episodes = 1000  # training episodes

# Train on the sample rows, then ask the agent for its recommendations.
agent = QLearningAgent(category_list, budget)
agent.find_optimal_products(data, num_episodes)
optimal_products = agent.get_optimal_products()

# Report the recommendations, one line per product.
if not optimal_products:
    print("No optimal products found.")
else:
    print("Optimal Products:")
    for category, store, price, product in optimal_products:
        print(f"- Category: {category}, Store: {store}, Product: {product}, Price: {price}")


Optimal Products:
- Category: Carbs, Store: Walmart, Product: Great Value Rotini Pasta, Price: 0.97
- Category: Dairy, Store: Safeway, Product: MILK 2 GO, Price: 1.29
- Category: Protein, Store: Safeway, Product: Fresh Choice Oysters, Price: 1.25
- Category: Fresh Produce, Store: Walmart, Product: Green Kiwi, Price: 0.87


In [None]:
# Scrape flyer items for nearby stores and classify them into food categories

import requests
import geocoder
import sqlite3
import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Flipp site root and the backend endpoints used for item search / lookup.
BASE_URL = 'https://flipp.com'
BACKEND_URL = 'https://backflipp.wishabi.com/flipp'
SEARCH_URL = BACKEND_URL + '/items/search'
# NOTE: already ends with '/', so appending "/<id>" yields a double slash.
ITEM_URL = BACKEND_URL + '/items/'

class Store:
    """A store to scrape: its name and the postal code it is searched under."""

    def __init__(self, name, postal_code):
        self.name, self.postal_code = name, postal_code

class FlyerScraper:
    """Fetch flyer items from the Flipp backend and persist them in SQLite.

    Items are written to the ``items`` table of ``items.db`` together with
    the coordinates geocoded from the store's postal code.
    """

    # Seconds to wait for an HTTP response; without a timeout ``requests``
    # can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Open (or create) ``items.db`` and make sure the items table exists."""
        self.conn = sqlite3.connect('items.db')  # Create a SQLite database file
        self.c = self.conn.cursor()

        # Create items table if it doesn't exist
        self.c.execute('''CREATE TABLE IF NOT EXISTS items
                        (store_name text, item_name text, item_price real, longitude real, latitude real, postal_code text)''')
        self.conn.commit()

        self.stores = []

    def add_store(self, store):
        """Register a store to be scraped later."""
        self.stores.append(store)

    @staticmethod
    def _geocode(postal_code):
        """Return ``(latitude, longitude)`` for a postal code.

        Raises:
            ValueError: If the geocoder returns no coordinates.
        """
        latlng = geocoder.arcgis(postal_code).latlng
        if not latlng:
            raise ValueError(f"Could not geocode postal code {postal_code!r}")
        return latlng[0], latlng[1]

    def search(self, query, postal_code):
        """Search flyer items matching ``query`` near ``postal_code``.

        Every search hit is looked up individually through
        :meth:`scrape_item` to obtain its name and current price.

        Returns:
            List of ``{'name', 'price', 'postal_code'}`` dicts; empty when the
            response contains no items.
        """
        data = requests.get(
            SEARCH_URL,
            params={
                'q': query,
                'postal_code': postal_code,
            },
            timeout=self.REQUEST_TIMEOUT,
        ).json()

        items = []
        # A response without an 'items' key (or with null) means no results.
        for item_data in data.get('items') or []:
            item_id = item_data.get('flyer_item_id')
            if item_id is None:
                # Nothing to look up for hits that carry no flyer item id.
                continue
            item = self.scrape_item(item_id)
            items.append({
                'name': item['item']['name'],
                'price': item['item']['current_price'],
                'postal_code': postal_code,
            })

        return items

    def scrape_item(self, item_id):
        """Return the decoded JSON details of one flyer item."""
        # ITEM_URL may end with '/'; strip it so the URL has a single slash.
        url = f"{ITEM_URL.rstrip('/')}/{item_id}"
        return requests.get(url, timeout=self.REQUEST_TIMEOUT).json()

    def insert_item_data(self, store_name, item_name, item_price, longitude, latitude, postal_code):
        """Insert one item row and commit immediately."""
        self.c.execute("INSERT INTO items VALUES (?, ?, ?, ?, ?, ?)",
                       (store_name, item_name, item_price, longitude, latitude, postal_code))
        self.conn.commit()

    def scrape_store(self, store):
        """Search for ``store.name`` near its postal code and store every hit.

        Raises:
            ValueError: If the store's postal code cannot be geocoded.
        """
        latitude, longitude = self._geocode(store.postal_code)
        for result in self.search(store.name, store.postal_code):
            self.insert_item_data(
                store.name, result['name'], result['price'],
                longitude, latitude, result['postal_code'],
            )

    def close_connection(self):
        """Close the SQLite connection."""
        self.conn.close()

    def search_nearby_stores(self, postal_code, radius_km):
        """Return stored stores within ``radius_km`` of ``postal_code``.

        Returns:
            List of single-entry dicts ``{store_name: store_postal_code}``.

        Raises:
            ValueError: If any postal code involved cannot be geocoded.
        """
        target = self._geocode(postal_code)

        self.c.execute("SELECT DISTINCT store_name, postal_code FROM items")
        stores = self.c.fetchall()

        # Several stores may share a postal code; geocode each code only once.
        coordinates = {}
        nearby_stores = []
        for store_name, store_postal_code in stores:
            if store_postal_code not in coordinates:
                coordinates[store_postal_code] = self._geocode(store_postal_code)

            distance = geodesic(target, coordinates[store_postal_code]).kilometers
            if distance <= radius_km:
                nearby_stores.append({store_name: store_postal_code})

        return nearby_stores


# Create an instance of FlyerScraper (opens/creates items.db)
flyer_scraper = FlyerScraper()

# Create store instances: (store name, postal code to search under)
store1 = Store('Walmart', 'V6T 2J2')
store2 = Store('T&T', 'V6T 3W2')
store3 = Store('Safeway', 'V6T 1W9')
store4 = Store('Save-on Foods', 'V6T 1W9')
store5 = Store('No Frills', 'V6T 1W9')
store6 = Store('Buy-Low Foods', 'V6T 1W9')

# Add stores to the scraper
flyer_scraper.add_store(store1)
flyer_scraper.add_store(store2)
flyer_scraper.add_store(store3)
flyer_scraper.add_store(store4)
flyer_scraper.add_store(store5)
flyer_scraper.add_store(store6)

# Scrape each store: search results are inserted into the items table
for store in flyer_scraper.stores:
    flyer_scraper.scrape_store(store)

# Search nearby stores within a radius of 5 kilometers from the postal code 'V6T 2J2'
nearby_stores = flyer_scraper.search_nearby_stores('V6T 2J2', radius_km=5)

# Create an empty list to store (store, item, price) tuples
item_prices = []

# Iterate over the nearby stores; each entry is a {store_name: postal_code} dict
for store_data in nearby_stores:
    for store_name, postal_code in store_data.items():
        flyer_scraper.c.execute("SELECT item_name, item_price FROM items WHERE store_name = ? AND postal_code = ?",
                            (store_name, postal_code))
        items = flyer_scraper.c.fetchall()
        for item in items:
            item_name = item[0]
            item_price = item[1]
            item_prices.append((store_name, item_name, item_price))

# Convert the item and price data into a pandas DataFrame
df = pd.DataFrame(item_prices, columns=['Store', 'Items', 'Prices'])

# Convert 'Prices' column to numeric; unparseable values become NaN
df['Prices'] = pd.to_numeric(df['Prices'], errors='coerce')

# Drop duplicate rows based on 'Items' column
# NOTE(review): this runs before sorting, so the row kept for a repeated item
# name is the first one fetched, not necessarily the cheapest.
df = df.drop_duplicates(subset=['Items'])

# Sort by prices (ascending)
df = df.sort_values(by='Prices')

# Update code here
# Classify items into categories based on their names
# Lower-case keywords per category. Categories are tried in the order listed
# here, so a keyword that appears in two categories (e.g. 'kraft dinner')
# resolves to the earlier one.
category_keywords = {
    'Protein': ['beef', 'pancetta', 'proscuito', 'prosciutto', 'bacon', 'clams', 'salami', 'bbq', 'barbecue', 'duck', 'prawns', 'lamb', 'sirloin', 'wings', 'filet', 'filets', 'ham', 'chicken', 'pork', 'turkey', 'oysters', 'meat', 'cod', 'scallop', 'turbot', 'longanisa', 'fish', 'sausage', 'hotdog', 'wieners', 'lobster', 'shrimps', 'goat meat', 'salmon', 'steak', 'eggs', 'tuna', 'sardine'],
    'Fresh Produce': ['apple', 'chia', 'pistachios', 'cherry', 'mandarin', 'blackberry', 'asparagus', 'coconut', 'cauliflower', 'strawberries', 'lettuce', 'blackberries', 'banana', 'napa', 'cabbage', 'cucumber', 'mushroom', 'chestnut', 'dates', 'longan', 'orange', 'grape', 'strawberry', 'broccoli', 'yam', 'melon', 'eggplant', 'radish', 'lemon', 'pear', 'lotus', 'lychee', 'carrot', 'mango', 'mangoes', 'garlic', 'nectarine', 'peach', 'coleslaw', 'tomato', 'potato', 'corn', 'kiwi', 'avocado', 'blueberries', 'ginger', 'spring mix', 'peppers'],
    'Dairy': ['milk', 'cheese', 'cheezee', 'cheddar', 'yogurt', 'butter', 'ice cream', 'yogourt', 'kraft dinner', 'mozzarella', 'creamer', 'nutella', 'oat beverage', 'oat milk'],
    # 'quinoa' and 'chip' are separate keywords: a missing comma used to fuse
    # them into the never-matching 'quinoa,chip'.
    'Carbs': ['bread', 'toast', 'perogies', 'gnocchi', 'waffles', 'rice', 'pasta', 'oat', 'walnut', 'udon', 'vermicelli', 'noodle', 'ramen', 'tofu', 'oatmeal', 'cereal', 'quinoa', 'chip', 'chips', 'kraft dinner', 'pizza', 'buns', 'macaroni', 'cookies', 'protein bars', 'granola bars', 'coffee', 'dumplings', 'dim sum', 'bun'],
}


def classify_item(item_name):
    """Return the first category with a keyword contained in ``item_name``.

    Matching is a case-insensitive substring test, so 'melon' matches
    "Watermelon" but 'ham' also matches "Hami Melon". Categories are tried
    in the order of ``category_keywords``.

    Args:
        item_name: Product name to classify.

    Returns:
        The matching category name, or ``'Other'`` when no keyword matches.
    """
    item_name_lower = item_name.lower()
    for category, keywords in category_keywords.items():
        if any(keyword in item_name_lower for keyword in keywords):
            return category
    return 'Other'

# Apply the classification function to the 'Items' column
df['Category'] = df['Items'].apply(classify_item)

# Drop rows with NaN in the 'Prices' column (prices that failed numeric conversion)
df = df.dropna(subset=['Prices'])

# Remove items with category "Other" (no keyword matched)
df = df[df['Category'] != 'Other']

# Show every row when printing, write the result to output.csv, then print it.
# The CSV is UTF-8 with a BOM ('utf-8-sig') and includes the DataFrame index
# as its first, unnamed column.
pd.set_option('display.max_rows', None)
df.to_csv('output.csv', encoding='utf-8-sig')
print(df)

# Close the database connection
flyer_scraper.close_connection()


ModuleNotFoundError: ignored

In [None]:
import numpy as np
import csv

class QLearningAgent:
    """Tabular Q-learning agent for picking grocery products within a budget.

    A state is a ``(store, category, remaining_budget)`` tuple and there are
    two actions: ``0`` (skip) and ``1`` (try to buy a product of that category
    at that store). Q-values are kept in ``self.q_values``, a dict keyed by
    ``(state, action)``; missing entries count as ``0``.

    Product data is a sequence of ``[store, product, price, category]`` rows.
    """

    def __init__(self, category_list, budget, learning_rate=0.1, discount_factor=0.9, epsilon=0.02):
        """Store the hyper-parameters and start with an empty Q-table.

        Args:
            category_list: Categories the agent may shop for.
            budget: Amount available at the start of every episode.
            learning_rate: Step size of the Q-value update.
            discount_factor: Weight given to the best next-state Q-value.
            epsilon: Probability of taking a random (exploratory) action.
        """
        self.category_list = category_list
        self.budget = budget
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_values = {}
        # Rows seen by find_optimal_products(); lets get_optimal_products()
        # work without depending on a module-level ``data`` variable.
        self.data = None

    def update_q_value(self, state, action, next_state, reward):
        """Apply one Q-learning update to the entry for ``(state, action)``."""
        current_q = self.q_values.get((state, action), 0)
        max_next_q = max(self.q_values.get((next_state, a), 0) for a in [0, 1])
        td_error = reward + self.discount_factor * max_next_q - current_q
        self.q_values[(state, action)] = current_q + self.learning_rate * td_error

    def choose_action(self, state):
        """Return 0 or 1 using an epsilon-greedy policy (ties favour 0)."""
        if np.random.rand() < self.epsilon:
            return int(np.random.choice([0, 1]))
        return int(np.argmax([self.q_values.get((state, a), 0) for a in [0, 1]]))

    def find_optimal_products(self, data, num_episodes=1000):
        """Train the Q-table by simulating random shopping episodes.

        Args:
            data: Sequence of ``[store, product, price, category]`` rows.
            num_episodes: Number of episodes to simulate.

        Raises:
            ValueError: From numpy if ``data`` or the category list is empty.
        """
        self.data = data
        # Loop-invariant: one entry per row, so stores with more rows are
        # proportionally more likely to be drawn (as before).
        stores = [entry[0] for entry in data]

        for _ in range(num_episodes):
            state = (
                np.random.choice(stores),
                np.random.choice(self.category_list),
                self.budget
            )
            # Product names already drawn in this episode (bought or not).
            seen_products = set()

            while True:
                action = self.choose_action(state)
                store, category, remaining_budget = state

                if action == 1:
                    candidates = [
                        entry for entry in data
                        if entry[0] == store and entry[3] == category and entry[1] not in seen_products
                    ]
                    if candidates:
                        # Pick a row directly. Picking a price and then the
                        # first row with that price could never select a
                        # product sharing its price with an earlier row.
                        product = candidates[np.random.randint(len(candidates))]
                        seen_products.add(product[1])
                        if remaining_budget >= product[2]:
                            remaining_budget -= product[2]

                next_state = (
                    np.random.choice(stores),
                    np.random.choice(self.category_list),
                    remaining_budget
                )
                reward = 1 if remaining_budget > 0 else -1

                self.update_q_value(state, action, next_state, reward)
                state = next_state

                # Episodes end when the budget is spent or, randomly, with
                # probability 0.1 per step.
                if remaining_budget == 0 or np.random.rand() < 0.1:
                    break

    def get_optimal_products(self, data=None):
        """Recommend at most one product per category without exceeding the budget.

        States are visited in Q-table insertion order. A category is filled
        from the first state whose "buy" Q-value beats its "skip" Q-value and
        for which the store still offers an affordable product.

        Args:
            data: Product rows to choose from. Defaults to the rows last
                passed to :meth:`find_optimal_products`.

        Returns:
            List of ``(category, store, price, product)`` tuples whose prices
            sum to at most ``self.budget``.

        Raises:
            ValueError: If the Q-table is non-empty and no data is available.
        """
        if not self.q_values:
            return []
        if data is None:
            data = self.data
        if data is None:
            raise ValueError(
                "No product data available: call find_optimal_products() first or pass data."
            )

        optimal_products = []
        chosen_categories = set()
        remaining_budget = self.budget

        # Each state can appear under both actions; visit it once.
        for state in dict.fromkeys(key[0] for key in self.q_values):
            store, category, state_budget = state
            q_value_purchase = self.q_values.get((state, 1), 0)
            q_value_no_purchase = self.q_values.get((state, 0), 0)
            if (
                state_budget <= 0
                or q_value_purchase <= q_value_no_purchase
                or category in chosen_categories
            ):
                continue

            # Only offer what still fits, so the basket never exceeds the budget.
            affordable = [
                entry for entry in data
                if entry[0] == store and entry[3] == category and entry[2] <= remaining_budget
            ]
            if not affordable:
                continue

            product = affordable[np.random.randint(len(affordable))]
            chosen_categories.add(category)
            optimal_products.append((category, store, product[2], product[1]))
            remaining_budget -= product[2]
        return optimal_products

    def get_category_product_price(self, data, store, category):
        """Return a random price among ``store``'s rows whose product name
        belongs to ``category`` anywhere in ``data``, or 0 if there is none."""
        category_products = {product for _, product, _, c in data if c == category}
        prices = [
            entry[2] for entry in data
            if entry[0] == store and entry[1] in category_products
        ]
        if prices:
            return np.random.choice(prices)
        return 0


def read_data_from_csv(filename):
    """Load ``[store, item, price, category]`` rows from a CSV file.

    The header row must contain the columns ``Store``, ``Items``, ``Prices``
    and ``Category``; their order does not matter and other columns are
    ignored.

    Args:
        filename: Path of the CSV file.

    Returns:
        List of ``[store, item, price, category]`` lists with ``price`` as a
        float. Rows that are too short or whose price is not numeric are
        skipped. An empty file yields an empty list.

    Raises:
        ValueError: If a required column is missing from the header.
    """
    data = []
    # 'utf-8-sig' reads UTF-8 with or without a BOM; newline='' lets the csv
    # module handle line endings inside quoted fields itself.
    with open(filename, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.reader(file)
        header = next(csv_reader, None)  # Read the header row
        if header is None:
            return data

        # Find the indices of the required columns
        store_index = header.index('Store')
        item_index = header.index('Items')
        price_index = header.index('Prices')
        category_index = header.index('Category')

        for row in csv_reader:
            try:
                record = [
                    row[store_index],
                    row[item_index],
                    float(row[price_index]),
                    row[category_index],
                ]
            except (ValueError, IndexError):
                # ValueError: non-numeric price; IndexError: row too short.
                continue
            data.append(record)
    return data



# Product rows produced by the classification step.
data = read_data_from_csv("output.csv")

# Shopping configuration for this run.
category_list = ['Carbs', 'Dairy', 'Fresh Produce', 'Protein']
budget = 30
num_episodes = 1000  # training episodes

# Train on the CSV rows, then ask the agent for its recommendations.
agent = QLearningAgent(category_list, budget)
agent.find_optimal_products(data, num_episodes)
optimal_products = agent.get_optimal_products()

# Report the recommendations, one line per product.
if not optimal_products:
    print("No optimal products found.")
else:
    print("Optimal Products:")
    for category, store, price, product in optimal_products:
        print(f"- Category: {category}, Store: {store}, Product: {product}, Price: {price}")





FileNotFoundError: ignored

In [None]:
#updated code with no cap on suggestions

import numpy as np
import csv
import random

class QLearningAgent:
    """Tabular Q-learning agent that suggests several products per category.

    A state is a ``(store, category, remaining_budget)`` tuple and there are
    two actions: ``0`` (skip) and ``1`` (try to buy a product of that category
    at that store). Q-values are kept in ``self.q_values``, a dict keyed by
    ``(state, action)``; missing entries count as ``0``.

    Product data is a sequence of ``[store, product, price, category]`` rows.
    """

    def __init__(self, category_list, budget, learning_rate=0.1, discount_factor=0.5, epsilon=0.5):
        """Store the hyper-parameters and start with an empty Q-table.

        Args:
            category_list: Categories the agent may shop for.
            budget: Amount available at the start of every episode.
            learning_rate: Step size of the Q-value update.
            discount_factor: Weight given to the best next-state Q-value.
            epsilon: Exploration probability used during the first episode.
        """
        self.category_list = category_list
        self.budget = budget
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_values = {}

    def update_q_value(self, state, action, next_state, reward):
        """Apply one Q-learning update to the entry for ``(state, action)``."""
        current_q = self.q_values.get((state, action), 0)
        max_next_q = max(self.q_values.get((next_state, a), 0) for a in [0, 1])
        td_error = reward + self.discount_factor * max_next_q - current_q
        self.q_values[(state, action)] = current_q + self.learning_rate * td_error

    def choose_action(self, state):
        """Return 0 or 1 using an epsilon-greedy policy (ties favour 0)."""
        if np.random.rand() < self.epsilon:
            return int(np.random.choice([0, 1]))
        return int(np.argmax([self.q_values.get((state, a), 0) for a in [0, 1]]))

    def find_optimal_products(self, data, num_episodes=1000):
        """Train the Q-table on random episodes and return recommendations.

        After training, ``self.epsilon`` is left at 0 (pure exploitation).

        Args:
            data: Sequence of ``[store, product, price, category]`` rows.
            num_episodes: Number of episodes to simulate.

        Returns:
            The result of :meth:`get_optimal_products` for ``data``.

        Raises:
            ValueError: From numpy if ``data`` or the category list is empty.
        """
        # Loop-invariant: one entry per row, so stores with more rows are
        # proportionally more likely to be drawn (as before).
        stores = [entry[0] for entry in data]

        for _ in range(num_episodes):
            state = (
                np.random.choice(stores),
                np.random.choice(self.category_list),
                self.budget
            )

            while True:
                action = self.choose_action(state)
                store, category, remaining_budget = state

                if action == 1:
                    candidates = [
                        entry for entry in data
                        if entry[0] == store and entry[3] == category
                    ]
                    if candidates:
                        # Pick a row directly. Picking a price and then the
                        # first row with that price could never select a
                        # product sharing its price with an earlier row.
                        product = candidates[np.random.randint(len(candidates))]
                        if remaining_budget >= product[2]:
                            remaining_budget -= product[2]

                next_state = (
                    np.random.choice(stores),
                    np.random.choice(self.category_list),
                    remaining_budget
                )
                reward = 1 if remaining_budget > 0 else -1

                self.update_q_value(state, action, next_state, reward)
                state = next_state

                # Episodes end when the budget is spent or, randomly, with
                # probability 0.1 per step.
                if remaining_budget == 0 or np.random.rand() < 0.1:
                    break

            # Exploration rate for the following episodes: total budget / 100
            # clamped to [0.1, 0.5]. It depends on the fixed starting budget,
            # not on what is left, and replaces the constructor's epsilon.
            self.epsilon = max(0.1, min(0.5, self.budget / 100))  # Adjust 100 based on your data

        self.epsilon = 0  # Set epsilon to 0 to choose the best action in the next step

        optimal_products = self.get_optimal_products(data=data)
        return optimal_products

    def get_optimal_products(self, data):
        """Recommend products, best Q-value first, without exceeding the budget.

        "Buy" entries of the Q-table are visited in descending Q-value order.
        For each one a random affordable product of that store and category
        is added, up to ``max_suggestions`` products per category.

        Args:
            data: Sequence of ``[store, product, price, category]`` rows.

        Returns:
            List of ``(category, store, price, product)`` tuples whose prices
            sum to at most ``self.budget``.
        """
        optimal_products = []
        per_category = {}
        remaining_budget = self.budget

        # Per-category cap: one suggestion per 5 units of budget, at most
        # one per known category. Adjust 5 based on your preference.
        max_suggestions = min(len(self.category_list), int(self.budget / 5))

        sorted_q_values = sorted(self.q_values.items(), key=lambda x: x[1], reverse=True)

        for (state, action), _ in sorted_q_values:
            if remaining_budget <= 0:
                break
            if action != 1:
                continue
            store, category, _ = state
            if per_category.get(category, 0) >= max_suggestions:
                continue

            # Only offer what still fits; buying an unaffordable item used
            # to push the remaining budget below zero.
            affordable = [
                entry for entry in data
                if entry[0] == store and entry[3] == category and entry[2] <= remaining_budget
            ]
            if not affordable:
                continue

            selected_product = random.choice(affordable)
            per_category[category] = per_category.get(category, 0) + 1
            optimal_products.append((category, store, selected_product[2], selected_product[1]))
            remaining_budget -= selected_product[2]

        return optimal_products


def read_data_from_csv(filename):
    """Load ``[store, item, price, category]`` rows from a CSV file.

    The header row must contain the columns ``Store``, ``Items``, ``Prices``
    and ``Category``; their order does not matter and other columns are
    ignored.

    Args:
        filename: Path of the CSV file.

    Returns:
        List of ``[store, item, price, category]`` lists with ``price`` as a
        float. Rows that are too short or whose price is not numeric are
        skipped. An empty file yields an empty list.

    Raises:
        ValueError: If a required column is missing from the header.
    """
    data = []
    # 'utf-8-sig' reads UTF-8 with or without a BOM; newline='' lets the csv
    # module handle line endings inside quoted fields itself.
    with open(filename, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.reader(file)
        header = next(csv_reader, None)  # Read the header row
        if header is None:
            return data

        # Find the indices of the required columns
        store_index = header.index('Store')
        item_index = header.index('Items')
        price_index = header.index('Prices')
        category_index = header.index('Category')

        for row in csv_reader:
            try:
                record = [
                    row[store_index],
                    row[item_index],
                    float(row[price_index]),
                    row[category_index],
                ]
            except (ValueError, IndexError):
                # ValueError: non-numeric price; IndexError: row too short.
                continue
            data.append(record)
    return data

# Load the classified products and set up the shopping configuration.
data = read_data_from_csv("output.csv")
category_list = ['Carbs', 'Dairy', 'Fresh Produce', 'Protein']
budget = 2000
num_episodes = 1000  # training episodes

# Train the agent (the recommendations returned by training are discarded).
agent = QLearningAgent(category_list, budget)
agent.find_optimal_products(data, num_episodes)

# Ask the trained agent for a fresh set of recommendations.
optimal_products = agent.get_optimal_products(data=data)

# Report the recommendations, one line per product.
if not optimal_products:
    print("No optimal products found.")
else:
    print("Optimal Products:")
    for category, store, price, product in optimal_products:
        print(f"- Category: {category}, Store: {store}, Product: {product}, Price: {price}")


FileNotFoundError: ignored