In [1]:
import math
import re
from datetime import time
from datetime import datetime
from datetime import timedelta

import ipywidgets as widgets
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from IPython.display import display
from sklearn.feature_extraction.text import TfidfVectorizer # tfidf matrix
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Hyperparameters for the functions

# find_features keeps only the currently open shops when MORE than open_threshold
# of the matching shops are open; otherwise closed shops are listed as well.
open_threshold = 3
# find_features keeps only the shops that have the product in stock when MORE than
# stock_threshold of them do; otherwise out-of-stock shops are listed as well.
stock_threshold = 3
# Next product ID to hand out. add_product increments it by one on every call.
ID_product_count = 0
# Next shop ID to hand out. add_shop increments it by one on every call.
ID_shop_count = 0

In [3]:
def search(title, names):
    '''
    Given a search and the possible search results, returns the most accurate.

    The candidate names are vectorized with a character n-gram TF-IDF model and
    the candidate with the highest cosine similarity to the query is returned.

    Args:
        title: str. Input of the search of the user
        names: iterable of str (list, set, numpy array, ...). Names of the
            products from the database.

    Returns:
        str: The name of the product that the user meant to search.

    Raises:
        ValueError: If names is empty.
    '''
    # Materialize once: a set cannot be indexed, and the position returned by
    # argmax must refer to the same ordering that was fed to the vectorizer.
    candidates = list(names)
    if not candidates:
        raise ValueError('names must contain at least one product name')

    vectorizer = TfidfVectorizer(analyzer = 'char_wb')
    tfidf = vectorizer.fit_transform(candidates)

    # NOTE(review): clean_title is not defined in the cells visible here; it has
    # to be defined before this function is called -- confirm where it lives.
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    # Cosine similarity between the query and every candidate name
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    # Only the single best match is needed, so a plain argmax is enough
    best = int(np.argmax(similarity))

    return candidates[best]

In [4]:
def add_product(attr_list, product_db):
    '''
    Function used by the owner for adding a new product to the DataBase.

    The product ID is taken from the module-level counter ID_product_count and
    placed as the second value of the new row (right after the shop ID). The
    counter is then incremented by one.

    Args:
        - attr_list: [
            shop ID (int),
            product name (str),
            product description (str),
            price in euros (int),
            product stock (int)
            ]
          The list is not modified.
        - product_db: Pandas dataframe. Database containing the products.
          It is modified in place.

    Returns:
        - Pandas dataframe: The same dataframe with the added new product.
    '''
    global ID_product_count

    # Build the row on a copy: inserting into attr_list itself would leave the
    # caller's list with an extra element and break any later reuse of it.
    new_row = list(attr_list)
    new_row.insert(1, ID_product_count)

    # The new row is stored under the label equal to the current number of rows
    product_db.loc[product_db.shape[0]] = new_row

    ID_product_count += 1

    return product_db
    
    
def add_shop(attr_list, shop_db):
    '''
    Function used by the owner for adding a new shop to the DataBase.

    The address is geocoded with Nominatim to obtain the shop's latitude and
    longitude, the opening/closing hours are turned into datetime.time objects,
    and the shop ID is taken from the module-level counter ID_shop_count, which
    is then incremented by one.

    Args:
        - attr_list: [
                shop name (str),
                adress (str),
                number (int),
                zip code (int),
                opening time hour (int),
                opening time minute (int),
                closing time hour (int),
                closing time minute (int)
                ]
        - shop_db: Pandas dataframe. Database containing the shops.
          It is modified in place.

    Returns:
        - Pandas dataframe: The same dataframe with the added shop.

    Raises:
        - ValueError: If the address cannot be geocoded. Nothing is added and
          the ID counter is left untouched in that case.
    '''
    global ID_shop_count

    shop_name = attr_list[0]
    adress, number, zipcode = attr_list[1], attr_list[2], attr_list[3]
    open_h, open_m = attr_list[4], attr_list[5]
    close_h, close_m = attr_list[6], attr_list[7]

    geolocator = Nominatim(user_agent="MyApp")
    full_address = adress+', '+str(number)+', '+str(zipcode)
    loc = geolocator.geocode(full_address)
    # geocode returns None when the address cannot be resolved; fail with a
    # clear message instead of an AttributeError on loc.latitude.
    if loc is None:
        raise ValueError(f"Could not geocode address: {full_address!r}")

    op_time = time(hour=open_h, minute=open_m)
    cl_time = time(hour=close_h, minute=close_m)

    # NOTE(review): the eighth value is a constant 1 -- presumably the initial
    # value of a rating-like column; confirm against shop_db's columns.
    new_row = [ID_shop_count, shop_name, adress, number, zipcode,
               loc.latitude, loc.longitude, 1, op_time, cl_time]
    # The new row is stored under the label equal to the current number of rows
    shop_db.loc[shop_db.shape[0]] = new_row

    ID_shop_count += 1

    return shop_db

In [5]:
def update_stock_down(product_db, productID, n_products):
    '''
    Function called when user buys n_products ammount of the product "productID". Updates the database with this info.

    Args:
        - product_db: Database containing the products. Modified in place.
        - productID: ID of the product that was sold.
        - n_products: Quantity of products that were sold.

    Returns:
        - Pandas dataframe: The dataset updated, or None (leaving the database
          untouched) when there is not enough stock to cover the sale.

    Raises:
        - IndexError: If no row of product_db has the given productID.
    '''
    is_product = product_db['product ID'] == productID
    product_stock = product_db.loc[is_product, 'product stock'].iloc[0]

    if product_stock - n_products < 0:
        return None

    # A single .loc call writes into product_db itself. Chained indexing
    # (df[mask][col] -= n) would only modify a temporary copy.
    product_db.loc[is_product, 'product stock'] -= n_products

    return product_db


def update_stock_up(product_db, productID, n_products):
    '''
    Function called when the shop re-stocks n_products ammount of the product "productID". Updates the database with this info.

    Args:
        - product_db: Database containing the products. Modified in place.
        - productID: ID of the product that was re-stocked.
        - n_products: Quantity of products that were re-stocked.

    Returns:
        - Pandas dataframe: The dataset updated. If no row has the given
          productID the database is returned unchanged.
    '''
    is_product = product_db['product ID'] == productID
    # A single .loc call writes into product_db itself. Chained indexing
    # (df[mask][col] += n) would only modify a temporary copy.
    product_db.loc[is_product, 'product stock'] += n_products

    return product_db


def update_price(product_db, productID, new_price):
    '''
    Function called with the shop modifies the price of the product "productID" with the price "new_price".

    Args:
        - product_db: Database containing the products. Modified in place.
        - productID: ID of the product that was modified.
        - new_price: new price of the product.

    Returns:
        - Pandas dataframe: The dataset updated. If no row has the given
          productID the database is returned unchanged.
    '''
    is_product = product_db['product ID'] == productID
    # A single .loc call writes into product_db itself. Chained indexing
    # (df[mask][col] = value) would only modify a temporary copy.
    product_db.loc[is_product, 'price in euros'] = new_price

    return product_db

In [6]:
def _seconds_since_midnight(moment):
    '''Return the hour and minute of a time-like object as seconds since midnight (seconds are ignored).'''
    return moment.hour*60*60 + moment.minute*60


def find_features(query, product_db, shop_db, user_db, city_db, city, sorting_criteria = 'price', user_coords=None):
    '''
    Function used by the user to find the products.
    Returns a list of the products asked for, sorted using sorting criteria specified. By price as default.

    Args:
        - query: str. Contains the user's query.
        - product_db: Database with the products.
        - shop_db: Database with the shops.
        - user_db: Database with the user. Currently unused.
        - city_db: Database with the cities.
        - city: Name of the city; only shops whose zip code matches the zip code
          of this city are returned.
        - sorting_criteria: str. Indicates the sorting criterion. Possible values: 'price', 'location', 'ratting'
        - user_coords: [latitude, longitude]. Only necessary if sorting_criteria == 'location'

    Returns:
        - dataframe with all the products that match the search. When
          out-of-stock shops are included it carries an extra boolean
          'on stock' column; when sorted by location it carries an extra
          'distances' column.

    Raises:
        - ValueError: If city is not present in city_db.
    '''
    # Resolve the free-text query to one product name of the catalogue.
    # atleast_1d(...)[0] accepts both a scalar name and a length-1 array.
    product_names = product_db['product name'].unique()
    product_query = np.atleast_1d(search(query, product_names))[0]

    # Only show items queried by the user
    items_db_query = product_db[product_db['product name'] == product_query]

    # Show only items from the city's zip code
    city_rows = city_db[city_db['city name'] == city]
    if city_rows.empty:
        raise ValueError(f"Unknown city: {city!r}")
    zip_code = city_rows['zip code'].iloc[0]

    # NOTE(review): the merge key 'shop_ID' uses an underscore while the other
    # column names used in this notebook use spaces (e.g. 'product ID') --
    # confirm it matches the real column name in both frames.
    items_db_query = pd.merge(items_db_query, shop_db, how='left', on='shop_ID')
    items_db_query = items_db_query[items_db_query['zip code']==zip_code]

    # If more than open_threshold shops are open right now, show only those;
    # otherwise keep the closed ones as well.
    now_sec = _seconds_since_midnight(datetime.now())

    # The time columns hold datetime.time objects, so they are converted
    # element by element (a Series has no .hour / .minute attribute).
    opened = items_db_query['opening time'].map(_seconds_since_midnight) < now_sec
    not_closed = items_db_query['closing time'].map(_seconds_since_midnight) > now_sec
    is_open = opened & not_closed

    if is_open.sum() > open_threshold: # Enough open shops: hide the closed ones
        items_db_query = items_db_query[is_open]

    # If more than stock_threshold shops have the product in stock, show only
    # those; otherwise keep the out-of-stock ones too, flagged and listed last.
    on_stock = items_db_query['product stock'] > 0

    if on_stock.sum() > stock_threshold:
        items_db_query = items_db_query[on_stock]
    else:
        # assign/sort_values build a new frame instead of writing into a slice
        items_db_query = (
            items_db_query
            .assign(**{'on stock': on_stock})
            .sort_values('on stock', ascending=False)
        )

    # Sorting criterion
    if sorting_criteria == 'location' and user_coords is not None:
        user_lat, user_lon = user_coords[0], user_coords[1]
        distances = [
            calc_distance(user_lat, user_lon, shop_lat, shop_lon)
            for shop_lat, shop_lon in zip(items_db_query['shop lat'], items_db_query['shop lon'])
        ]
        items_db_query = items_db_query.assign(distances=distances).sort_values('distances', ascending=True)

    elif sorting_criteria == 'price':
        items_db_query = items_db_query.sort_values('price in euros', ascending=True)

    elif sorting_criteria == 'ratting':
        items_db_query = items_db_query.sort_values('shop ratting', ascending=False)

    return items_db_query

In [7]:
def calc_distance(lat1, lon1, lat2, lon2):
    '''
    Calculates the great-circle distance between two points using the
    spherical law of cosines.

    Args:
        - lat1, lon1 = float. user_coords, in decimal degrees
        - lat2, lon2 = float. shop_coords, in decimal degrees
    Returns:
        - Distance in kilometres between user and the shop using latitude and longitude coords.
    '''
    earth_radius_km = 6371

    # The trigonometric functions work in radians, the coordinates are degrees
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    delta_lambda = math.radians(lon2 - lon1)

    cos_angle = (math.sin(phi1)*math.sin(phi2)
                 + math.cos(phi1)*math.cos(phi2)*math.cos(delta_lambda))
    # Rounding can push the value marginally outside [-1, 1] (e.g. for two
    # identical points), which would make acos raise a ValueError.
    cos_angle = max(-1.0, min(1.0, cos_angle))

    return math.acos(cos_angle)*earth_radius_km