# The Digital Cheese Sommelier

## The Recommender Model

In [1]:
import pandas as pd 
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
import unicodedata

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import StandardScaler

from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import widgets, interact, interact_manual, fixed
from IPython.display import display, clear_output, HTML, Javascript

import warnings
warnings.filterwarnings('ignore')


### Part 1: Import the data

In [2]:
#read big_cheese2.csv into 'cheese_com'
cheese_com = pd.read_csv('./data/model_testing2.csv')

In [3]:
# inspect the dataframe
cheese_com.head()

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Milk_mare,Milk_moose,Milk_reindeer,Milk_water buffalo,Milk_yak,Treatment_pasteurized,Treatment_unpasteurized,Milk_blend yes,Triple Cream,Double Cream
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,France,Pays Basque,"semi-hard, artisan","creamy, dense and firm",natural,burnt caramel,lanoline,yes,...,0,0,0,0,0,0,1,1,0,0
1,Abbaye de Belval,cow's milk,France,,semi-hard,elastic,washed,,aromatic,no,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,France,Burgundy,"semi-soft, artisan, brined","creamy, dense and smooth",washed,"acidic, milky, smooth","barnyardy, earthy",no,...,0,0,0,0,0,0,1,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,France,province of Brittany,semi-hard,soft,washed,,,no,...,0,0,0,0,0,1,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,France,Nord-Pas-de-Calais,"semi-soft, artisan, brined",smooth and supple,washed,"milky, salty",floral,no,...,0,0,0,0,0,1,0,0,0,0


In [4]:
# set a new dataframe that I can drop values from, without still having to the dataframe with the undummied values
df = cheese_com

In [5]:
# check the copy
df.head()

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Milk_mare,Milk_moose,Milk_reindeer,Milk_water buffalo,Milk_yak,Treatment_pasteurized,Treatment_unpasteurized,Milk_blend yes,Triple Cream,Double Cream
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,France,Pays Basque,"semi-hard, artisan","creamy, dense and firm",natural,burnt caramel,lanoline,yes,...,0,0,0,0,0,0,1,1,0,0
1,Abbaye de Belval,cow's milk,France,,semi-hard,elastic,washed,,aromatic,no,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,France,Burgundy,"semi-soft, artisan, brined","creamy, dense and smooth",washed,"acidic, milky, smooth","barnyardy, earthy",no,...,0,0,0,0,0,0,1,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,France,province of Brittany,semi-hard,soft,washed,,,no,...,0,0,0,0,0,1,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,France,Nord-Pas-de-Calais,"semi-soft, artisan, brined",smooth and supple,washed,"milky, salty",floral,no,...,0,0,0,0,0,1,0,0,0,0


### Part 2: Dummying the rest of the features in the dataframe to get a nice big binary matrix

In [6]:
# Because a lot of the values in my features were several values listed in a string separated by commas,
# I created a function to parse through these values and create a dummy variable for each entry
# as well as print out a list of all of the unique entries for each feature
def dummy_listed_features(df, feature):

    # for ceratin features, some cells have multiple values
    # to get all of the values of these features, first create a list 
    # of all of the UNIQUE values that appear in the selected feature

    # fill the null values of the feature with a string value of "None" so as to not throw an error
    df[feature].fillna('None', inplace=True)

    # create an empty string
    strang = ""
    # look through every cell of the feature
    for i in list(df[feature].unique()):
        # add every entry in the cell into a long string seperated by a comma and a space
        strang += i + ', '

    # strip the extraneous comma and whitespace from the end of the newly gathered string
    # replace ' and' with a comma
    strang = strang.replace(' and', ',').strip(', ')

    # seperate the long string of values by a comma and space into a list of values in the feature
    # convert this list into a set to drop the values that appear more then once in an entry for the feature
    # convert this back into a list so it can be indexed 
    feature_list = list(set(strang.split(', ')))

    # initialize a dummy column for each unique value of the feature
    for i in feature_list:
        # initialize each column as 0
        df[f'{feature}_{i}']=0

    # go through every cheese in the list
    for i in range(len(df)):

        # look at each value in each cell by seperating by commas, putting it into a list, 
        # and iterating through that list (remember to replace any ' and' with a comma before creating list)
        for k in df[feature][i].replace(' and', ',').split(', '):
            # if the value exists in the feature for a particular cheese, set the dummy value to 1
            df[f'{feature}_{k}'][i] = 1

            
            
    # once feature has been dummied, drop the original        
    #df.drop(columns=feature, inplace=True)
    
    # also drop the feature column created when the value was 'None' because that was a null value before        
    df.drop(columns=f'{feature}_None', inplace=True)

    # remove the 'None' value that was stored in the list, as it has been dropped from the dataframe
    feature_list.remove('None')
    
    # print out the values in each feature (for reference)
    print(f'List of values for {feature} : ')
    
    print(feature_list) 
    print()


In [7]:
# create a list of features that i want to be dummied
listed_features = ["Type", "Texture", "Flavour", "Aroma", "Rind", "Vegetarian", "Family", "Country of origin"]

# for all of the features listed above:
for i in listed_features:
    # dummy the feature and print the list of unique entries
    dummy_listed_features(df, i)
    # drop the original feature from the copy of the dataframe
    df = df.drop(columns=i)
    

List of values for Type : 
['soft', 'semi-hard', 'semi firm', 'whey', 'semi-soft', 'firm', 'soft-ripened', 'brined', 'hard', 'blue-veined', 'fresh soft', 'processed', 'artisan', 'smear-ripened', 'fresh firm']

List of values for Texture : 
['spreadable', 'soft', 'crumbly', 'dry', 'stringy', 'firm', 'flaky', 'soft-ripened', 'oily', 'creamy', 'springy', 'compact', 'dense', 'buttery', 'supple', 'runny', 'chewy', 'chalky', 'grainy', 'sticky', 'smooth', 'open', 'elastic', 'crystalline', 'fluffy', 'semi firm', 'brittle', 'close']

List of values for Flavour : 
['crunchy', 'vegetal', 'spicy', 'strong', 'mild', 'grassy', 'sour', 'fruity', 'creamy', 'umami', 'tangy', 'sharp', 'nutty', 'acidic', 'smokey', 'butterscotch', 'rustic', 'buttery', 'mineral', 'piquant', 'herbaceous', 'bitter', 'mellow', 'pronounced', 'savory', 'caramel', 'full-flavored', 'tart', 'citrusy', 'woody', 'meaty', 'milky', 'lemony', 'yeasty', 'smooth', 'subtle', 'earthy', 'pungent', 'licorice', 'sweet', 'salty', 'mushroomy', 

In [8]:
# check the dataframe
df.head()

Unnamed: 0,Name,Milk,Region,Producers,Description,Fat content,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,...,Country of origin_Afghanistan,Country of origin_Ireland,Country of origin_Canada,Country of origin_Israel,Country of origin_Norway,Country of origin_Andorra,Country of origin_Albania,Country of origin_Bulgaria,Country of origin_Spain,Country of origin_Holland
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,Pays Basque,Abbaye de Notre-Dame de Belloc,Abbaye de Belloc is also known as 'Abbaye Notr...,99999.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Abbaye de Belval,cow's milk,,,This cheese is also known as Le Trappiste de B...,43.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,Burgundy,Frères Frédéric and Joel,The Abbaye de Citeaux cheese comes from the Ci...,99999.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,province of Brittany,Abbaye Cistercienne NOTRE-DAME DE TIMADEUC,"Being direct descendant of the Port du Salut, ...",99999.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,Nord-Pas-de-Calais,Abbaye du Mont des Cats,The Abbaye du Mont des Cats cheese is made by ...,50.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# drop the columns that i dummied in the EDA ('Milk', 'Region', 'Fat content')
# also drop the features that i decided not to mode on ('Producers', 'Description')
df.drop(columns=['Milk', 'Region', 'Fat content', 'Producers', 'Description'], inplace=True)


In [10]:
# check the datafram.  Note that it is now all ones and zeros (except for the "Name" column)
df.head()

Unnamed: 0,Name,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,...,Country of origin_Afghanistan,Country of origin_Ireland,Country of origin_Canada,Country of origin_Israel,Country of origin_Norway,Country of origin_Andorra,Country of origin_Albania,Country of origin_Bulgaria,Country of origin_Spain,Country of origin_Holland
0,Abbaye de Belloc,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,Abbaye de Belval,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# in order to have a dataframe that is entirely numerical, move the "Name" entries from a column to the index
df.set_index('Name', inplace=True)

In [12]:
# check the dataframe.  Now its a big ol beautyful binary matrix y'all!
df.head()

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Afghanistan,Country of origin_Ireland,Country of origin_Canada,Country of origin_Israel,Country of origin_Norway,Country of origin_Andorra,Country of origin_Albania,Country of origin_Bulgaria,Country of origin_Spain,Country of origin_Holland
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


### Part 3:  Instantiate Raw Input Question As Series of Dropdown Menus

#### Instantiate input question for the name

In [14]:
# pull all the cheese names from the original dataframe
name_list = list(cheese_com['Name'].values)
# add a '-' to the list, in case the user doesn't want to pick a cheese 
# and only wants to search based on features
name_list.append('-')

def name_f(name):
    return name

name = interactive(
    name_f,
    name = sorted(name_list) # sort the list of names ('-' will be the first value)
)

#### Instantiate input question for the type

In [15]:
def cheese_type_f(cheese_type):
    return cheese_type

cheese_type = interactive(
    cheese_type_f,
    cheese_type=sorted([
        'whey', 'soft', 'semi-hard', 'fresh soft', 'semi-soft', 'soft-ripened', 'fresh firm', 'processed', 
        'hard', 'smear-ripened', 'artisan', 'firm', 'semi firm', 'brined', 'blue-veined', '-']) 
)

#### Instantiate input question for the texture

In [16]:
def texture_f(texture):
    return texture

texture = interactive(
    texture_f,
    texture=sorted(['flaky', 'fluffy', 'dry', 'close', 'open', 'runny', 'semi firm', 'brittle', 
                    'crystalline', 'chalky', 'elastic', 'springy', 'soft', 'stringy', 'chewy', 
                    'soft-ripened', 'oily', 'supple', 'creamy', 'grainy', 'compact', 'sticky', 
                    'buttery', 'dense', 'firm', 'spreadable', 'crumbly', 'smooth', '-'])
)

#### Instantiate input question for the flavor

In [17]:
def flavor_f(flavor):
    return flavor

def flavor_f(flavor):
    return flavor

flavor = interactive(
    flavor_f,
    flavor=sorted(['fruity', 'bitter', 'milky', 'oceanic', 'grassy', 'mild', 'piquant', 'spicy', 
            'strong', 'earthy', 'citrusy', 'acidic', 'salty', 'yeasty', 'woody', 'lemony', 
            'pronounced', 'tart', 'savory', 'licorice', 'sweet', 'herbaceous', 'butterscotch', 
            'pungent', 'sour', 'garlicky', 'crunchy', 'creamy', 'nutty', 'meaty', 'rustic', 'sharp', 
            'umami', 'full-flavored', 'smokey', 'floral', 'subtle', 'buttery', 'caramel', 'mineral', 
            'burnt caramel', 'mellow', 'vegetal', 'tangy', 'smokey ', 'mushroomy', 'smooth', '-'])
)

#### Instantiate input question for the aroma

In [18]:
def aroma_f(aroma):
    return aroma

aroma = interactive(
    aroma_f,
    aroma=sorted(['fruity', 'rich', 'toasty', 'milky', 'fermented', 'grassy', 'lanoline', 'fresh', 
           'mild', 'goaty', 'barnyardy', 'earthy', 'strong', 'aromatic', 'spicy', 'pleasant', 
           'mushroom', 'yeasty', 'herbal', 'woody', 'pronounced', 'musty', 'sweet', 'pungent', 
           'clean', 'ripe', 'garlicky', 'perfumed', 'nutty', 'raw nut', 'smokey', 'floral', 'subtle', 
           'stinky', 'buttery', 'caramel', 'lactic', '-'])
)

#### Instantiate input question for the type of rind

In [19]:
def rind_f(rind):
    return rind

rind = interactive(
    rind_f,
    rind=sorted(['leaf wrapped', 'rindless', 'ash coated', 'artificial', 'cloth wrapped', 'waxed', 
           'plastic', 'mold ripened', 'washed', 'bloomy', 'natural', '-'])
)

#### Instantiate input question for if the user wants vegetarian cheese (google 'rennet' if you're curious why this is neccesary)

In [20]:
def vegetarian_f(vegetarian):
    return vegetarian

vegetarian = interactive(
    vegetarian_f,
    vegetarian=['-' ,'yes', 'no']
)

#### Instantiate input question for the family of cheese

In [21]:
def family_f(family):
    return family

family = interactive(
    family_f,
    family=sorted(['Pecorino', 'Cheddar', 'Feta', 'Italian Cheese', 'Camembert', 'Blue', 
            'Raclette', 'Monterey Jack', 'Cottage', 'Gruyere', 'Pasta filata', 'Havarti', 
            'Parmesan', 'Swiss Cheese', 'Mozzarella', 'Cornish', 'Gorgonzola', 'Brie', 'Caciotta', 
            'Gouda', 'Tomme', 'Saint-Paulin', '-'])
)

#### Instantiate input question for the country of origin

In [22]:
def country_f(country):
    return country

country = interactive(
    country_f,
    country=sorted(['England', 'Austria', 'Syria', 'Pakistan', 'Netherlands', 'Afghanistan', 
             'Brazil', 'France', 'Israel', 'India', 'Poland', 'Nepal', 'Wales', 'Jordan', 
             'Holland', 'Bulgaria', 'Sweden', 'China', 'Denmark', 'Scotland', 'Ireland', 
             'Bangladesh', 'Belarus', 'Argentina', 'Australia', 'Ukraine', 'Eastern Mediterranean', 
             'Mauritania', 'Italy', 'Middle East', 'Finland', 'Great Britain', 'New Zealand', 'Palestine', 
             'Serbia', 'Armenia', 'Lebanon', 'Albania', 'Switzerland', 'Mexico', 'Andorra', 'Greece', 'Croatia', 
             'Romania', 'Japan', 'Isle of Man', 'Cyprus', 'Belgium', 'Azerbaijan', 'United Kingdom', 
             'Czech Republic', 'Canada', 'Spain', 'Egypt', 'Turkey', 'Iran', 'Russia', 'Chile', 'Norway', 
             'Iraq', 'Hungary', 'United States', 'Swaziland', 'Mongolia', 
             'Lithuania', 'Portugal', 'Tibet', 'Caribbean', 'Macedonia', 'Georgia', 'Slovakia', 'Iceland', 
             'Germany', '-'])
)

#### Instantiate input question for the animal the milk came from (or if vegan)

In [23]:
def milk_f(milk):
    return milk

milk = interactive(
    milk_f,
    milk=sorted(['vegan', 'cow', 'sheep', 'goat', 'mare', 'buffalo', 'water buffalo', 'camel', 'donkey', 
                'moose', 'reindeer', 'yak', '-'])
)

#### Instantiate input question for if user want's pasteurized cheese or not

In [24]:
def pasteurized_f(pasteurized):
    return pasteurized

pasteurized = interactive(
    pasteurized_f,
    pasteurized=['-' , 'pasteurized', 'unpasteurized']
)

#### Instantiate input question for the fat content (percentage)

In [25]:
def fat_content_f(fat_content):
    return fat_content

fat_content = interactive(
    fat_content_f,
    fat_content=['-', 'less than 0.15%', '0.16-3%', '4-19%', 
                 '20-39%', '40-59%', '60-74%', '75%+']
)

### Part 4:  Preference Selection

In [44]:
#print a prompt
print("Hello there, I will be your Digital Cheese Sommelier this evening.  Can I make a recommendation?")
print("Please select a cheese you like and/or any cheese categories you may fancy:")
print("(When you have selected your choices, please push the Recommend button at the bottom of the list)")

# Create a dropdown list for each input question listed above using display()
display(name)

display(cheese_type)

display(texture)

display(flavor)

display(aroma)

display(rind)

display(vegetarian)

display(family)

display(country)

display(milk)

display(pasteurized)

display(fat_content)



# return the selected feature from the name dropdown to name_drop as a string
name_drop = name.result


# return the selected feature from the type dropdown to cheese_type_drop as a string
# if no feature was selected
if cheese_type.result == '-':
    # save the value simply as '-'
    cheese_type_drop = cheese_type.result
#otherwise
else:
    # add "Featurename_" to the begining of the string
    cheese_type_drop = "Type_" + cheese_type.result


# return the selected feature from the texture dropdown to texture_drop as a string
# if no feature was selected
if texture.result == '-':
    # save the value simply as '-'
    texture_drop = texture.result
else:
    # add "Featurename_" to the begining of the string
    texture_drop = "Texture_" + texture.result


# return the selected feature from the flavor dropdown to flavor_drop as a string
# if no feature was selected
if flavor.result == '-':
    # save the value simply as '-'
    flavor_drop = flavor.result
else:
    # add "Featurename_" to the begining of the string
    flavor_drop = "Flavour_" + flavor.result


# return the selected feature from the aroma dropdown to aroma_drop as a string
# if no feature was selected
if aroma.result == '-':
    # save the value simply as '-'
    aroma_drop = aroma.result
else:
    # add "Featurename_" to the begining of the string
    aroma_drop = "Aroma_" + aroma.result


# return the selected feature from the rind dropdown to rind_drop as a string
# if no feature was selected
if rind.result == '-':
    # save the value simply as '-'
    rind_drop = rind.result
else:
    # add "Featurename_" to the begining of the string
    rind_drop = "Rind_" + rind.result

 
# return the selected feature from the vegetarian dropdown to vegetarian_drop as a string
# if no feature was selected
if vegetarian.result == '-':
    # save the value simply as '-'
    vegetarian_drop = vegetarian.result
else:
    # add "Featurename_" to the begining of the string
    vegetarian_drop = "Vegetarian_" + vegetarian.result


# return the selected feature from the family dropdown to family_drop as a string
# if no feature was selected
if family.result == '-':
    # save the value simply as '-'
    family_drop = family.result
else:
    # add "Featurename_" to the begining of the string
    family_drop = "Family_" + family.result

    
# return the selected feature from the country of origin dropdown to country_drop as a string
# if no feature was selected
if country.result == '-':
    # save the value simply as '-'
    country_drop = country.result
else:
    # add "Featurename_" to the begining of the string
    country_drop = "Country of origin_" + country.result


# return the selected feature from the milk dropdown to milk_drop as a string
# if no feature was selected
if milk.result == '-':
    # save the value simply as '-'
    milk_drop = milk.result
else:
    # add "Featurename_" to the begining of the string
    milk_drop = "Milk_" + milk.result


# return the selected feature from the pasteurized dropdown to pasteurized_drop as a string
# if no feature was selected
if pasteurized.result == '-':
    # save the value simply as '-'
    pasteurized_drop = pasteurized.result
else:
    # add "Featurename_" to the begining of the string
    pasteurized_drop = "Treatment_" + pasteurized.result

 
# return the selected feature from the fat content dropdown to fat_content_drop as a string
# if no feature was selected
if fat_content.result == '-':
    # save the value simply as '-'
    fat_content_drop = fat_content.result
else:
    # add "Featurename_" to the begining of the string
    fat_content_drop = "Fat_" + fat_content.result
    

# create a function that executes all code below this cell that runs it
def run_all(ev):
    display(Javascript('IPython.notebook.execute_cells_below()'))

# create a button that reads 'Recommend' that, when pressed, executes the function written above
# this executes the remaining code below this cell
button = widgets.Button(description="Recommend")
button.on_click(run_all)
# display the button
display(button)

<IPython.core.display.Javascript object>

Hello there, I will be your Digital Cheese Sommelier this evening.  Can I make a recommendation?
Please select a cheese you like and/or any cheese categories you may fancy:
(When you have selected your choices, please push the Recommend button at the bottom of the list)


interactive(children=(Dropdown(description='name', index=1562, options=('-', 'ADL Brick Cheese', 'ADL Mild Che…

interactive(children=(Dropdown(description='cheese_type', options=('-', 'artisan', 'blue-veined', 'brined', 'f…

interactive(children=(Dropdown(description='texture', options=('-', 'brittle', 'buttery', 'chalky', 'chewy', '…

interactive(children=(Dropdown(description='flavor', options=('-', 'acidic', 'bitter', 'burnt caramel', 'butte…

interactive(children=(Dropdown(description='aroma', options=('-', 'aromatic', 'barnyardy', 'buttery', 'caramel…

interactive(children=(Dropdown(description='rind', options=('-', 'artificial', 'ash coated', 'bloomy', 'cloth …

interactive(children=(Dropdown(description='vegetarian', options=('-', 'yes', 'no'), value='-'), Output()), _d…

interactive(children=(Dropdown(description='family', options=('-', 'Blue', 'Brie', 'Caciotta', 'Camembert', 'C…

interactive(children=(Dropdown(description='country', options=('-', 'Afghanistan', 'Albania', 'Andorra', 'Arge…

interactive(children=(Dropdown(description='milk', options=('-', 'buffalo', 'camel', 'cow', 'donkey', 'goat', …

interactive(children=(Dropdown(description='pasteurized', options=('-', 'pasteurized', 'unpasteurized'), value…

interactive(children=(Dropdown(description='fat_content', options=('-', 'less than 0.15%', '0.16-3%', '4-19%',…

Button(description='Recommend', style=ButtonStyle())

### Part 5:  Creating masks based on the selections made in the dropdown

#### These masks will be used to shape the dataframes used to create each instance of a recommender

In [45]:
# create a null mask that does not affect the dataframe.  for the sake of getting it done
# i arbitrarily chose the state of being yak milk or not being yak milk
# because yak milk is funny and this does the job
yak_mask = df["Milk_yak"] == 1
no_yak_mask = df["Milk_yak"] == 0

null_mask = yak_mask | no_yak_mask

In [46]:
# if nothing selected, set this mask = the null mask
if name_drop == '-':
    name_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese name is the one selected
    name_mask = df.index == name_drop

In [47]:
# if nothing selected, set this mask = the null mask
if cheese_type_drop == '-':
    type_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese type is the one selected
    type_mask = df[cheese_type_drop] == 1

In [48]:
# if nothing selected, set this mask = the null mask
if texture_drop == '-':
    texture_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese texture is the one selected
    texture_mask = df[texture_drop] == 1

In [49]:
# if nothing selected, set this mask = the null mask
if flavor_drop == '-':
    flavor_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese flavor is the one selected
    flavor_mask = df[flavor_drop] == 1

In [50]:
# if nothing selected, set this mask = the null mask
if aroma_drop == '-':
    aroma_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese aroma is the one selected
    aroma_mask = df[aroma_drop] == 1

In [51]:
# if nothing selected, set this mask = the null mask
if rind_drop == '-':
    rind_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese rind is the one selected
    rind_mask = df[rind_drop] == 1

In [52]:
# if nothing selected, set this mask = the null mask
if vegetarian_drop == '-':
    vegetarian_mask = null_mask
else:
    # set the mask to the the value in the data frame where the vegetarian option is the one selected
    vegetarian_mask = df[vegetarian_drop] == 1

In [53]:
# if nothing selected, set this mask = the null mask
if family_drop == '-':
    family_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese family is the one selected
    family_mask = df[family_drop] == 1

In [54]:
# if nothing selected, set this mask = the null mask
if country_drop == '-':
    country_mask = null_mask
else:
    # set the mask to the the value in the data frame where the cheese country is the one selected
    country_mask = df[country_drop] == 1

In [55]:
# if nothing selected, set this mask = the null mask
if milk_drop == '-':
    milk_mask = null_mask
else:
    # set the mask to the the value in the data frame where the milk source animal is the one selected
    milk_mask = df[milk_drop] == 1

In [56]:
# if nothing selected, set this mask = the null mask
if pasteurized_drop == '-':
    pasteurized_mask = null_mask
else:# set the mask to the the value in the data frame where the pasteurization type is the one selected
    pasteurized_mask = df[pasteurized_drop] == 1

In [57]:
# if nothing selected, set this mask = the null mask
if fat_content_drop == '-':
    fat_mask = null_mask
else:# set the mask to the the value in the data frame where the fat content is the one selected
    fat_mask = df[fat_content_drop] == 1

In [58]:
# create a mask that gives the dataframe only when ALL of the features selected above are true
half_mask1 = type_mask & texture_mask & flavor_mask & aroma_mask & rind_mask & vegetarian_mask 
half_mask2 =  family_mask & country_mask & milk_mask & pasteurized_mask & fat_mask

# i did this by doing halves and adding them so the code would look prettier
full_mask = half_mask1 & half_mask2

### Part 6:  Create and run an instance of a recommender based on the masks that were made from the dropdown menus and applying them to the big binary dataframe.  After getting the list and pairwise distance from the 10 most closely related cheeses, display a dataframe with the most pertinent information in a readable format for the user.

In [59]:
# this function formats the output dataframe from the recommender into a dataframe of those same cheeses
# with all of the readable, desirable information from the original dataframe, 
# as well as the pairwise distance relationships

def format_display(rec_df, dif_factor):
    
    # set a dataframe that will be what is displayed at the end, 
    # referencing the cheeses and ratings the recommender gets
    chz_display = cheese_com[['Name', 'Milk', 'Country of origin', 'Type', 
                'Texture', 'Rind', 'Flavour', 'Aroma', 'Vegetarian', 'Family', 'Fat content', 'Description']]
    
    # replace null values with N/A
    chz_display.fillna('N/A', inplace=True)
    # recall that during EDA, null values of 'Fat content' were filled with 99999 
    # so the formatting code would run
    chz_display.replace(99999, 'N/A', inplace=True)
    
    # the descripions are long enough without the unintentionally scraped website add.  drop this from the display
    for i in range(len(chz_display)):
        #chz_display.iloc[i]['Description'].replace('Over 500,000 page views per month, Put your store on our map!Want to be listed on cheese.com? Here could be your shop!', '', inplace=True)
        chz_display.iloc[i]['Description'] = chz_display.iloc[i]['Description'].replace('Over 500,000 page views per month, Put your store on our map!Want to be listed on cheese.com? Here could be your shop!', '')

    # with some help from https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas
    # Define the sorter
    sort_list = list(rec_df.index)
    # Create the dictionary that defines the order for sorting
    sorterIndex = dict(zip(sort_list,range(len(sort_list))))

    mask_dict = {}
    mask_list = []
    for i in range(len(sort_list)):
        mask = chz_display['Name'] == i
        mask_dict[i] = chz_display['Name'] == sort_list[i]



    mask_list = mask_dict[0] | mask_dict[1] | mask_dict[2] | mask_dict[3] | mask_dict[4] | mask_dict[5] | mask_dict[6] | mask_dict[7] | mask_dict[8] | mask_dict[9]

    display_guy = chz_display[mask_list]

    display_guy['Name_rank'] = display_guy[mask_list]['Name'].map(sorterIndex)
    display_guy.sort_values(by='Name_rank', inplace = True)
    display_guy.drop('Name_rank', 1, inplace = True)

    # reset the index so it won't just be the alphabetical order of all the cheeses
    # this will give an index that reflects the order of most closely related 
    display_guy.reset_index(drop=True, inplace=True)
    # add 1 to the index so it will rate from 1-10 instead of 0-9
    display_guy.index = display_guy.index + 1

    

    # add the pairwise distance calculated by the recommender to this display dataframe as 'Difference Factor'
    display_guy['Difference Factor'] = dif_factor

    # make the cell with the Description large enough to read the entire description
    # code tip from https://stackoverflow.com/questions/39680147/can-i-set-variable-column-widths-in-pandas
    display_guy = display_guy.style.set_properties(subset=['Description'], **{'width': '300px'})

    return display_guy


In [61]:
# if no cheese name was entered, then all that needs to be returned is the dataframe filtered through all
# of the masks specified by the user
if name_drop == '-':
    # save the original dataframe that has been filtered as a dataframe named 'display_frame'
    # return only the first 10 values to maintain consistency between the two conditionals
    display_frame = df[full_mask].sort_values(by='Name')[0:10]
    
    # set a variable 'dif_factor' as the pairwise distance calculated by the recommender
    # since a recommender wasn't technically run here, all of the cheeses fit what was searched for 100%
    # therefore the list will just be a zero value for each cheese returned (sometimes it will be less then 10)
    
    # instatiate an empty list
    dif_factor = []
    
    # for every entry in the display frame, append a 0 to this list
    for i in range(len(display_frame)):
        dif_factor.append(0)


    
# if a cheese name is entered by the user, do the following
else:
    # first create a dataframe that is filtered by all of the masks specified by the user, and also
    # includes the cheese that was named by the user
    df2 = df[name_mask |full_mask]
    
    # calculate the cosine similarity for each cheese in this dataframe using the pairwise_distances function
    # this will return a square matrix, comparing every cheese with every other cheese in the dataframe
    recommender = pairwise_distances(df2, metric='cosine')

    # Create a distances DataFrame the matrix defined above, filling in the cheese names (which were in the index
    # of the old dataframe) as the column names and indicies for the recommender dataframe
    recommender_df = pd.DataFrame(recommender, columns=df2.index, index=df2.index)

    # use the recomender to return the first 10 cheeses (NOT including the cheese named, hence sort [1:11])
    # put this list into a dataframe called 'display_frame'
    display_frame = pd.DataFrame(recommender_df[name_drop].sort_values()[1:11])
    
    # set a variable 'dif_factor' as the pairwise distance calculated by the recommender
    dif_factor = display_frame.values


    
# if there were no cheeses in the dataset with the sepcified error message (the length will be zero),
# then print this error message
if len(display_frame) == 0:
    print("Sorry! There are no cheeses that have all of those specific features")
    print("Please adjust the selected features and try again")
  
# if no error messages, display the dataframe 
else:
    # format the display_frame to include a readable list of important cheese data, and display it
    display(format_display(display_frame, dif_factor))



Unnamed: 0,Name,Milk,Country of origin,Type,Texture,Rind,Flavour,Aroma,Vegetarian,Family,Fat content,Description,Difference Factor
1,Fresh Hand-Stretched Mozzarella,pasteurized cow's milk,United States,"fresh soft, artisan","creamy, elastic, smooth, springy and stringy",natural,"full-flavored, mild, milky",fresh,,Mozzarella,,"Fresh Hand-Stretched Mozzarella is a pasta filata cheese made from heat-treated cow's milk. This fresh cheese with a full-bodied flavour is individually wrapped in balls, weighing from ¼ lb to ½ lb. It is available only from April through November.",0.419052
2,Mozzarellissima,pasteurized cow's milk,United States,"semi-soft, artisan","elastic, springy, stringy and supple",rindless,"buttery, mild, milky",mild,yes,Pasta filata,,"Mozzarellissima is an award-winning stretch Mozzarella from Saputo. It is a cheese that belongs to the pasta filata family and is known for its superb melting, stretching, browning properties and elastic consistency. Available in both regular and light varieties, Mozzarellissima is a favourite in pizzas, lasagna and a variety of baked dishes.",0.419052
3,Fior Di Latte,pasteurized cow's milk,Australia,"semi-soft, brined",elastic,rindless,"mild, milky, tangy","fresh, mild",yes,Mozzarella,17.1,"Fior Di Latte is semi-soft, fresh cheese made in the style of Italian mozzarella. This cow's milk cheese is produced by Paesanella Cheese Manufacturers in New South Wales, Australia. Smooth, extremely fresh, little tangy in flavour, elastically textured cheese finds its way mostly on pizzas and other over based dishes because of its superb melting characteristics.",0.466667
4,Classics Fresh Mozzarella,pasteurized cow's milk,United States,"soft, brined","creamy, elastic, smooth, soft, stringy and supple",rindless,"acidic, mild, milky, spicy",fresh,yes,Mozzarella,,"Classics Fresh Mozzarella is a fresh milky cheese, made in the style of Italian fresh mozzarella. This cow's milk cheese is produced by Crave Brothers - Charles, Thomas, Mark and George at their Crave Brothers Dairy Farm in Wisconsin.There are different types of mozzarella available at the farm:- perline (pearl size), - ciliegine (cherry size), - bocconcini (ball size), - ovoline (egg size), medallions, - 1# & 8 oz., balls, - 1# logs- marinated ciliegine (marinated in an olive oil/canola oil blended with spice mix)- fresh curd to pull fresh mozzarella by hand.",0.466886
5,Fresh Jack,pasteurized cow's milk,United States,semi-soft,creamy and springy,,mild,fresh,yes,Monterey Jack,,"Fresh Jack is Monterey Jack cheese made from whole or skimmed milk and aged for only a week. This creamy, semi-soft cheese is made from cow's milk. The high moisture content and freshness of the cheese tastes wonderful with sandwiches and cooked dishes. Fresh Jack is very similar to Edam in texture, but it is creamier and more springy. It is used as a table cheese in salads and also for melting.",0.510102
6,Sea Change,cow's milk,United States,"semi-soft, artisan",elastic and smooth,natural,"buttery, mild, milky","fruity, lactic, yeasty",,,,"Named for a passage in Shakespeare's 'The Tempest', Sea Change is the second release from The Mystic Cheese Company. It is an Italian Stracchino-style cheese with a yeasty, doughy paste of 4 ounce disk. The yeast-ripened cow's milk cheese reveals a lactic tang and lingering fruity yeasty notes. These pair well with chestnuts, hickories, sour dough bread and grass fed beef. Wine-wise, accompany it with a Prosecco or Asti.",0.570331
7,Beehive Fresh,pasteurized cow's milk,United States,"fresh soft, artisan",buttery and soft,,"buttery, mild","buttery, fresh, mild",yes,Mozzarella,,"This cheese is currently unavailable on official site.Beehive Fresh is a mozzarella style, high-moisture cheese made with pasteurized Jersey milk. It is produced by Beehive Cheese Company in Utah, United States. This mild and buttery cheese can be spread easily because of its high moisture content. It pairs well with Medium-bodied red wines, citrusy white wines, Belgian-style white beer. Enjoy the cheese with fresh tomato, pizza, lasagna and Mexican dishes.",0.585961
8,Mun-chee,pasteurized cow's milk,United States,"semi-soft, processed",creamy and smooth,natural,"mild, sweet",mild,,,,"Mun-chee is a sweet, mild semi-soft processed American cheese from the DCI cheese company. Also called Sweet Mun-chee sometimes, it is a great cheese for sandwiches and wraps. It could be eaten alone as a snack or melted on omelets, macaroni and grilled sandwiches. This easy to carry cheese is the perfect accompaniment in kid’s lunchboxes, picnic and camping trips. Mun-chee is a certified OU kosher product.",0.610751
9,Fresh Fior Di Latte,pasteurized cow's milk,Australia,semi-soft,"elastic, smooth and soft",rindless,"milky, smooth, sweet","milky, sweet",yes,Mozzarella,14.6,"Fresh Fior Di Latte cheese is same as Bocconcini cheese. This cow's milk cheese is produced by Paesanella Cheese Manufacturers in New South Wales, Australia. Being suspended in water, the cheese maintains its fressness. Soft, fresh, elastically textured and chewy in consistency with a hint of salt cheese finds its way mostly on pizzas, salads, and other side dishes.",0.612702
10,Coupole,pasteurized goat's milk,United States,"soft, artisan","buttery, creamy, dense and fluffy",mold ripened,"mild, milky","fresh, mild, milky",yes,,,"Coupole, named for its dome shape is a soft cheese made using fresh pasteurized goat's milk, produced by Vermont Creamery, Websterville, Vermont, US. The cheese is dense and creamy in texture, mild and fresh in taste. The paste is clean and lemony while the wrinkled rind is strong, intense in flavour that makes the cheese stand out on a cheese platter.Coupole has won several awards, including first place at the 2012 American Cheese Society, silver medal at the 2011 World Cheese Awards.",0.612702
