# Cheese Recommender

## EDA and Cleaning

In [1]:
import pandas as pd 
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
import unicodedata

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import StandardScaler

from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import widgets, interact, interact_manual, fixed
from IPython.display import display, clear_output, HTML, Javascript

import warnings
warnings.filterwarnings('ignore')


Recommending cheese based on blah blah blah...  Cheese.com very lenient API

In [2]:

def dummy_listed_features(df, feature):

    # for ceratin features, some cells have multiple values
    # to get all of the values of these features, first create a list 
    # of all of the UNIQUE values that appear in the selected feature

    # fill the null values of the feature with a string value of "None" so as to not throw an error
    df[feature].fillna('None', inplace=True)

    # create an empty string
    strang = ""
    # look through every cell of the feature
    for i in list(df[feature].unique()):
        # add every entry in the cell into a long string seperated by a comma and a space
        strang += i + ', '

    # strip the extraneous comma and whitespace from the end of the newly gathered string
    # replace ' and' with a comma
    strang = strang.replace(' and', ',').strip(', ')

    # seperate the long string of values by a comma and space into a list of values in the feature
    # convert this list into a set to drop the values that appear more then once in an entry for the feature
    # convert this back into a list so it can be indexed 
    feature_list = list(set(strang.split(', ')))

    # initialize a dummy column for each unique value of the feature
    for i in feature_list:
        # initialize each column as 0
        df[f'{feature}_{i}']=0

    # go through every cheese in the list
    for i in range(len(df)):

        # look at each value in each cell by seperating by commas, putting it into a list, 
        # and iterating through that list (remember to replace any ' and' with a comma before creating list)
        for k in df[feature][i].replace(' and', ',').split(', '):
            # if the value exists in the feature for a particular cheese, set the dummy value to 1
            df[f'{feature}_{k}'][i] = 1

            
            
    # once feature has been dummied, drop the original        
    #df.drop(columns=feature, inplace=True)
    
    # also drop the feature column created when the value was 'None' because that was a null value before        
    df.drop(columns=f'{feature}_None', inplace=True)

    # remove the 'None' value that was stored in the list, as it has been dropped from the dataframe
    feature_list.remove('None')
    
    # print out the values in each feature (for reference)
    print(f'List of values for {feature} : ')
    
    print(feature_list) 
    print()


In [3]:
#read big_cheese2.csv into 'cheese_com'
cheese_com = pd.read_csv('./data/model_testing2.csv')

In [4]:
cheese_com

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Milk_mare,Milk_moose,Milk_reindeer,Milk_water buffalo,Milk_yak,Treatment_pasteurized,Treatment_unpasteurized,Milk_blend yes,Triple Cream,Double Cream
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,France,Pays Basque,"semi-hard, artisan","creamy, dense and firm",natural,burnt caramel,lanoline,yes,...,0,0,0,0,0,0,1,1,0,0
1,Abbaye de Belval,cow's milk,France,,semi-hard,elastic,washed,,aromatic,no,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,France,Burgundy,"semi-soft, artisan, brined","creamy, dense and smooth",washed,"acidic, milky, smooth","barnyardy, earthy",no,...,0,0,0,0,0,0,1,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,France,province of Brittany,semi-hard,soft,washed,,,no,...,0,0,0,0,0,1,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,France,Nord-Pas-de-Calais,"semi-soft, artisan, brined",smooth and supple,washed,"milky, salty",floral,no,...,0,0,0,0,0,1,0,0,0,0
5,Abbot’s Gold,pasteurized cow's milk,"England, Great Britain and United Kingdom",North Yorkshire,semi-hard,"creamy, crumbly, dense and semi firm",natural,"mild, sweet, tangy",aromatic,yes,...,0,0,0,0,0,1,0,0,0,0
6,Abertam,sheep's milk,Czech Republic,Karlovy Vary,"hard, artisan",firm,natural,"acidic, strong, tangy",,no,...,0,0,0,0,0,0,0,0,0,0
7,Abondance,unpasteurized cow's milk,France,"Haute-Savoie, Abondance","semi-hard, artisan","creamy, open and supple",natural,"acidic, buttery, fruity, sweet",nutty,no,...,0,0,0,0,0,0,1,0,0,0
8,Acapella,goat's milk,United States,California,"soft, soft-ripened",,,buttery,"fresh, herbal",no,...,0,0,0,0,0,0,0,0,0,0
9,Accasciato,Buffalo's and cow's milk,Italy,Campania,semi-hard,firm,natural,sweet,"aromatic, fresh",no,...,0,0,0,0,0,0,0,1,0,0


In [5]:
df = cheese_com

In [6]:
df.head()

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Milk_mare,Milk_moose,Milk_reindeer,Milk_water buffalo,Milk_yak,Treatment_pasteurized,Treatment_unpasteurized,Milk_blend yes,Triple Cream,Double Cream
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,France,Pays Basque,"semi-hard, artisan","creamy, dense and firm",natural,burnt caramel,lanoline,yes,...,0,0,0,0,0,0,1,1,0,0
1,Abbaye de Belval,cow's milk,France,,semi-hard,elastic,washed,,aromatic,no,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,France,Burgundy,"semi-soft, artisan, brined","creamy, dense and smooth",washed,"acidic, milky, smooth","barnyardy, earthy",no,...,0,0,0,0,0,0,1,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,France,province of Brittany,semi-hard,soft,washed,,,no,...,0,0,0,0,0,1,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,France,Nord-Pas-de-Calais,"semi-soft, artisan, brined",smooth and supple,washed,"milky, salty",floral,no,...,0,0,0,0,0,1,0,0,0,0


In [7]:
listed_features = ["Type", "Texture", "Flavour", "Aroma", "Rind", "Vegetarian", "Family", "Country of origin"]

for i in listed_features:
    dummy_listed_features(df, i)
    df = df.drop(columns=i)
    

List of values for Type : 
['semi-soft', 'blue-veined', 'brined', 'soft-ripened', 'artisan', 'soft', 'firm', 'smear-ripened', 'whey', 'semi firm', 'semi-hard', 'hard', 'fresh soft', 'fresh firm', 'processed']

List of values for Texture : 
['flaky', 'soft-ripened', 'stringy', 'grainy', 'fluffy', 'open', 'smooth', 'compact', 'soft', 'crumbly', 'brittle', 'springy', 'dense', 'spreadable', 'chewy', 'dry', 'elastic', 'creamy', 'crystalline', 'oily', 'chalky', 'supple', 'buttery', 'firm', 'semi firm', 'close', 'sticky', 'runny']

List of values for Flavour : 
['herbaceous', 'burnt caramel', 'vegetal', 'rustic', 'oceanic', 'salty', 'mellow', 'smokey', 'fruity', 'caramel', 'woody', 'sweet', 'floral', 'smooth', 'full-flavored', 'sour', 'savory', 'mineral', 'mushroomy', 'lemony', 'sharp', 'nutty', 'milky', 'smokey ', 'pronounced', 'meaty', 'pungent', 'acidic', 'strong', 'grassy', 'creamy', 'butterscotch', 'tangy', 'crunchy', 'tart', 'bitter', 'earthy', 'citrusy', 'buttery', 'licorice', 'garlick

In [8]:
cheese_com['Region'].nunique()

583

In [9]:
cheese_com

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Type_soft,Type_firm,Type_smear-ripened,Type_whey,Type_semi firm,Type_semi-hard,Type_hard,Type_fresh soft,Type_fresh firm,Type_processed
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,France,Pays Basque,"semi-hard, artisan","creamy, dense and firm",natural,burnt caramel,lanoline,yes,...,0,0,0,0,0,1,0,0,0,0
1,Abbaye de Belval,cow's milk,France,,semi-hard,elastic,washed,,aromatic,no,...,0,0,0,0,0,1,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,France,Burgundy,"semi-soft, artisan, brined","creamy, dense and smooth",washed,"acidic, milky, smooth","barnyardy, earthy",no,...,0,0,0,0,0,0,0,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,France,province of Brittany,semi-hard,soft,washed,,,no,...,0,0,0,0,0,1,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,France,Nord-Pas-de-Calais,"semi-soft, artisan, brined",smooth and supple,washed,"milky, salty",floral,no,...,0,0,0,0,0,0,0,0,0,0
5,Abbot’s Gold,pasteurized cow's milk,"England, Great Britain and United Kingdom",North Yorkshire,semi-hard,"creamy, crumbly, dense and semi firm",natural,"mild, sweet, tangy",aromatic,yes,...,0,0,0,0,0,1,0,0,0,0
6,Abertam,sheep's milk,Czech Republic,Karlovy Vary,"hard, artisan",firm,natural,"acidic, strong, tangy",,no,...,0,0,0,0,0,0,1,0,0,0
7,Abondance,unpasteurized cow's milk,France,"Haute-Savoie, Abondance","semi-hard, artisan","creamy, open and supple",natural,"acidic, buttery, fruity, sweet",nutty,no,...,0,0,0,0,0,1,0,0,0,0
8,Acapella,goat's milk,United States,California,"soft, soft-ripened",,,buttery,"fresh, herbal",no,...,1,0,0,0,0,0,0,0,0,0
9,Accasciato,Buffalo's and cow's milk,Italy,Campania,semi-hard,firm,natural,sweet,"aromatic, fresh",no,...,0,0,0,0,0,1,0,0,0,0


In [10]:
df

Unnamed: 0,Name,Milk,Region,Producers,Description,Fat content,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
0,Abbaye de Belloc,unpasteurized cow's and sheep's milk,Pays Basque,Abbaye de Notre-Dame de Belloc,Abbaye de Belloc is also known as 'Abbaye Notr...,99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Abbaye de Belval,cow's milk,,,This cheese is also known as Le Trappiste de B...,43.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,unpasteurized cow's milk,Burgundy,Frères Frédéric and Joel,The Abbaye de Citeaux cheese comes from the Ci...,99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Abbaye de Timadeuc,pasteurized cow's milk,province of Brittany,Abbaye Cistercienne NOTRE-DAME DE TIMADEUC,"Being direct descendant of the Port du Salut, ...",99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Abbaye du Mont des Cats,pasteurized cow's milk,Nord-Pas-de-Calais,Abbaye du Mont des Cats,The Abbaye du Mont des Cats cheese is made by ...,50.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Abbot’s Gold,pasteurized cow's milk,North Yorkshire,Wensleydale Creamery,"Abbot's Gold, also known by other names such a...",99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Abertam,sheep's milk,Karlovy Vary,,Abertam is a traditional sheep's milk cheese m...,45.00,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
7,Abondance,unpasteurized cow's milk,"Haute-Savoie, Abondance",,Tomme d'Abondance or Abondance is a medium-siz...,48.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Acapella,goat's milk,California,Andante Dairy,Acapella is a soft goat's milk cheese produced...,99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Accasciato,Buffalo's and cow's milk,Campania,Casa Madaio,"Accasciato meaning 'collapsed' in Italian, is ...",99999.00,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
df.drop(columns=['Milk', 'Region', 'Fat content', 'Producers', 'Description'], inplace=True)




In [12]:
df

Unnamed: 0,Name,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
0,Abbaye de Belloc,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,Abbaye de Belval,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5,Abbot’s Gold,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
6,Abertam,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
7,Abondance,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,Acapella,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Accasciato,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [13]:
df.set_index('Name', inplace=True)

In [14]:
df

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
recommender = pairwise_distances(df, metric='cosine')

In [16]:
recommender

array([[0.00000000e+00, 7.16526645e-01, 5.99108137e-01, ...,
        6.22035527e-01, 6.61938298e-01, 5.71428571e-01],
       [7.16526645e-01, 2.22044605e-16, 6.46446609e-01, ...,
        8.33333333e-01, 7.76393202e-01, 8.11017763e-01],
       [5.99108137e-01, 6.46446609e-01, 0.00000000e+00, ...,
        8.82148870e-01, 7.62829175e-01, 7.32738758e-01],
       ...,
       [6.22035527e-01, 8.33333333e-01, 8.82148870e-01, ...,
        0.00000000e+00, 7.01857603e-01, 8.11017763e-01],
       [6.61938298e-01, 7.76393202e-01, 7.62829175e-01, ...,
        7.01857603e-01, 0.00000000e+00, 9.15484575e-01],
       [5.71428571e-01, 8.11017763e-01, 7.32738758e-01, ...,
        8.11017763e-01, 9.15484575e-01, 0.00000000e+00]])

In [17]:
recommender_df = pd.DataFrame(recommender, columns=df.index, index=df.index)

In [18]:
recommender_df

Name,Abbaye de Belloc,Abbaye de Belval,Abbaye de Citeaux,Abbaye de Timadeuc,Abbaye du Mont des Cats,Abbot’s Gold,Abertam,Abondance,Acapella,Accasciato,...,Za'atar Burrata,Zamorano,Zanetti Grana Padano,Zanetti Parmigiano Reggiano,"Zartschmelzend, Kräftig Würziger Rahm-Hartkäse",Zelu Koloria,Zigljen Iz Extra Mature,Zigljen Iz Mosta,Zimbro,Zwitser
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0.000000,7.165266e-01,0.599108,6.969542e-01,0.785714,0.611078,0.677671,0.546257,1.000000e+00,0.597089,...,0.740719,8.218258e-01,0.629375,0.677671,0.571429,0.611078,0.571429,0.622036,0.661938,0.571429
Abbaye de Belval,0.716527,2.220446e-16,0.646447,3.318469e-01,0.527544,0.742752,0.786799,0.571254,8.750000e-01,0.573599,...,0.828501,7.642977e-01,0.901942,0.893400,0.716527,0.742752,0.905509,0.833333,0.776393,0.811018
Abbaye de Citeaux,0.599108,6.464466e-01,0.000000,6.220355e-01,0.398662,0.818098,0.773866,0.575563,9.116117e-01,0.849244,...,0.757464,9.166667e-01,0.722650,0.773866,0.732739,0.818098,0.866369,0.882149,0.762829,0.732739
Abbaye de Timadeuc,0.696954,3.318469e-01,0.622036,2.220446e-16,0.494924,0.724990,0.886039,0.633321,8.663694e-01,0.658118,...,0.816660,8.740118e-01,0.895172,0.886039,0.696954,0.633321,0.797969,0.732739,0.880477,0.797969
Abbaye du Mont des Cats,0.785714,5.275444e-01,0.398662,4.949237e-01,0.000000,0.870359,0.758253,0.611078,9.055089e-01,0.838835,...,0.740719,8.218258e-01,0.851750,0.838835,0.785714,0.805539,0.714286,0.748024,0.830969,0.857143
Abbot’s Gold,0.611078,7.427521e-01,0.818098,7.249905e-01,0.870359,0.000000,0.853746,0.705882,1.000000e+00,0.634364,...,0.647059,8.383096e-01,0.730931,0.853746,0.740719,0.705882,0.611078,0.714169,0.693214,0.611078
Abertam,0.677671,7.867993e-01,0.773866,8.860394e-01,0.758253,0.853746,0.000000,0.634364,8.933996e-01,0.727273,...,0.926873,4.974811e-01,0.749127,0.727273,0.758253,0.561237,0.677671,0.715732,0.809307,0.758253
Abondance,0.546257,5.712535e-01,0.575563,6.333206e-01,0.611078,0.705882,0.634364,0.000000,8.285014e-01,0.634364,...,0.764706,6.766192e-01,0.596396,0.707491,0.546257,0.705882,0.740719,0.771335,0.769911,0.675898
Acapella,1.000000,8.750000e-01,0.911612,8.663694e-01,0.905509,1.000000,0.893400,0.828501,2.220446e-16,0.786799,...,0.657003,7.642977e-01,1.000000,1.000000,1.000000,0.828501,1.000000,1.000000,0.888197,0.905509
Accasciato,0.597089,5.735986e-01,0.849244,6.581183e-01,0.838835,0.634364,0.727273,0.634364,7.867993e-01,0.000000,...,0.853746,7.989924e-01,0.749127,0.727273,0.758253,0.707491,0.677671,0.715732,0.809307,0.758253


In [19]:
recommender_df['ADL Mild Cheddar'].sort_values()[1:11]

Name
Boivin Medium Cheddar             0.210648
Boivin Extra Aged Cheddar         0.298354
ADL Brick Cheese                  0.327327
Bothwell Black Truffle Cheddar    0.414631
Longhorn                          0.439551
Colby                             0.461538
St-Fidèle Swiss                   0.481125
Vampire Slayer                    0.503861
Cheese Curds                      0.529129
Coalho                            0.538462
Name: ADL Mild Cheddar, dtype: float64

In [20]:
recommender_df['Aged Cashew & Blue Green Algae Cheese'].sort_values()[1:11]

Name
Aged Cashew Nut & Kale Cheese      0.039231
Aged Cashew & Dulse Cheese         0.122942
Aged Cashew & Brazil Nut Cheese    0.230769
Macadamia Nut Cheese               0.279423
Aged Cashew & Hemp Seed Cheese     0.283885
Aged Cashew Nut Cheese             0.332876
Stella Feta                        0.481125
Stella Blue                        0.498255
Raw Brie Style Cashew Cheese       0.498255
Moody Blue                         0.514637
Name: Aged Cashew & Blue Green Algae Cheese, dtype: float64

In [21]:
recommender_df['Shropshire Blue'].sort_values()[1:11]

Name
Adelost                         0.286976
Cropwell Bishop Blue Stilton    0.327786
Organic Beer-Brined Moochego    0.341855
Stella Italian Sharp            0.369874
Moody Blue                      0.393661
Oxford Blue                     0.411765
Stella Blue                     0.414982
Strathdon Blue                  0.414982
Little Black Bomber             0.416617
Holey Cow                       0.416617
Name: Shropshire Blue, dtype: float64

In [22]:
recommender_df['Shropshire Blue'].sort_values()[1:11]

Name
Adelost                         0.286976
Cropwell Bishop Blue Stilton    0.327786
Organic Beer-Brined Moochego    0.341855
Stella Italian Sharp            0.369874
Moody Blue                      0.393661
Oxford Blue                     0.411765
Stella Blue                     0.414982
Strathdon Blue                  0.414982
Little Black Bomber             0.416617
Holey Cow                       0.416617
Name: Shropshire Blue, dtype: float64

In [23]:
shropshire_list = list(recommender_df['Shropshire Blue'].sort_values()[1:11].index)
shropshire_list

['Adelost',
 'Cropwell Bishop Blue Stilton',
 'Organic Beer-Brined Moochego',
 'Stella Italian Sharp',
 'Moody Blue',
 'Oxford Blue',
 'Stella Blue',
 'Strathdon Blue',
 'Little Black Bomber',
 'Holey Cow']

In [24]:
# Create the dictionary that defines the order for sorting
sorterIndex = dict(zip(shropshire_list,range(len(shropshire_list))))
sorterIndex

{'Adelost': 0,
 'Cropwell Bishop Blue Stilton': 1,
 'Organic Beer-Brined Moochego': 2,
 'Stella Italian Sharp': 3,
 'Moody Blue': 4,
 'Oxford Blue': 5,
 'Stella Blue': 6,
 'Strathdon Blue': 7,
 'Little Black Bomber': 8,
 'Holey Cow': 9}

In [25]:
range(len(list(recommender_df['Shropshire Blue'].sort_values()[1:11].index)))

range(0, 10)

In [26]:
# with some help from https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas
# Define the sorter
shropshire_list = list(recommender_df['Shropshire Blue'].sort_values()[1:11].index)
# Create the dictionary that defines the order for sorting
sorterIndex = dict(zip(shropshire_list,range(len(shropshire_list))))

mask_dict = {}
mask_list = []
for i in range(len(shropshire_list)):
    mask = cheese_com['Name'] == i
    mask_dict[i] = cheese_com['Name'] == shropshire_list[i]
    
    

mask_list = mask_dict[0] | mask_dict[1] | mask_dict[2] | mask_dict[3] | mask_dict[4] | mask_dict[5] | mask_dict[6] | mask_dict[7] | mask_dict[8] | mask_dict[9]

display_guy = cheese_com[mask_list]

display_guy['Name_rank'] = display_guy[mask_list]['Name'].map(sorterIndex)
display_guy.sort_values(by='Name_rank', inplace = True)
display_guy.drop('Name_rank', 1, inplace = True)

display_guy



Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Type_soft,Type_firm,Type_smear-ripened,Type_whey,Type_semi firm,Type_semi-hard,Type_hard,Type_fresh soft,Type_fresh firm,Type_processed
12,Adelost,pasteurized cow's milk,Sweden,,"semi-soft, blue-veined",creamy,natural,"salty, sharp, tangy",strong,yes,...,0,0,0,0,0,0,0,0,0,0
500,Cropwell Bishop Blue Stilton,pasteurized cow's milk,"England, Great Britain and United Kingdom",Nottinghamshire,"semi-soft, artisan, blue-veined","creamy, crumbly, firm and smooth",natural,"creamy, strong, tangy","pungent, rich, spicy",yes,...,0,0,0,0,0,0,0,0,0,0
1191,Organic Beer-Brined Moochego,pasteurized cow's milk,United States,NY,semi-soft,"creamy, firm and smooth",natural,full-flavored,rich,yes,...,0,0,0,0,0,0,0,0,0,0
1613,Stella Italian Sharp,pasteurized cow's milk,United States,,"semi-soft, artisan",creamy,natural,"creamy, full-flavored, sharp",strong,no,...,0,0,0,0,0,0,0,0,0,0
1106,Moody Blue,pasteurized cow's milk,United States,Wisconsin,"semi-soft, artisan, blue-veined",creamy,natural,"creamy, nutty, subtle, tangy","rich, smokey",yes,...,0,0,0,0,0,0,0,0,0,0
1219,Oxford Blue,pasteurized cow's milk,England and United Kingdom,Buckinghamshire,"semi-soft, artisan, blue-veined",creamy,natural,"salty, sharp, spicy, tangy",strong,yes,...,0,0,0,0,0,0,0,0,0,0
1607,Stella Blue,pasteurized cow's milk,United States,Wisconsin,"semi-soft, artisan",crumbly,natural,tangy,rich,yes,...,0,0,0,0,0,0,0,0,0,0
1628,Strathdon Blue,pasteurized cow's milk,Scotland,Tain,semi-soft,creamy,,"creamy, spicy","aromatic, rich",yes,...,0,0,0,0,0,0,0,0,0,0
971,Little Black Bomber,pasteurized cow's milk,"Great Britain, United Kingdom and Wales",Rhyl,hard,creamy and smooth,waxed,"full-flavored, strong",rich,yes,...,0,0,0,0,0,0,1,0,0,0
799,Holey Cow,pasteurized cow's milk,United States,"West, California","semi-soft, artisan","creamy, crumbly and smooth",natural,"buttery, creamy","buttery, rich",yes,...,0,0,0,0,0,0,0,0,0,0


In [27]:
list(cheese_com['Name'].values)

['Abbaye de Belloc',
 'Abbaye de Belval',
 'Abbaye de Citeaux',
 'Abbaye de Timadeuc',
 'Abbaye du Mont des Cats',
 'Abbot’s Gold',
 'Abertam',
 'Abondance',
 'Acapella',
 'Accasciato',
 'Ackawi',
 'Acorn',
 'Adelost',
 'ADL Brick Cheese',
 'ADL Mild Cheddar',
 'Affidelice au Chablis',
 'Affineur Walo Rotwein Sennechäs',
 "Afuega'l Pitu",
 'Aged British Cheddar',
 'Aged Cashew & Blue Green Algae Cheese',
 'Aged Cashew & Brazil Nut Cheese',
 'Aged Cashew & Dulse Cheese',
 'Aged Cashew & Hemp Seed Cheese',
 'Aged Cashew Nut Cheese',
 'Aged Cashew Nut & Kale Cheese',
 'Aged Chelsea',
 'Aged Gouda',
 'Aggiano',
 'Ailsa Craig',
 'Airag',
 'Airedale',
 'Aisy Cendre',
 'Allgauer Emmentaler',
 'Allium Piper',
 'Alma Vorarlberger Alpkäse (3-5 months)',
 'Alma Vorarlberger Alpkäse (6-9 months)',
 'Alma Vorarlberger Bergkäse (10 months)',
 'Alma Vorarlberger Bergkäse (12 months)',
 'Alma Vorarlberger Bergkäse (6 months)',
 'Almnäs Tegel',
 'Alpe di Frabosa',
 'Alpha Tolman',
 'Alpicrème',
 'Alpin

In [28]:
cheese_com['Family'].unique()

array([nan, 'Cheddar', 'Tomme', 'Feta', 'Blue', 'Swiss Cheese', 'Gouda',
       'Mozzarella', 'Cottage', 'Pasta filata', 'Brie', 'Gruyere',
       'Parmesan', 'Camembert', 'blue', 'Monterey Jack', 'Caciotta',
       'Pecorino', 'Gorgonzola', 'Raclette', 'Cornish', 'Havarti',
       'Italian Cheese', 'Saint-Paulin'], dtype=object)

In [29]:
cheese_com[cheese_com['Name']== "Shropshire Blue"]

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Type_soft,Type_firm,Type_smear-ripened,Type_whey,Type_semi firm,Type_semi-hard,Type_hard,Type_fresh soft,Type_fresh firm,Type_processed
1562,Shropshire Blue,pasteurized cow's milk,Great Britain,Leicestershire,semi-soft,creamy and smooth,natural,"creamy, full-flavored, sharp, strong, tangy","rich, strong",yes,...,0,0,0,0,0,0,0,0,0,0


In [30]:
cheese_com[cheese_com['Family']=='Cheddar']

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Type_soft,Type_firm,Type_smear-ripened,Type_whey,Type_semi firm,Type_semi-hard,Type_hard,Type_fresh soft,Type_fresh firm,Type_processed
5,Abbot’s Gold,pasteurized cow's milk,"England, Great Britain and United Kingdom",North Yorkshire,semi-hard,"creamy, crumbly, dense and semi firm",natural,"mild, sweet, tangy",aromatic,yes,...,0,0,0,0,0,1,0,0,0,0
13,ADL Brick Cheese,pasteurized cow's milk,Canada,Prince Edward Island,semi-soft,"elastic, firm, open and soft",rindless,"buttery, mild, milky, subtle","buttery, sweet",,...,0,0,0,0,0,0,0,0,0,0
14,ADL Mild Cheddar,pasteurized cow's milk,Canada,Prince Edward Island,semi-hard,firm and springy,rindless,"acidic, buttery, milky, subtle",,,...,0,0,0,0,0,1,0,0,0,0
18,Aged British Cheddar,unpasteurized cow's milk,United States,NY,"hard, artisan","crumbly, crystalline and flaky",natural,sharp,smokey,,...,0,0,0,0,0,0,1,0,0,0
62,Amul Processed Cheese,pasteurized cow's or water buffalo's milk,India,Gujarat,"hard, processed",crumbly and dense,artificial,"buttery, creamy, salty",,yes,...,0,0,0,0,0,0,1,0,0,1
66,Anniversary Ale Cheddar,pasteurized cow's milk,United States,Northwest,"semi-hard, artisan",dense and firm,,"creamy, mild, sweet","floral, fruity",yes,...,0,0,0,0,0,1,0,0,0,0
73,Apple Walnut Smoked,pasteurized cow's milk,United States,Utah,"hard, artisan",creamy and smooth,,"nutty, smokey , sweet","nutty, smokey, sweet",yes,...,0,0,0,0,0,0,1,0,0,0
76,Applewood,pasteurized cow's milk,"England, Great Britain and United Kingdom",Somerset,semi-hard,crumbly and dense,natural,"smokey , spicy",smokey,yes,...,0,0,0,0,0,1,0,0,0,0
99,Avonlea Clothbound Cheddar,unpasteurized cow's milk,Canada,Prince Edward Island,"hard, artisan","creamy, crumbly and firm",cloth wrapped,"fruity, mushroomy, savory",,yes,...,0,0,0,0,0,0,1,0,0,0
115,Barber's 1833,pasteurized cow's milk,England,Somerset,"hard, artisan","creamy, crumbly and smooth",rindless,"savory, sweet",rich,yes,...,0,0,0,0,0,0,1,0,0,0


In [31]:
df[df['Family_Cheddar']==0]#['Name']

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Ackawi,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [32]:
query = input('Enter a Cheese Type: ')

Enter a Cheese Type: 


In [33]:
query

''

In [34]:
df_not_cheddar = df[df['Family_Cheddar']==0]

In [35]:
mask1 = df['Family_Cheddar']==0
mask2 = df.index== 'ADL Mild Cheddar'

mask = mask1 | mask2
df_not_cheddar = df[mask]

In [36]:
recommender = pairwise_distances(df_not_cheddar, metric='cosine')

In [37]:
recommender_df = pd.DataFrame(recommender, columns=df_not_cheddar.index, index=df_not_cheddar.index)

In [38]:
recommender_df

Name,Abbaye de Belloc,Abbaye de Belval,Abbaye de Citeaux,Abbaye de Timadeuc,Abbaye du Mont des Cats,Abertam,Abondance,Acapella,Accasciato,Ackawi,...,Za'atar Burrata,Zamorano,Zanetti Grana Padano,Zanetti Parmigiano Reggiano,"Zartschmelzend, Kräftig Würziger Rahm-Hartkäse",Zelu Koloria,Zigljen Iz Extra Mature,Zigljen Iz Mosta,Zimbro,Zwitser
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0.000000,7.165266e-01,0.599108,6.969542e-01,0.785714,0.677671,0.546257,1.000000e+00,0.597089,0.832817,...,0.740719,8.218258e-01,0.629375,0.677671,0.571429,0.611078,0.571429,0.622036,0.661938,0.571429
Abbaye de Belval,0.716527,2.220446e-16,0.646447,3.318469e-01,0.527544,0.786799,0.571254,8.750000e-01,0.573599,0.778837,...,0.828501,7.642977e-01,0.901942,0.893400,0.716527,0.742752,0.905509,0.833333,0.776393,0.811018
Abbaye de Citeaux,0.599108,6.464466e-01,0.000000,6.220355e-01,0.398662,0.773866,0.575563,9.116117e-01,0.849244,0.739357,...,0.757464,9.166667e-01,0.722650,0.773866,0.732739,0.818098,0.866369,0.882149,0.762829,0.732739
Abbaye de Timadeuc,0.696954,3.318469e-01,0.622036,2.220446e-16,0.494924,0.886039,0.633321,8.663694e-01,0.658118,0.763567,...,0.816660,8.740118e-01,0.895172,0.886039,0.696954,0.633321,0.797969,0.732739,0.880477,0.797969
Abbaye du Mont des Cats,0.785714,5.275444e-01,0.398662,4.949237e-01,0.000000,0.758253,0.611078,9.055089e-01,0.838835,0.609905,...,0.740719,8.218258e-01,0.851750,0.838835,0.785714,0.805539,0.714286,0.748024,0.830969,0.857143
Abertam,0.677671,7.867993e-01,0.773866,8.860394e-01,0.758253,0.000000,0.634364,8.933996e-01,0.727273,0.811392,...,0.926873,4.974811e-01,0.749127,0.727273,0.758253,0.561237,0.677671,0.715732,0.809307,0.758253
Abondance,0.546257,5.712535e-01,0.575563,6.333206e-01,0.611078,0.634364,0.000000,8.285014e-01,0.634364,0.848283,...,0.764706,6.766192e-01,0.596396,0.707491,0.546257,0.705882,0.740719,0.771335,0.769911,0.675898
Acapella,1.000000,8.750000e-01,0.911612,8.663694e-01,0.905509,0.893400,0.828501,2.220446e-16,0.786799,0.778837,...,0.657003,7.642977e-01,1.000000,1.000000,1.000000,0.828501,1.000000,1.000000,0.888197,0.905509
Accasciato,0.597089,5.735986e-01,0.849244,6.581183e-01,0.838835,0.727273,0.634364,7.867993e-01,0.000000,0.811392,...,0.853746,7.989924e-01,0.749127,0.727273,0.758253,0.707491,0.677671,0.715732,0.809307,0.758253
Ackawi,0.832817,7.788371e-01,0.739357,7.635669e-01,0.609905,0.811392,0.848283,7.788371e-01,0.811392,0.000000,...,0.747139,7.914856e-01,0.884337,0.874261,0.888544,0.797711,0.721361,0.803411,0.868124,0.888544


In [39]:
recommender_df['Fox Hill Gouda'].sort_values()[1:11]

Name
Peppercorn Gouda                  0.433053
Salsa Asiago                      0.459262
Mont St-Benoît                    0.459262
Galbani                           0.485504
St-Fidèle Swiss                   0.496047
Meyer Vintage Gouda               0.502532
Legacy                            0.502532
Trappiste de Bricquebec           0.513136
Aged Cashew & Hemp Seed Cheese    0.513136
Brimstone                         0.523710
Name: Fox Hill Gouda, dtype: float64

In [40]:
recommender_df['ADL Mild Cheddar'].sort_values()[1:11]

Name
St-Fidèle Swiss      0.481125
Vampire Slayer       0.503861
Coalho               0.538462
Paesanella Fresco    0.555250
Pacific Rock         0.555250
Liliputas            0.581879
Sveciaost            0.581879
Mozzarellissima      0.583975
Bocconcini           0.583975
New Moon             0.607768
Name: ADL Mild Cheddar, dtype: float64

In [41]:
recommender_df['Brillat-Savarin'].sort_values()[1:11]

Name
Saint-Paulin        0.378941
Boursault           0.452277
Chiriboga Blue      0.452277
Grand Vatel         0.452277
Romans Part Dieu    0.455051
Lady Jane           0.466667
Saint Félicien      0.466886
Saint-André         0.466886
Charolais           0.472954
Prince-Jean         0.478251
Name: Brillat-Savarin, dtype: float64

In [42]:
name_list = list(cheese_com['Name'].values)
name_list.append('-')

def name_f(name):
    return name

name = interactive(
    name_f,
    name = sorted(name_list)
)

In [43]:
def cheese_type_f(cheese_type):
    return cheese_type

cheese_type = interactive(
    cheese_type_f,
    cheese_type=sorted([
        'whey', 'soft', 'semi-hard', 'fresh soft', 'semi-soft', 'soft-ripened', 'fresh firm', 'processed', 
        'hard', 'smear-ripened', 'artisan', 'firm', 'semi firm', 'brined', 'blue-veined', '-'])
)

In [44]:
def texture_f(texture):
    return texture

texture = interactive(
    texture_f,
    texture=sorted(['flaky', 'fluffy', 'dry', 'close', 'open', 'runny', 'semi firm', 'brittle', 
                    'crystalline', 'chalky', 'elastic', 'springy', 'soft', 'stringy', 'chewy', 
                    'soft-ripened', 'oily', 'supple', 'creamy', 'grainy', 'compact', 'sticky', 
                    'buttery', 'dense', 'firm', 'spreadable', 'crumbly', 'smooth', '-'])
)

In [45]:
def flavor_f(flavor):
    return flavor

def flavor_f(flavor):
    return flavor

flavor = interactive(
    flavor_f,
    flavor=sorted(['fruity', 'bitter', 'milky', 'oceanic', 'grassy', 'mild', 'piquant', 'spicy', 
            'strong', 'earthy', 'citrusy', 'acidic', 'salty', 'yeasty', 'woody', 'lemony', 
            'pronounced', 'tart', 'savory', 'licorice', 'sweet', 'herbaceous', 'butterscotch', 
            'pungent', 'sour', 'garlicky', 'crunchy', 'creamy', 'nutty', 'meaty', 'rustic', 'sharp', 
            'umami', 'full-flavored', 'smokey', 'floral', 'subtle', 'buttery', 'caramel', 'mineral', 
            'burnt caramel', 'mellow', 'vegetal', 'tangy', 'smokey ', 'mushroomy', 'smooth', '-'])
)

In [46]:
def aroma_f(aroma):
    return aroma

aroma = interactive(
    aroma_f,
    aroma=sorted(['fruity', 'rich', 'toasty', 'milky', 'fermented', 'grassy', 'lanoline', 'fresh', 
           'mild', 'goaty', 'barnyardy', 'earthy', 'strong', 'aromatic', 'spicy', 'pleasant', 
           'mushroom', 'yeasty', 'herbal', 'woody', 'pronounced', 'musty', 'sweet', 'pungent', 
           'clean', 'ripe', 'garlicky', 'perfumed', 'nutty', 'raw nut', 'smokey', 'floral', 'subtle', 
           'stinky', 'buttery', 'caramel', 'lactic', '-'])
)

In [47]:
def rind_f(rind):
    return rind

rind = interactive(
    rind_f,
    rind=sorted(['leaf wrapped', 'rindless', 'ash coated', 'artificial', 'cloth wrapped', 'waxed', 
           'plastic', 'mold ripened', 'washed', 'bloomy', 'natural', '-'])
)

In [48]:
def vegetarian_f(vegetarian):
    return vegetarian

vegetarian = interactive(
    vegetarian_f,
    vegetarian=['-' ,'yes', 'no']
)

In [49]:
def family_f(family):
    return family

family = interactive(
    family_f,
    family=sorted(['Pecorino', 'Cheddar', 'Feta', 'Italian Cheese', 'Camembert', 'Blue', 
            'Raclette', 'Monterey Jack', 'Cottage', 'Gruyere', 'Pasta filata', 'Havarti', 
            'Parmesan', 'Swiss Cheese', 'Mozzarella', 'Cornish', 'Gorgonzola', 'Brie', 'Caciotta', 
            'Gouda', 'Tomme', 'Saint-Paulin', '-'])
)

In [50]:
def country_f(country):
    return country

country = interactive(
    country_f,
    country=sorted(['England', 'Austria', 'Syria', 'Pakistan', 'Netherlands', 'Afghanistan', 
             'Brazil', 'France', 'Israel', 'India', 'Poland', 'Nepal', 'Wales', 'Jordan', 
             'Holland', 'Bulgaria', 'Sweden', 'China', 'Denmark', 'Scotland', 'Ireland', 
             'Bangladesh', 'Belarus', 'Argentina', 'Australia', 'Ukraine', 'Eastern Mediterranean', 
             'Mauritania', 'Italy', 'Middle East', 'Finland', 'Great Britain', 'New Zealand', 'Palestine', 
             'Serbia', 'Armenia', 'Lebanon', 'Albania', 'Switzerland', 'Mexico', 'Andorra', 'Greece', 'Croatia', 
             'Romania', 'Japan', 'Isle of Man', 'Cyprus', 'Belgium', 'Azerbaijan', 'United Kingdom', 
             'Czech Republic', 'Canada', 'Spain', 'Egypt', 'Turkey', 'Iran', 'Russia', 'Chile', 'Norway', 
             'Iraq', 'Hungary', 'United States', 'Swaziland', 'Mongolia', 
             'Lithuania', 'Portugal', 'Tibet', 'Caribbean', 'Macedonia', 'Georgia', 'Slovakia', 'Iceland', 
             'Germany', '-'])
)

In [51]:
def milk_f(milk):
    return milk

milk = interactive(
    milk_f,
    milk=sorted(['vegan', 'cow', 'sheep', 'goat', 'mare', 'buffalo', 'water buffalo', 'camel', 'donkey', 
                'moose', 'reindeer', 'yak', '-'])
)

In [52]:
def pasteurized_f(pasteurized):
    return pasteurized

pasteurized = interactive(
    pasteurized_f,
    pasteurized=['-' , 'pasteurized', 'unpasteurized']
)

In [53]:
def fat_content_f(fat_content):
    return fat_content

fat_content = interactive(
    fat_content_f,
    fat_content=['-', 'less than 0.15%', '0.16-3%', '4-19%', 
                 '20-39%', '40-59%', '60-74%', '75%+']
)

In [54]:
display(name)

display(cheese_type)

display(texture)

display(flavor)

display(aroma)

display(rind)

display(vegetarian)

display(family)

display(country)

display(milk)

display(pasteurized)

display(fat_content)




name_drop = name.result


if cheese_type.result == '-':
    cheese_type_drop = cheese_type.result
else:
    cheese_type_drop = "Type_" + cheese_type.result

    
if texture.result == '-':
    texture_drop = texture.result
else:
    texture_drop = "Texture_" + texture.result

    
if flavor.result == '-':
    flavor_drop = flavor.result
else:
    flavor_drop = "Flavour_" + flavor.result

    
if aroma.result == '-':
    aroma_drop = aroma.result
else:
    aroma_drop = "Aroma_" + aroma.result

    
if rind.result == '-':
    rind_drop = rind.result
else:
    rind_drop = "Rind_" + rind.result

    
if vegetarian.result == '-':
    vegetarian_drop = vegetarian.result
else:
    vegetarian_drop = "Vegetarian_" + vegetarian.result

    
if family.result == '-':
    family_drop = family.result
else:
    family_drop = "Family_" + family.result

if country.result == '-':
    country_drop = country.result
else:
    country_drop = "Country of origin_" + country.result

    
if milk.result == '-':
    milk_drop = milk.result
else:
    milk_drop = "Milk_" + milk.result

    
if pasteurized.result == '-':
    pasteurized_drop = pasteurized.result
else:
    pasteurized_drop = "Treatment_" + pasteurized.result

    
if fat_content.result == '-':
    fat_content_drop = fat_content.result
else:
    fat_content_drop = "Fat_" + fat_content.result
    
    
def run_all(ev):
    display(Javascript('IPython.notebook.execute_cells_below()'))

button = widgets.Button(description="Recommend")
button.on_click(run_all)
display(button)

<IPython.core.display.Javascript object>

interactive(children=(Dropdown(description='name', index=231, options=('-', 'ADL Brick Cheese', 'ADL Mild Ched…

interactive(children=(Dropdown(description='cheese_type', index=3, options=('-', 'artisan', 'blue-veined', 'br…

interactive(children=(Dropdown(description='texture', options=('-', 'brittle', 'buttery', 'chalky', 'chewy', '…

interactive(children=(Dropdown(description='flavor', options=('-', 'acidic', 'bitter', 'burnt caramel', 'butte…

interactive(children=(Dropdown(description='aroma', options=('-', 'aromatic', 'barnyardy', 'buttery', 'caramel…

interactive(children=(Dropdown(description='rind', index=10, options=('-', 'artificial', 'ash coated', 'bloomy…

interactive(children=(Dropdown(description='vegetarian', options=('-', 'yes', 'no'), value='-'), Output()), _d…

interactive(children=(Dropdown(description='family', options=('-', 'Blue', 'Brie', 'Caciotta', 'Camembert', 'C…

interactive(children=(Dropdown(description='country', index=26, options=('-', 'Afghanistan', 'Albania', 'Andor…

interactive(children=(Dropdown(description='milk', options=('-', 'buffalo', 'camel', 'cow', 'donkey', 'goat', …

interactive(children=(Dropdown(description='pasteurized', options=('-', 'pasteurized', 'unpasteurized'), value…

interactive(children=(Dropdown(description='fat_content', options=('-', 'less than 0.15%', '0.16-3%', '4-19%',…

Button(description='Recommend', style=ButtonStyle())

interactive(children=(Dropdown(description='name', options=('-', 'ADL Brick Cheese', 'ADL Mild Cheddar', 'Abba…

interactive(children=(Dropdown(description='cheese_type', options=('-', 'artisan', 'blue-veined', 'brined', 'f…

interactive(children=(Dropdown(description='texture', options=('-', 'brittle', 'buttery', 'chalky', 'chewy', '…

interactive(children=(Dropdown(description='flavor', options=('-', 'acidic', 'bitter', 'burnt caramel', 'butte…

interactive(children=(Dropdown(description='aroma', options=('-', 'aromatic', 'barnyardy', 'buttery', 'caramel…

interactive(children=(Dropdown(description='rind', options=('-', 'artificial', 'ash coated', 'bloomy', 'cloth …

interactive(children=(Dropdown(description='vegetarian', options=('-', 'yes', 'no'), value='-'), Output()), _d…

interactive(children=(Dropdown(description='family', options=('-', 'Blue', 'Brie', 'Caciotta', 'Camembert', 'C…

interactive(children=(Dropdown(description='country', options=('-', 'Afghanistan', 'Albania', 'Andorra', 'Arge…

interactive(children=(Dropdown(description='milk', options=('-', 'buffalo', 'camel', 'cow', 'donkey', 'goat', …

interactive(children=(Dropdown(description='pasteurized', options=('-', 'pasteurized', 'unpasteurized'), value…

interactive(children=(Dropdown(description='fat_content', options=('-', 'less than 0.15%', '0.16-3%', '4-19%',…

Button(description='Recommend', style=ButtonStyle())

In [55]:
# create a null mask that does not affect the dataframe.  for the sake of getting it done
# i arbitrarily chose the state of being yak milk or not being yak milk
# because yak milk is funny and this does the job
yak_mask = df["Milk_yak"] == 1
no_yak_mask = df["Milk_yak"] == 0

null_mask = yak_mask | no_yak_mask

In [56]:
if name_drop == '-':
    name_mask = null_mask
else:
    name_mask = df.index == name_drop

In [57]:
if cheese_type_drop == '-':
    type_mask = null_mask
else:
    type_mask = df[cheese_type_drop] == 1

In [58]:
if texture_drop == '-':
    texture_mask = null_mask
else:
    texture_mask = df[texture_drop] == 1

In [59]:
if flavor_drop == '-':
    flavor_mask = null_mask
else:
    flavor_mask = df[flavor_drop] == 1

In [60]:
if aroma_drop == '-':
    aroma_mask = null_mask
else:
    aroma_mask = df[aroma_drop] == 1

In [61]:
if rind_drop == '-':
    rind_mask = null_mask
else:
    rind_mask = df[rind_drop] == 1

In [62]:
if vegetarian_drop == '-':
    vegetarian_mask = null_mask
else:
    vegetarian_mask = df[vegetarian_drop] == 1

In [63]:
if family_drop == '-':
    family_mask = null_mask
else:
    family_mask = df[family_drop] == 1

In [64]:
if country_drop == '-':
    country_mask = null_mask
else:
    country_mask = df[country_drop] == 1

In [65]:
if milk_drop == '-':
    milk_mask = null_mask
else:
    milk_mask = df[milk_drop] == 1

In [66]:
if pasteurized_drop == '-':
    pasteurized_mask = null_mask
else:
    pasteurized_mask = df[pasteurized_drop] == 1

In [67]:
if fat_content_drop == '-':
    fat_mask = null_mask
else:
    fat_mask = df[fat_content_drop] == 1

In [68]:
df[type_mask & texture_mask & flavor_mask & aroma_mask & rind_mask & vegetarian_mask & family_mask & country_mask & milk_mask & pasteurized_mask & fat_mask]

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [69]:
df[null_mask]

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
full_mask = type_mask & texture_mask & flavor_mask & aroma_mask & rind_mask & vegetarian_mask & family_mask & country_mask & milk_mask & pasteurized_mask & fat_mask

In [71]:
df[full_mask]

Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
df[name_mask | full_mask]



Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [73]:
# if no cheese name was entered, then all that needs to be returned is the dataframe filtered through all
# of the masks specified by the user
if name_drop == '-':
    # save the original dataframe that has been filtered as a dataframe named 'display_frame'
    display_frame = df[full_mask]

    
# if a cheese name is entered by the user, do the following
else:
    # first create a dataframe that is filtered by all of the masks specified by the user, and also
    # includes the cheese that was named by the user
    df2 = df[name_mask |full_mask]
    
    # calculate the cosine similarity for each cheese in this dataframe using the pairwise_distances function
    # this will return a square matrix, comparing every cheese with every other cheese in the dataframe
    recommender = pairwise_distances(df2, metric='cosine')

    # Create a distances DataFrame the matrix defined above, filling in the cheese names (which were in the index
    # of the old dataframe) as the column names and indicies for the recommender dataframe
    recommender_df = pd.DataFrame(recommender, columns=df2.index, index=df2.index)

    # use the recomender to return the first 10 cheeses (NOT including the cheese named, hence sort [1:11])
    # put this list into a dataframe called 'display_frame'
    display_frame = pd.DataFrame(recommender_df[name_drop].sort_values()[1:11])

    
# if there were no cheeses in the dataset with the sepcified error message (the length will be zero),
# then print this error message
if len(display_frame) == 0:
    print("Sorry! There are no cheeses that have all of those specific features")
    print("Please adjust the selected features and try again")
  
# if no error messages, display the dataframe 
else:
    display(display_frame)


Unnamed: 0_level_0,Fat_less than 0.15%,Fat_0.16-3%,Fat_4-19%,Fat_20-39%,Fat_40-59%,Fat_60-74%,Fat_75%+,Milk_vegan,Milk_cow,Milk_goat,...,Country of origin_Argentina,Country of origin_China,Country of origin_Poland,Country of origin_Czech Republic,Country of origin_Wales,Country of origin_Canada,Country of origin_Spain,Country of origin_Iran,Country of origin_Switzerland,Country of origin_Armenia
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbaye de Belloc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Belval,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Citeaux,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye de Timadeuc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbaye du Mont des Cats,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abbot’s Gold,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Abertam,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Abondance,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Acapella,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Accasciato,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [74]:
cheese_com[cheese_com['Name'] == name_drop]

Unnamed: 0,Name,Milk,Country of origin,Region,Type,Texture,Rind,Flavour,Aroma,Vegetarian,...,Type_soft,Type_firm,Type_smear-ripened,Type_whey,Type_semi firm,Type_semi-hard,Type_hard,Type_fresh soft,Type_fresh firm,Type_processed


In [75]:
from IPython.display import display
from ipywidgets import Checkbox

box = Checkbox(False, description='checker')
display(box)

def changed(b):
    print(b)

box.observe(changed)

Checkbox(value=False, description='checker')

In [76]:
widgets.SelectMultiple(
    options=['Apples', 'Oranges', 'Pears'],
    value=['Oranges'],
    #rows=10,
    description='Fruits',
    disabled=False
)

SelectMultiple(description='Fruits', index=(1,), options=('Apples', 'Oranges', 'Pears'), value=('Oranges',))

In [77]:
widgets.Dropdown(
    options=['Apples', 'Oranges', 'Pears'],
    description='Groups',
    rows=len(['Apples', 'Oranges', 'Pears']),
    layout=Layout(width="100%")
)

NameError: name 'Layout' is not defined