In [12]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text  import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.preprocessing import MultiLabelBinarizer

from nltk import RegexpTokenizer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import regex as re
import nltk
#nltk.download('stopwords')

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set_style('white')
sns.set_palette('Set3')

#import warnings
#warnings.filterwarnings("ignore", category=DeprecationWarning) 
#warnings.filterwarnings("ignore", category=FutureWarning) 

In [13]:
# Read in data 

In [30]:
df = pd.read_csv('./data/coffee_dataset.csv')
df.head()


Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Brewing recipe,package weight,price,Recommend for,Flavor
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...",,200g,21,"Espresso & Filter, morning","sweet, floral, tea-like, citrus fruit"
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...",,200g,22,"Espresso & Filter, morning","sweet, tea-like, tropical fruit"
2,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...",,200g,22,"Espresso & Filter, morning","sweet, tea-like, stone fruit, pome fruit"
3,DUROMINA,"74110, 74112",Various smallholder farmers,"Agaro, Ethiopia",Fully washed,"Bergamot, orange blossom, oolong tea, yellow p...",,200g,21,"Espresso & Filter, morning","tea-like, citrus fruit, stone fruit, pome fruit"
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...",,200g,22,"Espresso & Filter, morning,afternoon,anytime","sweet, floral, tea-like, stone fruit, tropical..."


In [31]:
df = df.drop(["Brewing recipe","package weight", "price"], axis=1)
df

Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Recommend for,Flavor
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...","Espresso & Filter, morning","sweet, floral, tea-like, citrus fruit"
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","sweet, tea-like, tropical fruit"
2,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","sweet, tea-like, stone fruit, pome fruit"
3,DUROMINA,"74110, 74112",Various smallholder farmers,"Agaro, Ethiopia",Fully washed,"Bergamot, orange blossom, oolong tea, yellow p...","Espresso & Filter, morning","tea-like, citrus fruit, stone fruit, pome fruit"
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","sweet, floral, tea-like, stone fruit, tropical..."
5,IJEN LESTARI,Mixed Variety,Dandy Dharmawan,"East Java, Indonesia","72 hours carbonic maceration, dried nutural","Grenadine syrup, tinned pineapple, dark chocol...","Espresso & Filter,morning, afternoon","chocolaty, sweet, stone fruit, tropical fruit,..."
6,BEDESSA,"74110, 74112",Various smallholder farmers,"Oromia, Ethopia",Fully washed,"Orange blossom, Earl grey, papaya, honeysuckle...","Espresso & Filter,morning","sweet, tea-like, citrus fruit, tropical fruit,..."
7,MAHEMBE GITUNTU,Red Bourbon,Justin Musabyimana,"Nyamesheke, Rwanda",Fully washed,"Poached apples, mixed berries, spiced tea, van...","Espresso & Filter,morning","sweet, tea-like, pome fruit"
8,V,Castillo,Santiago London,"Quindio, Colombia","24 hour Anaerobic fermentation, 24 hour Aerobi...","Kyoho grape, Raspeberry compote, tangerine, go...","filter,morning","citrus fruit, berry, tropical fruit, alcohol"
9,Karora,Red Bourbon,Francine Nyiramana & Immaculee Mukamana,"Karongi, Rwanda",Dried on raised beds for 3-4 weeks,"Pitted Prunes, milk tea, starfruit, brown suga...","Espresso & Filter,afternoon,evening, anytime","sweet, tea-like, tropical fruit, dried fruit"


In [32]:
# Define the mapping of cupping notes to flavors
# can be further improved

cupping_notes_to_flavors = {


    'nutty': ['nutty','almond','hazelnut','peanuts','macadamias'],
    'chocolaty': ['chocolate', 'cacao', 'cocoa','dark chocolate','milk chocolate'],
    'spices': ['spices','clove','cinnamon','nutmeg','anise'],
    'sweet': [ 'sugar','treacle', 'syrup','molasses','mapple syrup','caramelized','honey','toffee','cordial','caramelized','caramel','vanilla','candy'],
    'floral': ['floral','chamoile','rose','jasmine','elderflower','flower'],
    'tea-like': ['tea-like', 'tea','darjeeling', 'ceylon', 'oolong', 'tencha', 'hibiscus', 'tisane','teh-o siew dai','matcha','roselle','earl grey'],
    'citrus fruit': ['citrus', 'bergamot', 'pomelo', 'grapefruit', 'orange', 'mandarin', 'tangerine', 'lemon', 'lime'],

    'berry': ['berry-like', 'raspberry', 'strawberry', 'blueberry', 'blackcurrant', 'redcurrant', 'goji','cranberry','gooseberry','grape'],
    'stone fruit': ['stone fruit', 'apricot', 'cherry','cherries', 'nectarine', 'peach', 'plum'],
    'tropical fruit': ['mango', 'avocado', 'coconut', 'date', 'durian', 'kiwano', 'kiwi', 'longan', 'lychee', 'mangosteen', 'olive', 'papaya', 'passion fruit', 'peach palm fruit', 'persimmon', 'rambutan', 'sapodilla', 'tamarind', 'guava', 'banana', 'pineapple', 'jackfruit', 'breadfruit', 'dragon fruit', 'star fruit'],
    'pome fruit': ['apple', 'pear', 'quince'],
    'dried fruit': ['raisin', 'prune', 'quince','dried cranberry'],

     'herb-like': ['vegetative', 'herb-like', 'eucalyptus', 'acacia', 'rosemary', 'thyme','liquorice','honeysuckle'],

    'cereal': ['malt','Grain','digestive biscuit'],

    'alcohol': ['winey','whiskey','fermented','overripe','wine','kefir','rum']
    
    #     'papery/musty': ['papery', 'musty','woody'],
    
    #     'roasted': ['roasted','brown', 'roast','smoky','ashy','acrid'],
    #     'fermented': ['fermented'],
#     'vegetative': ['vegetative'],
    #     'smoky': ['smoky'],
    #     'sweet': ['sweet', 'honey', 'sugar', 'syrup'],
#     'salty': ['salty'],
#     'savory': ['savory'],
    #     'other fruit': ['Coconut','passionfruit', 'Cherry','Pomegranate','Pineapple','Grape','Apple','Peach','pear',],
    #     'vanilla': ['vanilla'],
#     'fruity': ['fruity', 'fruit'],
    #     'chemical': ['chemical'],
#     'baked': ['baked'],
    #     'sour': ['sour'],
    #     'bitter': ['bitter'],
    
}

# Create an empty list to store the categorized flavors
flavors = []

# Loop through each cupping note
for note in df['Cupping notes'].str.lower():
    note_flavors = []
    # Loop through each flavor and check if the note contains it
    for flavor, keywords in cupping_notes_to_flavors.items():
        if any(keyword in note for keyword in keywords):
            note_flavors.append(flavor)
    flavors.append(note_flavors)
    
# Add the list of flavors to the DataFrame as a new column
df['Flavor'] = flavors

df

Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Recommend for,Flavor
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...","Espresso & Filter, morning","[sweet, floral, tea-like, citrus fruit]"
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]"
2,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","[sweet, tea-like, stone fruit, pome fruit]"
3,DUROMINA,"74110, 74112",Various smallholder farmers,"Agaro, Ethiopia",Fully washed,"Bergamot, orange blossom, oolong tea, yellow p...","Espresso & Filter, morning","[tea-like, citrus fruit, stone fruit, pome fruit]"
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica..."
5,IJEN LESTARI,Mixed Variety,Dandy Dharmawan,"East Java, Indonesia","72 hours carbonic maceration, dried nutural","Grenadine syrup, tinned pineapple, dark chocol...","Espresso & Filter,morning, afternoon","[chocolaty, sweet, stone fruit, tropical fruit..."
6,BEDESSA,"74110, 74112",Various smallholder farmers,"Oromia, Ethopia",Fully washed,"Orange blossom, Earl grey, papaya, honeysuckle...","Espresso & Filter,morning","[sweet, tea-like, citrus fruit, tropical fruit..."
7,MAHEMBE GITUNTU,Red Bourbon,Justin Musabyimana,"Nyamesheke, Rwanda",Fully washed,"Poached apples, mixed berries, spiced tea, van...","Espresso & Filter,morning","[sweet, tea-like, pome fruit]"
8,V,Castillo,Santiago London,"Quindio, Colombia","24 hour Anaerobic fermentation, 24 hour Aerobi...","Kyoho grape, Raspeberry compote, tangerine, go...","filter,morning","[citrus fruit, berry, tropical fruit, alcohol]"
9,Karora,Red Bourbon,Francine Nyiramana & Immaculee Mukamana,"Karongi, Rwanda",Dried on raised beds for 3-4 weeks,"Pitted Prunes, milk tea, starfruit, brown suga...","Espresso & Filter,afternoon,evening, anytime","[sweet, tea-like, tropical fruit, dried fruit]"


In [33]:
df['Flavor'].value_counts()

[sweet, tea-like, pome fruit]                                           2
[sweet, floral, tea-like, citrus fruit]                                 1
[chocolaty, sweet, pome fruit]                                          1
[spices, sweet, tea-like, citrus fruit, pome fruit]                     1
[chocolaty, sweet, berry, stone fruit]                                  1
[chocolaty, sweet, stone fruit]                                         1
[sweet, floral, tea-like, stone fruit, pome fruit]                      1
[nutty, chocolaty, sweet, stone fruit, dried fruit]                     1
[chocolaty, sweet, floral, berry, stone fruit]                          1
[sweet, floral, berry]                                                  1
[sweet, floral, citrus fruit, tropical fruit]                           1
[sweet, floral, tea-like, citrus fruit, stone fruit, tropical fruit]    1
[sweet, tea-like, berry, stone fruit, pome fruit]                       1
[sweet, tea-like, stone fruit, pome fr

In [34]:
#res = df.Flavor.str.join('|').str.get_dummies()
#res

mlb = MultiLabelBinarizer()

out = pd.DataFrame(mlb.fit_transform(df['Flavor']),columns=mlb.classes_, index=df.index)
out = pd.DataFrame(mlb.fit_transform(df.Flavor),
                   columns=mlb.classes_,
                   index=df.Name)

out

#out = df.assign(Flavor=df['Flavor'].str.split(', '), dummy=1).explode('Flavor') \
#        .pivot_table('dummy', 'Name', 'Flavor', fill_value=0).add_prefix('Flavor_')
#out

Unnamed: 0_level_0,alcohol,berry,cereal,chocolaty,citrus fruit,dried fruit,floral,herb-like,nutty,pome fruit,spices,stone fruit,sweet,tea-like,tropical fruit
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
HARU,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0
KEBEDE MARO,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1
AQUIARES ESTATE,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0
DUROMINA,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0
KEBEDE MARO,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1
IJEN LESTARI,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1
BEDESSA,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1
MAHEMBE GITUNTU,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0
V,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1
Karora,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1


In [35]:
res = pd.merge(df, out, on="Name")

In [36]:
res

Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Recommend for,Flavor,alcohol,berry,...,dried fruit,floral,herb-like,nutty,pome fruit,spices,stone fruit,sweet,tea-like,tropical fruit
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...","Espresso & Filter, morning","[sweet, floral, tea-like, citrus fruit]",0,0,...,0,1,0,0,0,0,0,1,1,0
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,0,0,0,0,0,0,1,1,1
2,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,1,0,0,0,0,1,1,1,1
3,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,0,0,0,0,0,0,1,1,1
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,1,0,0,0,0,1,1,1,1
5,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","[sweet, tea-like, stone fruit, pome fruit]",0,0,...,0,0,0,0,1,0,1,1,1,0
6,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","[sweet, tea-like, stone fruit, pome fruit]",1,1,...,0,0,0,0,0,0,0,1,1,0
7,AQUIARES ESTATE,Esperanza,Diego Robelo,"Turrialba, Costa Rica","Pre-drying on Patio for 2-3 days, Dried in Who...","Raspberry Candy, Blackcurrants, Rice Wine, Whi...","Espresso & Filter, morning","[sweet, tea-like, berry, alcohol]",0,0,...,0,0,0,0,1,0,1,1,1,0
8,AQUIARES ESTATE,Esperanza,Diego Robelo,"Turrialba, Costa Rica","Pre-drying on Patio for 2-3 days, Dried in Who...","Raspberry Candy, Blackcurrants, Rice Wine, Whi...","Espresso & Filter, morning","[sweet, tea-like, berry, alcohol]",1,1,...,0,0,0,0,0,0,0,1,1,0
9,DUROMINA,"74110, 74112",Various smallholder farmers,"Agaro, Ethiopia",Fully washed,"Bergamot, orange blossom, oolong tea, yellow p...","Espresso & Filter, morning","[tea-like, citrus fruit, stone fruit, pome fruit]",0,0,...,0,0,0,0,1,0,1,0,1,0


In [32]:
##Checkign if cupping notes can be used with hot one encoding

#out = df.assign(Flavor=df['Cupping notes'].str.split(', '), dummy=1).explode('Cupping notes') \
#        .pivot_table('dummy', 'Name', 'Cupping notes', fill_value=0).add_prefix('CupNotes_')
#out.head()

In [37]:
### Cupping notes not really usuable, as all too unique
### Cupping notes added on an earlier stage

In [38]:
# adding columns for recommend for brewing types and time of the day
res['brew_espresso'] = np.where(res['Recommend for'].str.contains('Espresso'), 1, 0)
res['brew_espresso'] = np.where(res['Recommend for'].str.contains('espresso'), 1, res['brew_espresso'])

res['brew_filter'] = np.where(res['Recommend for'].str.contains('Filter'), 1, 0)
res['brew_filter'] = np.where(res['Recommend for'].str.contains('filter'), 1, res['brew_filter'])

res['brew_morning'] = np.where(res['Recommend for'].str.contains('Morning'), 1, 0)
res['brew_morning'] = np.where(res['Recommend for'].str.contains('morning'), 1, res['brew_morning'])

res['brew_afternoon'] = np.where(res['Recommend for'].str.contains('Afternoon'), 1, 0)
res['brew_afternoon'] = np.where(res['Recommend for'].str.contains('afternoon'), 1, 0)

res['brew_evening'] = np.where(res['Recommend for'].str.contains('Evening'), 1, 0)
res['brew_evening'] = np.where(res['Recommend for'].str.contains('evening'), 1, res['brew_evening'])

res['brew_morning'] = np.where(res['Recommend for'].str.contains('Anytime'), 1, res['brew_morning'])
res['brew_afternoon'] = np.where(res['Recommend for'].str.contains('Anytime'), 1, res['brew_afternoon'])
res['brew_evening'] = np.where(res['Recommend for'].str.contains('Anytime'), 1, res['brew_evening'])

res['brew_morning'] = np.where(res['Recommend for'].str.contains('anytime'), 1, res['brew_morning'])
res['brew_afternoon'] = np.where(res['Recommend for'].str.contains('anytime'), 1, res['brew_afternoon'])
res['brew_evening'] = np.where(res['Recommend for'].str.contains('anytime'), 1, res['brew_evening'])

res.head()


Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Recommend for,Flavor,alcohol,berry,...,spices,stone fruit,sweet,tea-like,tropical fruit,brew_espresso,brew_filter,brew_morning,brew_afternoon,brew_evening
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...","Espresso & Filter, morning","[sweet, floral, tea-like, citrus fruit]",0,0,...,0,0,1,1,0,1,1,1,0,0
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,0,1,1,1,1,1,1,0,0
2,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,1,1,1,1,1,1,1,0,0
3,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,0,1,1,1,1,1,1,1,1
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,1,1,1,1,1,1,1,1,1


In [39]:
res

Unnamed: 0,Name,Varietal,Producer,Origin,Processing,Cupping notes,Recommend for,Flavor,alcohol,berry,...,spices,stone fruit,sweet,tea-like,tropical fruit,brew_espresso,brew_filter,brew_morning,brew_afternoon,brew_evening
0,HARU,74112 1377,Abayneh family & 453 smallholder farmers,"Foge,Yigracheffe",Fully washed,"Earl grey tea, Jasmine, tangerince candy, oran...","Espresso & Filter, morning","[sweet, floral, tea-like, citrus fruit]",0,0,...,0,0,1,1,0,1,1,1,0,0
1,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,0,1,1,1,1,1,1,0,0
2,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Yigracheffe",Natural,"Passionfruit, Cordial, Mango puree, Rooibos te...","Espresso & Filter, morning","[sweet, tea-like, tropical fruit]",0,0,...,0,1,1,1,1,1,1,1,0,0
3,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,0,1,1,1,1,1,1,1,1
4,KEBEDE MARO,"Wolisho, Dega",KEBEDE MARO,"Konga, Ethiopia",Natural,"Rose cordial, evaporated milk, hawaiian papaya...","Espresso & Filter, morning,afternoon,anytime","[sweet, floral, tea-like, stone fruit, tropica...",0,0,...,0,1,1,1,1,1,1,1,1,1
5,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","[sweet, tea-like, stone fruit, pome fruit]",0,0,...,0,1,1,1,0,1,1,1,0,0
6,AQUIARES ESTATE,Esperanza,Don Alfonso,"Turrialba, Costa Rica",Honey process,"Green plums, baked granny apples, light honey,...","Espresso & Filter, morning","[sweet, tea-like, stone fruit, pome fruit]",1,1,...,0,0,1,1,0,1,1,1,0,0
7,AQUIARES ESTATE,Esperanza,Diego Robelo,"Turrialba, Costa Rica","Pre-drying on Patio for 2-3 days, Dried in Who...","Raspberry Candy, Blackcurrants, Rice Wine, Whi...","Espresso & Filter, morning","[sweet, tea-like, berry, alcohol]",0,0,...,0,1,1,1,0,1,1,1,0,0
8,AQUIARES ESTATE,Esperanza,Diego Robelo,"Turrialba, Costa Rica","Pre-drying on Patio for 2-3 days, Dried in Who...","Raspberry Candy, Blackcurrants, Rice Wine, Whi...","Espresso & Filter, morning","[sweet, tea-like, berry, alcohol]",1,1,...,0,0,1,1,0,1,1,1,0,0
9,DUROMINA,"74110, 74112",Various smallholder farmers,"Agaro, Ethiopia",Fully washed,"Bergamot, orange blossom, oolong tea, yellow p...","Espresso & Filter, morning","[tea-like, citrus fruit, stone fruit, pome fruit]",0,0,...,0,1,0,1,0,1,1,1,0,0


In [40]:
### dropping columns not using for recommendation
res = res.drop(["Varietal","Producer", "Origin", "Processing", "Cupping notes", "Recommend for", "Flavor"], axis=1)
res

Unnamed: 0,Name,alcohol,berry,cereal,chocolaty,citrus fruit,dried fruit,floral,herb-like,nutty,...,spices,stone fruit,sweet,tea-like,tropical fruit,brew_espresso,brew_filter,brew_morning,brew_afternoon,brew_evening
0,HARU,0,0,0,0,1,0,1,0,0,...,0,0,1,1,0,1,1,1,0,0
1,KEBEDE MARO,0,0,0,0,0,0,0,0,0,...,0,0,1,1,1,1,1,1,0,0
2,KEBEDE MARO,0,0,0,0,0,0,1,0,0,...,0,1,1,1,1,1,1,1,0,0
3,KEBEDE MARO,0,0,0,0,0,0,0,0,0,...,0,0,1,1,1,1,1,1,1,1
4,KEBEDE MARO,0,0,0,0,0,0,1,0,0,...,0,1,1,1,1,1,1,1,1,1
5,AQUIARES ESTATE,0,0,0,0,0,0,0,0,0,...,0,1,1,1,0,1,1,1,0,0
6,AQUIARES ESTATE,1,1,0,0,0,0,0,0,0,...,0,0,1,1,0,1,1,1,0,0
7,AQUIARES ESTATE,0,0,0,0,0,0,0,0,0,...,0,1,1,1,0,1,1,1,0,0
8,AQUIARES ESTATE,1,1,0,0,0,0,0,0,0,...,0,0,1,1,0,1,1,1,0,0
9,DUROMINA,0,0,0,0,1,0,0,0,0,...,0,1,0,1,0,1,1,1,0,0
