<a href="https://colab.research.google.com/github/Hanifanta/Capstone_RECCOFFEE/blob/main/notebook/recoffee_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##**Import Library**

In [1]:
# Data loading and data analysis
import pandas as pd

# Modeling
from sklearn.metrics.pairwise import cosine_similarity

##**Data Preprocession**

In [2]:
coffee = pd.read_csv('coffee_recom.csv')
coffee.head(5)

Unnamed: 0.1,Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
0,0,“Sweety” Espresso Blend,Panama; Ethiopia,95,A.R.C.,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Sweet-toned, deeply ric...",An espresso blend comprised of coffees from Pa...,A radiant espresso blend that shines equally i...,"9.0,0.0,9.0,9.0,9.0"
1,1,Flora Blend Espresso,Africa; Asia Pacific,94,A.R.C.,9.0,0.0,9.0,9.0,8.0,"Evaluated as espresso. Sweetly tart, floral-to...",An espresso blend comprised of coffees from Af...,"A floral-driven straight shot, amplified with ...","9.0,0.0,9.0,9.0,8.0"
2,2,Ethiopia Shakiso Mormora,"Guji Zone, southern Ethiopia",92,Revel Coffee,9.0,8.0,8.0,9.0,8.0,"Crisply sweet, cocoa-toned. Lemon blossom, roa...",This coffee tied for the third-highest rating ...,"A gently spice-toned, floral- driven wet-proce...","9.0,8.0,8.0,9.0,8.0"
3,3,Ethiopia Suke Quto,"Guji Zone, Oromia Region, south-central Ethiopia",92,Roast House,8.0,8.0,9.0,9.0,8.0,"Delicate, sweetly spice-toned. Pink peppercorn...",This coffee tied for the third-highest rating ...,Lavender-like flowers and hints of zesty pink ...,"8.0,8.0,9.0,9.0,8.0"
4,4,Ethiopia Gedeb Halo Beriti,"Gedeb District, Gedeo Zone, southern Ethiopia",94,Big Creek Coffee Roasters,9.0,9.0,9.0,9.0,8.0,"Deeply sweet, subtly pungent. Honey, pear, tan...",Southern Ethiopia coffees like this one are pr...,A deeply and generously lush cup saved from se...,"9.0,9.0,9.0,9.0,8.0"


In [3]:
# Drop unused column
coffee.drop(['Unnamed: 0'], inplace=True, axis=1)

In [4]:
coffee.isnull().sum()

name          0
origin        0
rating        0
roaster       0
aroma         0
acid          0
body          0
flavor        0
aftertaste    0
desc_1        0
desc_2        0
desc_3        0
label         0
dtype: int64

#**Modelling**

In [5]:
labelling = coffee.filter(['name', 'label'])
labelling.head(5)

Unnamed: 0,name,label
0,“Sweety” Espresso Blend,"9.0,0.0,9.0,9.0,9.0"
1,Flora Blend Espresso,"9.0,0.0,9.0,9.0,8.0"
2,Ethiopia Shakiso Mormora,"9.0,8.0,8.0,9.0,8.0"
3,Ethiopia Suke Quto,"8.0,8.0,9.0,9.0,8.0"
4,Ethiopia Gedeb Halo Beriti,"9.0,9.0,9.0,9.0,8.0"


In [6]:
# Get dummy data for label column
label_dum = pd.get_dummies(labelling['label'])

labelling_new = pd.concat([labelling, label_dum], axis=1)
labelling_new.drop('label', inplace=True, axis=1)
labelling_new.drop('name', inplace=True, axis=1)
labelling_new.head(5)

Unnamed: 0,"0.0,0.0,0.0,0.0,0.0","0.0,8.0,8.0,9.0,7.0","0.0,8.0,8.0,9.0,8.0","0.0,8.0,8.0,9.0,9.0","0.0,8.0,9.0,9.0,8.0","0.0,8.0,9.0,9.0,9.0","0.0,9.0,8.0,9.0,8.0","0.0,9.0,8.0,9.0,9.0","0.0,9.0,9.0,9.0,8.0","0.0,9.0,9.0,9.0,9.0",...,"9.0,9.0,8.0,10.0,9.0","9.0,9.0,8.0,9.0,7.0","9.0,9.0,8.0,9.0,8.0","9.0,9.0,8.0,9.0,9.0","9.0,9.0,9.0,10.0,8.0","9.0,9.0,9.0,10.0,9.0","9.0,9.0,9.0,7.0,9.0","9.0,9.0,9.0,9.0,7.0","9.0,9.0,9.0,9.0,8.0","9.0,9.0,9.0,9.0,9.0"
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [7]:
# Check shape of dummy data
labelling_new.shape 

(2077, 118)

In [8]:
# Get similarity of dummy data
cosine_sim = cosine_similarity(labelling_new) 
cosine_sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 1.]])

In [9]:
# Create a dataframe from the cosine_sim variable with rows and columns using the name of coffee
cosine_sim_df = pd.DataFrame(cosine_sim, index=labelling['name'], columns=labelling['name'])
print('Shape:', cosine_sim_df.shape)
 
# See the similarity matrix for each coffee
cosine_sim_df.sample(5, axis=1).sample(10, axis=0)

Shape: (2077, 2077)


name,Honey-Processed Yunnan Espresso,Mocha Java,Revolution Medium Roast,Minas Gerais Isaias Ferreira No. 2,H Coffee Espresso Blend 01
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Yemen Jabal Nabi Special Reserve,0.0,0.0,0.0,0.0,0.0
Boru Batak Sumatra,0.0,0.0,0.0,0.0,0.0
Ethiopia Buku Abela,0.0,0.0,0.0,0.0,0.0
Ethiopia Gogogu Bekaka #4,0.0,0.0,0.0,0.0,0.0
Lian House Blend Espresso,0.0,0.0,0.0,0.0,0.0
El Salvador Finca La Esperanza,0.0,0.0,0.0,0.0,0.0
Philippine Farmer Milled Lot,0.0,0.0,0.0,0.0,0.0
Gura Kenya,0.0,0.0,0.0,0.0,0.0
Kenya AA Top Lot,0.0,0.0,0.0,0.0,0.0
Ethiopia Gugi,0.0,0.0,0.0,0.0,0.0


In [10]:
def app_recom(label, similarity_data=cosine_sim_df, items=coffee[['name', 'origin', 'rating', 'roaster', 'aroma', 'acid', 'body', 'flavor', 'aftertaste', 'desc_1', 'desc_2', 'desc_3']], k=5):

    # Retrieve data by using argpartition to partition indirectly along a given axis    
    # Dataframe changed to numpy
    # Range(start, stop, step)
    index = similarity_data.loc[:,label].to_numpy().argpartition(
        range(-1, -k, -1))
    
    # Retrieving data with the greatest similarity from the existing index
    closest = similarity_data.columns[index[-1:-(k+2):-1]]
    
    # Drop the coffee name so that the coffee name you are looking for does not appear in the list of recommendations
    closest = closest.drop(label, errors='ignore')
 
    return pd.DataFrame(closest).merge(items).head(k)

In [11]:
coffee[coffee['name'] == '“Sweety” Espresso Blend']

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
0,“Sweety” Espresso Blend,Panama; Ethiopia,95,A.R.C.,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Sweet-toned, deeply ric...",An espresso blend comprised of coffees from Pa...,A radiant espresso blend that shines equally i...,"9.0,0.0,9.0,9.0,9.0"


In [12]:
app_recom('“Sweety” Espresso Blend')

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3
0,Costa Rica Aris Red Honey Lot 1901 Espresso,Costa Rica,95,Kakalove Cafe,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Delicate, floral-and fr...","Processed by the red honey method, meaning tha...","A nuanced, multi-layered red honey coffee from..."
1,Costa Rica Perla Negra,"Sabanilla de Alajuela growing region, Costa Rica",95,Durango Coffee Company,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Deeply rich, crisply sw...",This exceptional coffee was selected as the No...,"A bold, lively, expressive natural-processed C..."
2,Fruity Espresso Blend,Panama; Ethiopia; Kenya,95,NINETYs Roastery,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Bright, balanced, deep,...",An espresso blend comprised of washed-process ...,"A multi-layered, richly complex espresso blend..."
3,Kona Natural Espresso,"Holualoa, North Kona growing district, “Big Is...",95,Hula Daddy Kona Coffee,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Richly chocolaty, sweet...","Produced by Karen and Lee Patterson, of Hula D...",A Kona on steroids: Hula Daddy is just beginni...
4,Poseidon Blend Espresso,"Yirgacheffe, Sidamo, Huehuetenango",95,GK Coffee,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Richly floral-toned, el...","A blend of three coffees: two Ethiopias, one f...",A deftly blended espresso as good as a straigh...


In [13]:
coffee[coffee['name'] == 'Ethiopia Kayon Mountain']

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
5,Ethiopia Kayon Mountain,"Odo Shakiso District, Guji Zone, southern Ethi...",93,Red Rooster Coffee Roaster,9.0,8.0,9.0,9.0,8.0,"Delicate, richly and sweetly tart. Dried hibis...",This coffee tied for the second-highest rating...,"A lively and crisply sweet, fruit-forward natu...","9.0,8.0,9.0,9.0,8.0"


In [14]:
app_recom('Ethiopia Kayon Mountain')

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3
0,Honduras Blue Hole Estate,Honduras,93,David's Nose,9.0,8.0,9.0,9.0,8.0,"Deeply sweet, fruit-forward. Raspberry coulis,...",Produced at Blue Hole Estate entirely of the C...,"Spicy florals, crisply sweet nut tones, and di..."
1,Philippine Farmer Milled Lot,"Atok, Benguet, The Philippines",93,Mostra Coffee,9.0,8.0,9.0,9.0,8.0,"Spice-toned, deeply chocolaty. Dark chocolate,...",Produced by a small community of coffee grower...,"A balanced, rich-toned, darker roasted coffee ..."
2,Peru Wilmer Alarcon Mirez,"Cajamarca, Peru",93,Big Shoulders Coffee,9.0,8.0,9.0,9.0,8.0,"Richly aromatic, chocolaty. Dark chocolate, Bi...",Produced by Wilmer Alarcon Mirez from trees of...,"A rich, floral-driven Peru cup with stone frui..."
3,Costa Rica Brunca Finca Vista Passion Honey,"Brunca, Costa Rica",93,94 Fresh Coffee,9.0,8.0,9.0,9.0,8.0,"Sweetly tart, very lightly fermenty. Chocolate...",This Costa Rica was processed by a refined var...,A sweet but brisk cup saturated by a seductive...
4,Ecuador Finca Carolina Fausto Romo,"Pichincha Province, north-central Ecuador",93,PT's Coffee Roasting Co.,9.0,8.0,9.0,9.0,8.0,"Rich-toned, deeply sweet and complexly engagin...",This coffee tied for the highest rating in a c...,"An impressive Ecuador cup: big, resonant, laye..."
