<a href="https://colab.research.google.com/github/Hanifanta/Capstone_RECCOFFEE/blob/main/notebook/recoffee_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##**Import Library**

In [1]:
# Data loading and data analysis
import pandas as pd

# Modeling
from sklearn.metrics.pairwise import cosine_similarity

##**Data Preprocession**

In [2]:
coffee = pd.read_csv('coffee_recom.csv')
coffee.head(5)

Unnamed: 0,slug,name,origin,rating,roaster,aroma,acid_or_milk,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
0,https://www.coffeereview.com/review/ethiopia-r...,Ethiopia Reko Yirgacheffe,"Yirgacheffe growing region, southern Ethiopia",94,Temple Coffee,9.0,9.0,9.0,9.0,8.0,"Rich-toned, deeply floral. White peach, tea ro...",Produced by the Reko Farming Cooperative from ...,A classic washed Yirgacheffe cup driven by not...,"9.0,9.0,9.0,9.0,8.0"
1,https://www.coffeereview.com/review/double-ana...,Double-Anaerobic Fermentation Tamiru Tadesse,"Sidamo growing region, south-central Ethiopia",94,Genesis Coffee Lab,9.0,9.0,9.0,9.0,8.0,"Richly floral-toned, crisply sweet. Honeysuckl...",Produced by Tamiru Tadesse of Alo Coffee. Sout...,A floral-driven anaerobic Ethiopia cup free of...,"9.0,9.0,9.0,9.0,8.0"
2,https://www.coffeereview.com/review/ethiopia-c...,Ethiopia Chelbesa Danche Yirgacheffe,"Chelchele, Gedeb District, Gedeo Zone, Ethiopia",94,Jaunt Coffee Roasters,9.0,9.0,9.0,9.0,8.0,"Elegantly sweet, floral-toned. Lilac, almond, ...",Produced by the METAD farm and mill in souther...,"A brightly sweet, fragrantly floral-driven was...","9.0,9.0,9.0,9.0,8.0"
3,https://www.coffeereview.com/review/ethiopia-h...,Ethiopia Honey Process Tamiru Tadesse,"Sidamo growing region, south-central Ethiopia",94,Genesis Coffee Lab,9.0,9.0,9.0,9.0,8.0,"Delicately fruit-toned, tisane-like. Dried str...",Produced by Tamiru Tadesse of Alo Coffee. Sout...,"A delicate, elegant, honey-processed Sidamo cu...","9.0,9.0,9.0,9.0,8.0"
4,https://www.coffeereview.com/review/colombia-p...,Colombia Planadas Tolima,"Tolima, Colombia",93,Jaunt Coffee Roasters,9.0,9.0,8.0,9.0,8.0,"Delicate, richly aromatic. Pineapple, almond n...",Produced by Jorge Elias Rojas entirely of the ...,"A lovely Colombia Geisha cup, finely tea-like ...","9.0,9.0,8.0,9.0,8.0"


In [3]:
coffee.isnull().sum()

slug            0
name            0
origin          0
rating          0
roaster         0
aroma           0
acid_or_milk    0
body            0
flavor          0
aftertaste      0
desc_1          0
desc_2          0
desc_3          0
label           0
dtype: int64

#**Modelling**

In [4]:
labelling = coffee.filter(['name', 'label'])
labelling.head(5)

Unnamed: 0,name,label
0,Ethiopia Reko Yirgacheffe,"9.0,9.0,9.0,9.0,8.0"
1,Double-Anaerobic Fermentation Tamiru Tadesse,"9.0,9.0,9.0,9.0,8.0"
2,Ethiopia Chelbesa Danche Yirgacheffe,"9.0,9.0,9.0,9.0,8.0"
3,Ethiopia Honey Process Tamiru Tadesse,"9.0,9.0,9.0,9.0,8.0"
4,Colombia Planadas Tolima,"9.0,9.0,8.0,9.0,8.0"


In [5]:
# Get dummy data for label column
label_dum = pd.get_dummies(labelling['label'])

labelling_new = pd.concat([labelling, label_dum], axis=1)
labelling_new.drop('label', inplace=True, axis=1)
labelling_new.drop('name', inplace=True, axis=1)
labelling_new.head(5)

Unnamed: 0,"0.0,8.0,8.0,9.0,7.0","0.0,8.0,8.0,9.0,8.0","0.0,8.0,8.0,9.0,9.0","0.0,8.0,9.0,9.0,8.0","0.0,8.0,9.0,9.0,9.0","0.0,9.0,8.0,9.0,8.0","0.0,9.0,8.0,9.0,9.0","0.0,9.0,9.0,9.0,8.0","0.0,9.0,9.0,9.0,9.0","10.0,10.0,9.0,10.0,8.0",...,"9.0,9.0,8.0,9.0,7.0","9.0,9.0,8.0,9.0,8.0","9.0,9.0,8.0,9.0,9.0","9.0,9.0,9.0,10.0,8.0","9.0,9.0,9.0,10.0,9.0","9.0,9.0,9.0,8.0,8.0","9.0,9.0,9.0,9.0,10.0","9.0,9.0,9.0,9.0,7.0","9.0,9.0,9.0,9.0,8.0","9.0,9.0,9.0,9.0,9.0"
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [6]:
# Check shape of dummy data
labelling_new.shape 

(2071, 106)

In [7]:
# Get similarity of dummy data
cosine_sim = cosine_similarity(labelling_new) 
cosine_sim

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [8]:
# Create a dataframe from the cosine_sim variable with rows and columns using the name of coffee
cosine_sim_df = pd.DataFrame(cosine_sim, index=labelling['name'], columns=labelling['name'])
print('Shape:', cosine_sim_df.shape)
 
# See the similarity matrix for each coffee
cosine_sim_df.sample(5, axis=1).sample(10, axis=0)

Shape: (2071, 2071)


name,Philippines Sitio Naguey Natural,Ruby Sweet Lychee,Costa Rica Tirra Natural,San Jose Mexico,Ethiopia Natural Guji TAMIRU 74158 Kakalove Lot
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Honduras Anaerobic Natural,0.0,0.0,0.0,0.0,0.0
Cream Tabby Espresso Blend,0.0,0.0,0.0,0.0,0.0
Colombia Finca El Paraiso Geisha Letty,0.0,0.0,0.0,0.0,0.0
Kenya Gakuyuini AA,0.0,0.0,0.0,0.0,0.0
Ethiopia Guji Natural,0.0,0.0,0.0,0.0,0.0
Ethiopia Bedhatu Washed,0.0,0.0,1.0,0.0,0.0
Peru Wilmer Alarcon Mirez,0.0,0.0,0.0,0.0,0.0
Tanzania Gesha Peaberry,0.0,0.0,0.0,0.0,1.0
Kenya AB Muchoki,0.0,0.0,0.0,0.0,0.0
Panama Elida Geisha Natural 1029,0.0,0.0,0.0,0.0,0.0


In [9]:
def app_recom(label, similarity_data=cosine_sim_df, items=coffee[['name', 'origin', 'rating', 'roaster', 'aroma', 'acid_or_milk', 'body', 'flavor', 'aftertaste', 'desc_1', 'desc_2', 'desc_3']], k=5):

    # Retrieve data by using argpartition to partition indirectly along a given axis    
    # Dataframe changed to numpy
    # Range(start, stop, step)
    index = similarity_data.loc[:,label].to_numpy().argpartition(
        range(-1, -k, -1))
    
    # Retrieving data with the greatest similarity from the existing index
    closest = similarity_data.columns[index[-1:-(k+2):-1]]
    
    # Drop the coffee name so that the coffee name you are looking for does not appear in the list of recommendations
    closest = closest.drop(label, errors='ignore')
 
    return pd.DataFrame(closest).merge(items).head(k)

In [11]:
coffee[coffee['name'] == 'Honduras Anaerobic Natural']

Unnamed: 0,slug,name,origin,rating,roaster,aroma,acid_or_milk,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
403,https://www.coffeereview.com/review/honduras-a...,Honduras Anaerobic Natural,Honduras,93,Min Enjoy Cafe,9.0,8.0,9.0,9.0,8.0,"Pungent, sweetly tart, fruity. Sweet-Tarts can...",Comprised entirely of the Paraneima variety of...,An experimentally processed Honduras with a co...,"9.0,8.0,9.0,9.0,8.0"


In [12]:
app_recom('Honduras Anaerobic Natural')

Unnamed: 0,name,origin,rating,roaster,aroma,acid_or_milk,body,flavor,aftertaste,desc_1,desc_2,desc_3
0,Project Congo Muungano,"South Kivu, Democratic Republic of the Congo",93,Wonderstate Coffee,9.0,8.0,9.0,9.0,8.0,"Crisply sweet, deeply rich. Date, baking choco...",This coffee tied for the second-highest rating...,"A chocolaty, sweetly nutty Congo cup from a hi..."
1,Ethiopia Yirgacheffe Adorsi G1,"Yirgacheffe growing region, south-central Ethi...",93,VERYTIME,9.0,8.0,9.0,9.0,8.0,"Deeply sweet, intense, perfumy. Guava, ripe ba...",Produced by Aricha Adorsi from trees of indige...,"A chocolaty, fruit-forward cup with undertones..."
2,Guatemala Vista Al Bosque,"Huehuetenango, Guatemala",93,Oak & Bond Coffee Company,9.0,8.0,9.0,9.0,8.0,"Chocolaty, floral-toned. Dark chocolate, rhodo...",Produced by Wilmar Castillo at Vista Al Bosque...,A richly sweet Guatemala cup with a throughlin...
3,Rwanda Akagera CWS Fully Washed Lot 9,"Nyaruguru District, Rwanda",93,Kakalove Cafe,9.0,8.0,9.0,9.0,8.0,"Resonant, rich, multi-layered. Dark chocolate,...",This coffee tied for the second-highest rating...,"A deep-toned, chocolaty and floral Rwanda cup ..."
4,Karamo Ethiopia Sidama Natural G1,"Sidama growing region, southern Ethiopia",93,1980 CAFE,9.0,8.0,9.0,9.0,8.0,"Deeply fruit-toned, richly chocolaty. Dried ap...",Produced by smallholding farmers entirely of t...,"An invitingly sweet, crisply tart, chocolaty n..."


In [13]:
coffee[coffee['name'] == 'Ethiopia Kayon Mountain']

Unnamed: 0,slug,name,origin,rating,roaster,aroma,acid_or_milk,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
172,https://www.coffeereview.com/review/ethiopia-k...,Ethiopia Kayon Mountain,"Odo Shakiso District, Guji Zone, southern Ethi...",94,Yellow Brick Coffee,9.0,9.0,9.0,9.0,8.0,"Richly sweet-tart, high-toned. Apricot nectar,...",Produced by the Hassen family at their estate ...,"A classic washed Ethiopia cup: deep-toned, liv...","9.0,9.0,9.0,9.0,8.0"


In [14]:
app_recom('Ethiopia Kayon Mountain')

Unnamed: 0,name,origin,rating,roaster,aroma,acid_or_milk,body,flavor,aftertaste,desc_1,desc_2,desc_3
0,Ethiopia Reko Yirgacheffe,"Yirgacheffe growing region, southern Ethiopia",94,Temple Coffee,9.0,9.0,9.0,9.0,8.0,"Rich-toned, deeply floral. White peach, tea ro...",Produced by the Reko Farming Cooperative from ...,A classic washed Yirgacheffe cup driven by not...
1,Gigesa Ethiopia,"Guji Zone, Oromia Region, Ethiopia",94,JBC Coffee Roasters,9.0,9.0,9.0,9.0,8.0,"Complex, multi-layered, deeply sweet. Dark cho...",Southern Ethiopia coffees like this one are la...,"A deep-toned Ethiopia natural: cleanly fruity,..."
2,Oahu Triple-Bourbon “Sparkling” Yeast Ferment,"Maunawili growing region, southeastern Oahu, H...",94,Rusty's Hawaiian,9.0,9.0,9.0,9.0,8.0,"Complex, multi-layered, gently fruit-forward. ...","Comprised of three varieties of Bourbon — red,...",An unusual coffee in almost every respect: Fro...
3,Taiwan Washed Alishan Ching-Ye Farm SL34,"Alishan, Chia-Yi City, Taiwan",94,Kakalove Cafe,9.0,9.0,9.0,9.0,8.0,"Richly sweet-tart, delicately fruit-toned. Wil...",Produced at Ching-Ye farm in Taiwan entirely o...,"A complex, intriguing coffee from Alishan, a r..."
4,Ethiopia Guji Odo Shakiso,"Guji Zone, Oromia Region, southern Ethiopia",94,Merge Coffee Company,9.0,9.0,9.0,9.0,8.0,"Gently sweet-tart, complexly layered. Dried bl...",Southern Ethiopia coffees like this one are la...,"A fruit-forward, natural-processed Guji cup wi..."
