<a href="https://colab.research.google.com/github/Hanifanta/Capstone_RECCOFFEE/blob/main/notebook/recoffee_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Import Libraries**

In [None]:
# Data loading and data analysis
import pandas as pd

# Modeling
from sklearn.metrics.pairwise import cosine_similarity

## **Data Preprocessing**

In [None]:
# Read the coffee dataset from CSV and preview its first five rows
coffee = pd.read_csv('coffee_recom.csv', engine='python')
coffee.head()

Unnamed: 0.1,Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
0,0,“Sweety” Espresso Blend,Panama; Ethiopia,95,A.R.C.,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Sweet-toned, deeply ric...",An espresso blend comprised of coffees from Pa...,A radiant espresso blend that shines equally i...,"9.0,0.0,9.0,9.0,9.0"
1,1,Flora Blend Espresso,Africa; Asia Pacific,94,A.R.C.,9.0,0.0,9.0,9.0,8.0,"Evaluated as espresso. Sweetly tart, floral-to...",An espresso blend comprised of coffees from Af...,"A floral-driven straight shot, amplified with ...","9.0,0.0,9.0,9.0,8.0"
2,2,Ethiopia Shakiso Mormora,"Guji Zone, southern Ethiopia",92,Revel Coffee,9.0,8.0,8.0,9.0,8.0,"Crisply sweet, cocoa-toned. Lemon blossom, roa...",This coffee tied for the third-highest rating ...,"A gently spice-toned, floral- driven wet-proce...","9.0,8.0,8.0,9.0,8.0"
3,3,Ethiopia Suke Quto,"Guji Zone, Oromia Region, south-central Ethiopia",92,Roast House,8.0,8.0,9.0,9.0,8.0,"Delicate, sweetly spice-toned. Pink peppercorn...",This coffee tied for the third-highest rating ...,Lavender-like flowers and hints of zesty pink ...,"8.0,8.0,9.0,9.0,8.0"
4,4,Ethiopia Gedeb Halo Beriti,"Gedeb District, Gedeo Zone, southern Ethiopia",94,Big Creek Coffee Roasters,9.0,9.0,9.0,9.0,8.0,"Deeply sweet, subtly pungent. Honey, pear, tan...",Southern Ethiopia coffees like this one are pr...,A deeply and generously lush cup saved from se...,"9.0,9.0,9.0,9.0,8.0"


In [None]:
# Drop the unused 'Unnamed: 0' column (presumably a leftover row index from an
# earlier CSV export -- confirm against the data source).
# Rebinding the result with errors='ignore' keeps this cell idempotent:
# re-running it after the column is already gone no longer raises KeyError.
coffee = coffee.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Count the missing values in each column
coffee.isna().sum()

name          0
origin        0
rating        0
roaster       0
aroma         0
acid          0
body          0
flavor        0
aftertaste    0
desc_1        0
desc_2        0
desc_3        0
label         0
dtype: int64

In [None]:
# Report the dataset dimensions: column count first, then row count
print(f'Data consist of {len(coffee.columns)} columns')
print(f'Each column consists of {len(coffee)} records')

Data consist of 13 columns
Each column consists of 2282 records


## **Modelling**

In [None]:
# Keep only the coffee name and its label string for the modelling step
labelling = coffee.filter(items=['name', 'label'])
labelling.head()

Unnamed: 0,name,label
0,“Sweety” Espresso Blend,"9.0,0.0,9.0,9.0,9.0"
1,Flora Blend Espresso,"9.0,0.0,9.0,9.0,8.0"
2,Ethiopia Shakiso Mormora,"9.0,8.0,8.0,9.0,8.0"
3,Ethiopia Suke Quto,"8.0,8.0,9.0,9.0,8.0"
4,Ethiopia Gedeb Halo Beriti,"9.0,9.0,9.0,9.0,8.0"


In [None]:
# One-hot encode the label column: one indicator column per distinct label string
label_dum = pd.get_dummies(labelling['label'])

# Attach the indicator columns to the name/label frame, then discard the two
# original text columns so that only the indicator columns remain
labelling_new = pd.concat([labelling, label_dum], axis=1).drop(columns=['label', 'name'])
labelling_new.head()

Unnamed: 0,"0.0,0.0,0.0,0.0,0.0","0.0,8.0,8.0,8.0,8.0","0.0,8.0,8.0,9.0,7.0","0.0,8.0,8.0,9.0,8.0","0.0,8.0,8.0,9.0,9.0","0.0,8.0,9.0,9.0,8.0","0.0,8.0,9.0,9.0,9.0","0.0,9.0,8.0,9.0,8.0","0.0,9.0,8.0,9.0,9.0","0.0,9.0,9.0,9.0,8.0",...,"9.0,9.0,8.0,10.0,9.0","9.0,9.0,8.0,9.0,7.0","9.0,9.0,8.0,9.0,8.0","9.0,9.0,8.0,9.0,9.0","9.0,9.0,9.0,10.0,8.0","9.0,9.0,9.0,10.0,9.0","9.0,9.0,9.0,7.0,9.0","9.0,9.0,9.0,9.0,7.0","9.0,9.0,9.0,9.0,8.0","9.0,9.0,9.0,9.0,9.0"
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [None]:
# Check shape of dummy data: (number of coffees, number of distinct label strings)
labelling_new.shape 

(2282, 121)

In [None]:
# Get similarity of dummy data: pairwise cosine similarity between all rows.
# Each row is a one-hot vector, so an entry is 1 when two coffees share exactly
# the same label string and 0 otherwise.
cosine_sim = cosine_similarity(labelling_new) 
cosine_sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 1.]])

In [None]:
# Wrap the similarity matrix in a DataFrame whose rows and columns are both
# labelled with the coffee names
cosine_sim_df = pd.DataFrame(cosine_sim, index=labelling['name'], columns=labelling['name'])
print('Shape:', cosine_sim_df.shape)

# Preview a random 10 x 5 slice of the similarity matrix.
# The fixed random_state makes the preview identical on every run, so a
# Restart & Run All reproduces the same table.
cosine_sim_df.sample(5, axis=1, random_state=42).sample(10, axis=0, random_state=42)

Shape: (2282, 2282)


name,Ethiopia Guji Hambela G1 Natural,Gedeb Ethiopia,KANU Colombia Mild Roast Americano (Instant Coffee),Ka’u Classic Dark,Angamaza
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Red Clay Espresso Blend,0.0,0.0,0.0,0.0,0.0
Colombia Tolima,0.0,0.0,0.0,0.0,0.0
Proyecto Aurora,0.0,0.0,0.0,0.0,0.0
Organic Heartstrings,0.0,0.0,0.0,0.0,0.0
Rukera Espresso,0.0,0.0,0.0,0.0,0.0
Nayamasasa Democratic Republic of the Congo,0.0,0.0,0.0,0.0,0.0
Ethiopia Yirgacheffe Koke,0.0,0.0,0.0,0.0,0.0
Nicaragua Jinotega Hacienda La Bastilla Blue Candy Natural,0.0,0.0,0.0,0.0,0.0
Kenya Karindudu AA,0.0,1.0,0.0,0.0,0.0
Ka’u Classic Dark,0.0,0.0,0.0,1.0,1.0


In [None]:
def app_recom(label, similarity_data=None, items=None, k=5):
    """Recommend up to ``k`` coffees most similar to the coffee named ``label``.

    Parameters
    ----------
    label : str
        Name of the query coffee; must be a column label of ``similarity_data``.
    similarity_data : pandas.DataFrame, optional
        Similarity matrix whose rows and columns are labelled with coffee names.
        Defaults to the notebook-level ``cosine_sim_df``, looked up when the
        function is called so that re-running the similarity cell is picked up
        without redefining this function.
    items : pandas.DataFrame, optional
        Coffee details to attach to each recommendation. Must contain the key
        column (the name of ``similarity_data``'s index, or ``'name'`` when the
        index is unnamed). Defaults to the descriptive columns of the
        notebook-level ``coffee`` frame.
    k : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        At most ``k`` rows of ``items``, most similar first, never including
        the query coffee itself. Each coffee name appears at most once.

    Raises
    ------
    KeyError
        If ``label`` is not a column of ``similarity_data``.
    """
    # Resolve the notebook-level defaults lazily instead of freezing them in the
    # signature, where they would go stale as soon as an earlier cell is re-run.
    if similarity_data is None:
        similarity_data = cosine_sim_df
    if items is None:
        items = coffee[['name', 'origin', 'rating', 'roaster', 'aroma', 'acid',
                        'body', 'flavor', 'aftertaste', 'desc_1', 'desc_2', 'desc_3']]

    if label not in similarity_data.columns:
        raise KeyError(f'{label!r} not found in similarity data')

    scores = similarity_data.loc[:, label]
    if isinstance(scores, pd.DataFrame):
        # Several coffees share this name, so .loc returned one column per
        # duplicate; rank against the first occurrence.
        scores = scores.iloc[:, 0]

    # Full stable sort in descending order of similarity. A partial partition
    # would only guarantee the positions it was asked for; sorting everything
    # guarantees every returned slot is in rank order, works for any k, and
    # breaks ties deterministically by original row order.
    order = (-scores.to_numpy()).argsort(kind='stable')
    ranked = similarity_data.index[order]

    # Column used to join the ranked names to ``items``.
    key = similarity_data.index.name
    if key is None:
        key = 'name'

    # Remove the query coffee itself and collapse repeated names before cutting
    # to k, so the result is not shortened by entries that get dropped later.
    ranked = ranked[ranked != label].drop_duplicates()
    top = pd.DataFrame({key: ranked[:max(k, 0)].to_numpy()})

    # One detail row per name: duplicated names in ``items`` would otherwise
    # multiply rows in the merge. The inner merge keeps the ranking order.
    return top.merge(items.drop_duplicates(subset=key), on=key).head(max(k, 0))

In [None]:
# Show the query coffee's own record, for comparison with its recommendations
coffee.loc[coffee['name'].eq('“Sweety” Espresso Blend')]

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3,label
0,“Sweety” Espresso Blend,Panama; Ethiopia,95,A.R.C.,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Sweet-toned, deeply ric...",An espresso blend comprised of coffees from Pa...,A radiant espresso blend that shines equally i...,"9.0,0.0,9.0,9.0,9.0"


In [None]:
# Recommend coffees for the query coffee, using the function's default
# similarity data, item columns and k=5
app_recom('“Sweety” Espresso Blend')

Unnamed: 0,name,origin,rating,roaster,aroma,acid,body,flavor,aftertaste,desc_1,desc_2,desc_3
0,Panama Boquete Flor De Mariposa Geisha Espresso,"Boquete growing region, western Panama",96,Kakalove Cafe,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Richly floral, complex,...",This exceptional coffee was selected as the No...,"An exquisitely floral-toned, chocolaty single-..."
1,Kona Natural Espresso,"Holualoa, North Kona growing district, “Big Is...",95,Hula Daddy Kona Coffee,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Richly chocolaty, sweet...","Produced by Karen and Lee Patterson, of Hula D...",A Kona on steroids: Hula Daddy is just beginni...
2,YCFCU Banko Gotiti Coop G1,"Yirgacheffe growing region, southern Ethiopia",95,Taokas Coffee,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Lush, complex, richly s...",This coffee tied for the highest rating in a t...,A natural-processed Ethiopia roasted for espre...
3,Kenya Washed Nakuru Gachombe Factory AB,"Nakuru County, Kenya",95,Kakalove Cafe,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Richly aromatic, deeply...",Produced by smallholding farmers in the Nakuru...,"A complex, savory-sweet, rich-toned single-ori..."
4,Tang Bohu Espresso Blend,Ethiopia; Kenya; Colombia; Costa Rica,95,Simon Hsieh Aroma Roast Coffees,9.0,0.0,9.0,9.0,9.0,"Evaluated as espresso. Deeply sweet-savory, ch...",A blend of coffees from a variety of origins u...,"A richly chocolaty, deeply aromatic, spice-ton..."
