# <center>Recommender System</center>

The recommender system uses Sci-kit Learn's pairwise_distances and cosine_similarity. 

In [1]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
import sys

In [2]:
#read in data
km_df = pd.read_csv('../data/km_data.csv')
km_df.head()

Unnamed: 0,review_profilename,beer_beerid,beer_name,beer_abv,beer_style,review_overall,review_aroma,review_appearance,review_palate,review_taste,km_labels
0,Nibbley,776,Samichlaus Bier,14.0,Doppelbock,5.0,5.0,5.0,5.0,5.0,1
1,BeerTitan,1441,Young's Old Nick Barley Wine Ale,7.2,English Barleywine,4.5,4.5,4.0,4.5,4.0,3
2,Morey,46809,Flashback Ale,6.88,American Brown Ale,3.5,3.5,4.0,4.0,3.5,0
3,tempest,2704,St-Ambroise Oatmeal Stout,5.0,Oatmeal Stout,4.0,5.0,4.0,4.0,3.5,5
4,KoG,248,Hoegaarden Original White Ale,4.9,Witbier,5.0,3.5,4.0,4.5,4.5,3


In [3]:
#Lowercased items in 'beer_style' column
km_df['beer_style'] = km_df['beer_style'].map(lambda x: x.strip().lower())

In [4]:
# features = ['review_overall', 
#           'review_aroma', 
#           'review_appearance', 
#           'review_palate', 
#           'review_taste', 
#           'km_labels']
pivot = km_df.pivot_table(index='beer_style',
                         columns='review_profilename',
                         values='km_labels')

### Sparse Marix

In [5]:
pivot_sparse = sparse.csr_matrix(pivot.fillna(0))

In [6]:
print(pivot_sparse)
type(pivot_sparse)

  (0, 293)	3.5
  (0, 388)	3.0
  (0, 606)	10.0
  (0, 610)	3.0
  (0, 614)	6.0
  (0, 649)	1.0
  (0, 680)	4.0
  (0, 711)	5.0
  (0, 822)	2.0
  (0, 853)	3.0
  (0, 927)	5.0
  (0, 1262)	5.0
  (0, 1444)	8.0
  (0, 1538)	5.0
  (0, 1597)	10.0
  (0, 1699)	1.0
  (0, 1722)	6.0
  (0, 1736)	7.0
  (0, 1779)	5.0
  (0, 1797)	4.0
  (0, 1810)	5.0
  (0, 1877)	5.0
  (0, 1950)	1.0
  (0, 1968)	3.0
  (0, 2011)	5.0
  :	:
  (103, 4741)	5.0
  (103, 4798)	6.0
  (103, 4825)	4.0
  (103, 4832)	4.0
  (103, 4845)	6.0
  (103, 4851)	6.0
  (103, 4914)	6.0
  (103, 4927)	6.0
  (103, 4964)	7.0
  (103, 4995)	4.0
  (103, 4996)	10.0
  (103, 5025)	2.0
  (103, 5031)	6.0
  (103, 5060)	5.0
  (103, 5101)	3.0
  (103, 5107)	5.0
  (103, 5207)	10.0
  (103, 5221)	3.0
  (103, 5243)	7.0
  (103, 5244)	10.0
  (103, 5289)	4.0
  (103, 5292)	5.0
  (103, 5302)	4.0
  (103, 5317)	5.0
  (103, 5332)	7.0


scipy.sparse.csr.csr_matrix

### Cosine Similarity

In [7]:
recommender = pairwise_distances(pivot_sparse, metric='cosine')

In [8]:
recommender.shape

(104, 104)

In [9]:
#Creating distances
recommender

array([[0.        , 0.92026274, 0.95430073, ..., 0.9732547 , 0.95965707,
        0.99026472],
       [0.92026274, 0.        , 0.90577457, ..., 0.98338908, 0.97252677,
        0.94428701],
       [0.95430073, 0.90577457, 0.        , ..., 0.9653799 , 0.91392786,
        0.92474902],
       ...,
       [0.9732547 , 0.98338908, 0.9653799 , ..., 0.        , 0.93797312,
        0.94662761],
       [0.95965707, 0.97252677, 0.91392786, ..., 0.93797312, 0.        ,
        0.91372916],
       [0.99026472, 0.94428701, 0.92474902, ..., 0.94662761, 0.91372916,
        0.        ]])

In [10]:
recommender_df = pd.DataFrame(recommender, 
                              index=pivot.index,
                             columns=pivot.index)
recommender_df.head()

beer_style,altbier,american adjunct lager,american amber / red ale,american amber / red lager,american barleywine,american black ale,american blonde ale,american brown ale,american dark wheat ale,american double / imperial ipa,...,scotch ale / wee heavy,scottish ale,scottish gruit / ancient herbed ale,smoked beer,tripel,vienna lager,weizenbock,wheatwine,winter warmer,witbier
beer_style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
altbier,0.0,0.920263,0.954301,0.991993,0.929122,0.984699,0.945842,0.935281,0.934137,0.939692,...,0.983389,0.960744,0.975973,1.0,0.934868,0.941281,0.971684,0.973255,0.959657,0.990265
american adjunct lager,0.920263,0.0,0.905775,0.97023,0.932784,0.986575,0.923783,0.954219,0.959886,0.931694,...,0.968508,0.952755,0.990762,0.97771,0.939943,0.926736,0.944149,0.983389,0.972527,0.944287
american amber / red ale,0.954301,0.905775,0.0,0.938177,0.922423,0.962319,0.954353,0.914668,0.965522,0.892577,...,0.913298,0.941392,0.973809,0.966674,0.935454,0.93953,0.958945,0.96538,0.913928,0.924749
american amber / red lager,0.991993,0.97023,0.938177,0.0,0.970851,0.952379,1.0,0.986713,1.0,0.931865,...,0.984845,0.988659,0.999332,0.949468,0.972597,0.93405,0.997821,1.0,0.977289,0.979507
american barleywine,0.929122,0.932784,0.922423,0.970851,0.0,0.970983,0.960687,0.935991,0.978168,0.885797,...,0.923289,0.961533,0.997864,1.0,0.922897,0.948042,0.956584,0.944326,0.953462,0.946632


In [11]:
recommender_df.to_csv('../data/recommender.csv')

In [12]:
rec = pd.read_csv('../data/recommender.csv', index_col=0)
rec.head()

Unnamed: 0_level_0,altbier,american adjunct lager,american amber / red ale,american amber / red lager,american barleywine,american black ale,american blonde ale,american brown ale,american dark wheat ale,american double / imperial ipa,...,scotch ale / wee heavy,scottish ale,scottish gruit / ancient herbed ale,smoked beer,tripel,vienna lager,weizenbock,wheatwine,winter warmer,witbier
beer_style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
altbier,0.0,0.920263,0.954301,0.991993,0.929122,0.984699,0.945842,0.935281,0.934137,0.939692,...,0.983389,0.960744,0.975973,1.0,0.934868,0.941281,0.971684,0.973255,0.959657,0.990265
american adjunct lager,0.920263,0.0,0.905775,0.97023,0.932784,0.986575,0.923783,0.954219,0.959886,0.931694,...,0.968508,0.952755,0.990762,0.97771,0.939943,0.926736,0.944149,0.983389,0.972527,0.944287
american amber / red ale,0.954301,0.905775,0.0,0.938177,0.922423,0.962319,0.954353,0.914668,0.965522,0.892577,...,0.913298,0.941392,0.973809,0.966674,0.935454,0.93953,0.958945,0.96538,0.913928,0.924749
american amber / red lager,0.991993,0.97023,0.938177,0.0,0.970851,0.952379,1.0,0.986713,1.0,0.931865,...,0.984845,0.988659,0.999332,0.949468,0.972597,0.93405,0.997821,1.0,0.977289,0.979507
american barleywine,0.929122,0.932784,0.922423,0.970851,0.0,0.970983,0.960687,0.935991,0.978168,0.885797,...,0.923289,0.961533,0.997864,1.0,0.922897,0.948042,0.956584,0.944326,0.953462,0.946632


In [13]:
rec['hefeweizen'].sort_values()[1:6].index

Index(['english bitter', 'american ipa', 'german pilsener',
       'saison / farmhouse ale', 'belgian ipa'],
      dtype='object', name='beer_style')

In [14]:
rec[rec.index.str.contains('ipa')].index


Index(['american double / imperial ipa', 'american ipa', 'belgian ipa',
       'english india pale ale (ipa)'],
      dtype='object', name='beer_style')

In [15]:
#pulling top 10 beer styles with "american adjunct lager"
recommender_df['american adjunct lager'].sort_values()[1:11]

beer_style
german pilsener             0.881195
euro pale lager             0.892471
american pale lager         0.901628
american amber / red ale    0.905775
hefeweizen                  0.909173
english pale ale            0.911880
belgian strong pale ale     0.912321
american ipa                0.913619
english barleywine          0.914598
kvass                       0.914614
Name: american adjunct lager, dtype: float64

In [18]:
q = 'Belgian Strong Pale Ale'
beers = rec[rec.index.str.contains('ipa')].index
# beers = km_df[km_df['beer_style'].str.contains(q)]['beer_style']
beer_style = km_df['beer_style']

for beer in beers:
    print(beer_style)
    print()
#     print(recommender_df[beer_style].sort_values(by=beer_style)[1:6])
    print('---------------------------------------------------------')
    print()

0                 doppelbock
1         english barleywine
2         american brown ale
3              oatmeal stout
4                    witbier
                ...         
15180       english pale ale
15181    american pale lager
15182           vienna lager
15183               rye beer
15184    munich helles lager
Name: beer_style, Length: 15185, dtype: object

---------------------------------------------------------

0                 doppelbock
1         english barleywine
2         american brown ale
3              oatmeal stout
4                    witbier
                ...         
15180       english pale ale
15181    american pale lager
15182           vienna lager
15183               rye beer
15184    munich helles lager
Name: beer_style, Length: 15185, dtype: object

---------------------------------------------------------

0                 doppelbock
1         english barleywine
2         american brown ale
3              oatmeal stout
4                    witbier
   

In [19]:
# #calls query and beer dataframe
#recommender(q, recommender_df)
