# Importing Necessary Libraries

In [7]:
import pandas as pd
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
import numpy as np

In [8]:
df = pd.read_csv('final_whiskey_data.csv')
df = df.drop('Unnamed: 0', axis = 1)
df.shape

(41018, 7)

# Need to make a Dataframe with only User, Item, and Rating for Surprise

In [9]:
rec_df = df.drop(columns = ['url', 'price(euro)', 'price(dollar)'])
rec_df = rec_df[['User_ID', 'Whiskey_ID', 'rating']]

# DataFrame with unique WhiskeyID's to provide all the information from the recommender

In [10]:
rec_match = df[['url', 'whiskey', 'Whiskey_ID', 'price(dollar)', 'price(euro)']].drop_duplicates('Whiskey_ID')

In [11]:
rec_match.head()

Unnamed: 0,url,whiskey,Whiskey_ID,price(dollar),price(euro)
0,https://www.whiskybase.com/whiskies/whisky/128...,1770-glasgow-single-malt,0,56.99,50.88
6,https://www.whiskybase.com/whiskies/whisky/125...,a-dream-of-scotland-islay-cask-strength-bw,1,89.59,79.99
53,https://www.whiskybase.com/whiskies/whisky/126...,a-dream-of-scotland-the-old-lady-of-islay-bw,2,87.25,77.9
85,https://www.whiskybase.com/whiskies/whisky/129...,aberfeldy-1996-ca,3,97.44,87.0
87,https://www.whiskybase.com/whiskies/whisky/131...,aberfeldy-1998,4,319.2,285.0


In [12]:
rec_df.User_ID.max()

1222

# Fitting the model with our data

In [13]:
from surprise import Reader, Dataset
reader = Reader(rating_scale = (1,100))
data = Dataset.load_from_df(rec_df,reader)

In [14]:
from surprise.model_selection import train_test_split
from surprise import accuracy
# Splitting the data to see how accurate our model will be predicting a rating
trainset, testset = train_test_split(data, test_size=0.25, random_state = 100)

In [15]:
svd = SVD()
svd.fit(trainset)
preds = svd.test(testset)

In [16]:
# Rating the Model
accuracy.rmse(preds)

RMSE: 2.3678


2.3677708805128987

# Test function to see how well our model predicts

In [17]:
def recommend_whiskey(uid, iid):
    pred = svd.predict(uid, iid, verbose=True)
    match = rec_match.loc[rec_match['Whiskey_ID'] == iid]
    return match

In [18]:
recommend_whiskey(522,10)

user: 522        item: 10         r_ui = None   est = 89.42   {'was_impossible': False}


Unnamed: 0,url,whiskey,Whiskey_ID,price(dollar),price(euro)
115,https://www.whiskybase.com/whiskies/whisky/129...,affinity-cb,10,128.25,114.51


<h3>Recommend to a new user

In [366]:
def whiskey_rater(df, num, priceUSD=None):
    userID = rec_df.User_ID.max()+1
    rating_list = []
    while num > 0:
        if priceUSD:
            whiskey = df[df['price(dollar)'] < priceUSD].sample(1)
        else:
            whiskey = df.sample(1)
        print(whiskey)
        rating = input('How do you rate this whiskey on a scale of 1-100, press n if you are not familiar with it. :\n')
        if int(rating) > 100:
            rating = 100
            
        if rating == 'n':
            continue
        else:
            rating_one_whiskey = {'User_ID':userID,'Whiskey_ID': whiskey['Whiskey_ID'].values[0],'rating': rating}
            rating_list.append(rating_one_whiskey) 
            num -= 1
            
    return rating_list

In [367]:
user_ratings = whiskey_rater(df, 5, 200)

      rating                                                url  \
2570      87  https://www.whiskybase.com/whiskies/whisky/129...   

                    whiskey  User_ID  Whiskey_ID  price(euro)  price(dollar)  
2570  highland-park-2001-gm      547         573       153.33         171.73  
How do you rate this whiskey on a scale of 1-100, press n if you are not familiar with it. :
200
     rating                                                url  \
163      80  https://www.whiskybase.com/whiskies/whisky/125...   

                               whiskey  User_ID  Whiskey_ID  price(euro)  \
163  ao-a-blend-of-five-major-whiskies      413          14         50.0   

     price(dollar)  
163           56.0  
How do you rate this whiskey on a scale of 1-100, press n if you are not familiar with it. :
105
      rating                                                url  \
4582      84  https://www.whiskybase.com/whiskies/whisky/128...   

                     whiskey  User_ID  Whiskey_ID 

In [368]:
user_ratings

[{'User_ID': 1223, 'Whiskey_ID': 573, 'rating': 100},
 {'User_ID': 1223, 'Whiskey_ID': 14, 'rating': 100},
 {'User_ID': 1223, 'Whiskey_ID': 1078, 'rating': 100},
 {'User_ID': 1223, 'Whiskey_ID': 612, 'rating': 100},
 {'User_ID': 1223, 'Whiskey_ID': 1043, 'rating': 100}]

In [362]:
## add the new ratings to the original ratings DataFrame
def get_recommendations(user_ratings, num_recommendations):
    new_ratings_df = rec_df.append(user_ratings,ignore_index=True)
    #load in new df
    new_data = Dataset.load_from_df(new_ratings_df,reader)
    #create new svd object
    svd_new = SVD()
    #re fit the model
    svd_new.fit(new_data.build_full_trainset())
    
    # make predictions for the user
    list_of_whiskies = []
    for w_id in new_ratings_df['Whiskey_ID'].unique():
        list_of_whiskies.append((w_id, svd_.predict(new_ratings_df['User_ID'].max(),w_id)[3]))
    
    # order the predictions from highest to lowest rated
    ranked_whiskies = sorted(list_of_whiskies, key=lambda x:x[1],reverse=True)
    
    
    for i in ranked_whiskies[:num_recommendations]:
        print(rec_match.loc[rec_match['Whiskey_ID'] == i[0]])
    
    
#     return ranked_whiskies[:num_recommendations]

In [363]:
get_recommendations(user_ratings, 10)

                                                    url            whiskey  \
2792  https://www.whiskybase.com/whiskies/whisky/129...  kilkerran-2006-ca   

      Whiskey_ID  price(dollar)  price(euro)  
2792         616          98.56         88.0  
                                                    url             whiskey  \
4722  https://www.whiskybase.com/whiskies/whisky/244...  laphroaig-1967-rwd   

      Whiskey_ID  price(dollar)  price(euro)  
4722         636        45360.0      40500.0  
                                                     url  \
36232  https://www.whiskybase.com/whiskies/whisky/442...   

                      whiskey  Whiskey_ID  price(dollar)  price(euro)  
36232  highland-park-thorfinn         580        1496.47      1336.13  
                                                     url         whiskey  \
26749  https://www.whiskybase.com/whiskies/whisky/977...  ardbeg-1975-dl   

       Whiskey_ID  price(dollar)  price(euro)  
26749          38        6481.

In [321]:
# return the top n recommendations using the 
def recommended_movies(user_ratings,movie_title_df,n):
        for idx, rec in enumerate(user_ratings):
            title = movie_title_df.loc[movie_title_df['movieId'] == int(rec[0])]['title']
            print('Recommendation # ',idx+1,': ',title,'\n')
            n-= 1
            if n == 0:
                break

Unnamed: 0,User_ID,Whiskey_ID,rating
41018,1223,523,64
41019,1223,617,80
41020,1223,326,76
41021,1223,422,94
41022,1223,616,100


In [323]:
# #retrain model
# svd_ = SVD(n_factors= 50, reg_all=0.05)
# svd_.fit(new_data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x118aca3c8>

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x118239f60>

In [327]:
# make predictions for the user
# you'll probably want to create a list of tuples in the format (whiskey_id, predicted_score)
list_of_whiskies = []
for w_id in new_ratings_df['Whiskey_ID'].unique():
    list_of_whiskies.append((w_id, svd_.predict(new_ratings_df['User_ID'].max(),w_id)[3]))


In [329]:
# order the predictions from highest to lowest rated
ranked_whiskies = sorted(list_of_whiskies, key=lambda x:x[1],reverse=True)

In [330]:
ranked_whiskies

[(616, 97.43232012016195),
 (636, 96.63568093798703),
 (580, 94.86311260176791),
 (38, 94.65125672599899),
 (681, 94.21459470476482),
 (637, 94.20680008758842),
 (172, 94.05259416055023),
 (1043, 94.0522059436128),
 (516, 93.85499043027228),
 (477, 93.84044539434964),
 (845, 93.82941767925414),
 (255, 93.82473727091403),
 (576, 93.81900993560372),
 (448, 93.79860285637865),
 (205, 93.73696540533702),
 (152, 93.65928864898943),
 (222, 93.64560845153213),
 (251, 93.53195059226469),
 (170, 93.52857773459196),
 (1044, 93.46984436342395),
 (1012, 93.43075399014924),
 (645, 93.3208105890292),
 (465, 93.30845009215135),
 (353, 93.28181171452171),
 (422, 93.25301473752536),
 (742, 93.15846123085937),
 (762, 93.09543522931918),
 (682, 93.06666318270985),
 (455, 93.01782903758267),
 (730, 93.00438118174374),
 (352, 92.98810155037286),
 (732, 92.97144229841113),
 (440, 92.943235635491),
 (449, 92.92287430525343),
 (684, 92.88825900107999),
 (547, 92.88183454147024),
 (375, 92.83144647945824),
 (1