# This notebook features Collaborative filtering using matrix factorization

## Loading in data

In [1]:
import pandas as pd
import numpy as np

from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import linear_kernel, euclidean_distances
from sklearn.preprocessing import normalize

In [2]:
user_item = pd.DataFrame(
    pd.read_csv('/home/grimoire/Projects/BeerRatings/user_item_matrix.csv', index_col='review_profilename'))

# Looking at the shape of our data (The user-item matrix)

In [3]:
user_item.shape

(3156, 13831)

## Transposing our matrix to item-user

In [4]:
X = user_item.values.T

In [5]:
# Verifying the transposition
X.shape

(13831, 3156)

## Building our pearson coeffienct matrix using the transformed data matrix

In [6]:
similarity_measures = {}

In [7]:
import warnings
warnings.filterwarnings("ignore", category = RuntimeWarning)
import concurrent.futures

# Use all available processors to complete process
with concurrent.futures.ProcessPoolExecutor() as executor : 
    
    svd_corr = TruncatedSVD(n_components=75, random_state=3)
    corr_matrix = svd_corr.fit_transform(X)
    similarity_measures['pearson'] = np.corrcoef(corr_matrix)
    
    svd_csine = TruncatedSVD(n_components=1500, random_state=6)
    csine_matrix = svd_csine.fit_transform(X)
    cosine_sim = linear_kernel(csine_matrix, csine_matrix)
    similarity_measures['cosine'] = normalize(cosine_sim)
    
    svd_eucl = TruncatedSVD(n_components=625, random_state=9)
    eucl_matrix = svd_eucl.fit_transform(X)
    similarity_measures['euclidean'] = euclidean_distances(eucl_matrix, eucl_matrix)

In [8]:
def recommendations(beer, measure='cosine', user_item=user_item) :
    
    #############################################################
    # The coeffecient matrix is an 2D array of arrays
    # There is no indication of what columns refer to what beer
    # We will pull this information from our user_item matrix
    # Create a list of the beers from the columns
    # The index of the list will correspond with the coefficient matrix
    #############################################################
    
    # https://stackoverflow.com/questions/944592/best-practice-for-python-assert
    # Self note: build an exception class for beer parameter
    
    # User input not matching beer_names, add to exception class
    assert beer in user_item.columns, \
    'The beer isn\'t currently in the recommendation list,\
     rate it more and it will be soon!'
    
    # User input for measure not available for use
    assert measure in similarity_measures.keys(), \
    'That measure isn\'t currently available'
    
    beer_name_list = list(user_item.columns) # Contains the index/name of the beer
    beer_index = beer_name_list.index(beer) # Finds the index of the beer parameter
    beer_list = \
    pd.DataFrame({'score':similarity_measures[measure][beer_index]})\
    .sort_values(by='score', 
                 ascending=False)[1:8].index
    return list(user_item.columns[beer_list])

In [9]:
recommendations('Furious', measure='cosine')

["Bell's Hopslam Ale",
 'Pliny The Elder',
 'Founders Breakfast Stout',
 'Founders KBS (Kentucky Breakfast Stout)',
 'Two Hearted Ale',
 'Bourbon County Brand Stout',
 'Old Rasputin Russian Imperial Stout']

In [10]:
recommendations('Furious', measure='pearson')

['Bender',
 'Coffee Bender',
 'CynicAle',
 'Darkness',
 'Raspberry Tart',
 'Abrasive Ale',
 'Masala Mama India Pale Ale']

In [11]:
recommendations('Furious', measure='euclidean')

['La Fin Du Monde',
 'Duvel',
 'Old Rasputin Russian Imperial Stout',
 'Arrogant Bastard Ale',
 'Sierra Nevada Celebration Ale',
 'Dead Guy Ale',
 'St. Bernardus Abt 12']

In [12]:
# Random beer selection and recommendations

random = np.random.randint(0,(len(user_item.columns)-1))
beer = user_item.columns[random]
print(beer)
recommendations(beer, measure='cosine')

Clear Creek Ice


['Sierra Nevada Celebration Ale',
 'Samuel Adams Boston Lager',
 'Sierra Nevada Summerfest Lager',
 'Samuel Adams Noble Pils',
 'Buried Hatchet Stout',
 'Hoegaarden Original White Ale',
 'Pale Ale']