In [1]:
import findspark

# Raw string literal: the Windows path contains backslashes (`\H`, `\s`) that
# are invalid escape sequences in a normal string and trigger a SyntaxWarning
# on recent Python versions. The resulting path value is unchanged.
findspark.init(r'D:\Hadoop_Ecosystem\spark-3.5.0-bin-hadoop3')

from pyspark import SparkContext
from pyspark.sql import SparkSession

# Build (or reuse) the session through the builder so this cell can be re-run
# safely. Constructing `SparkContext(appName=...)` directly raises
# "Cannot run multiple SparkContexts at once" when a context is already active.
ss = SparkSession.builder.appName("ALS").getOrCreate()

# Underlying SparkContext of the session; keep Spark's console logging quiet.
sc = ss.sparkContext
sc.setLogLevel("ERROR")

# Display the session summary as the cell output.
ss

In [2]:
# TODO change the path to another path since beer_reviews.csv is moved to another folder
from pyspark.sql import Window, functions as F

# Load the raw reviews, keep only the columns used downstream, rename the id
# columns and drop every row that has a missing value in any kept column.
df_beer = (
    ss.read.csv('beer_reviews.csv', inferSchema=True, header=True)
    .select([
        'beer_beerid',
        'review_taste',
        'review_appearance',
        'review_palate',
        'review_aroma',
        'review_overall',
        'review_profilename',
    ])
    .withColumnRenamed('beer_beerid', 'beer_id')
    .withColumnRenamed('review_profilename', 'user_id')
    .dropna()
)

# Map each username to a unique integer code inside Spark instead of
# round-tripping the whole table through pandas (toPandas() loads every row
# into driver memory). Codes are 0-based and follow the sorted order of the
# distinct usernames, which mirrors pandas' `astype('category').cat.codes`.
# The un-partitioned window only runs over the distinct usernames, not over
# the full review table.
user_codes = (
    df_beer.select('user_id')
    .distinct()
    .withColumn('user_code', F.dense_rank().over(Window.orderBy('user_id')) - 1)
)

# Swap the string username for its integer code. After the drop/rename the
# column order is unchanged (beer_id, review_*, user_id). The result is cached
# so the CSV is not re-read and re-encoded by every later action.
df_beer = (
    df_beer.join(user_codes, on='user_id', how='inner')
    .drop('user_id')
    .withColumnRenamed('user_code', 'user_id')
    .cache()
)

# Number of distinct users in the cleaned data set.
df_beer.select('user_id').distinct().count()

33387

In [3]:
# 80/20 train/test split. A fixed seed makes the split (and therefore every
# metric and recommendation computed below) reproducible across kernel restarts.
(training, test) = df_beer.randomSplit([0.8, 0.2], seed=42)

In [4]:
from ml_models import MultiCriteriaALSModel

# Weight assigned to each review criterion (the five weights sum to 1.0).
# NOTE(review): presumably used by the model to blend the per-criterion
# predictions into the combined rating - confirm in ml_models.
review_weight = dict(
    review_taste=0.25,
    review_appearance=0.1,
    review_palate=0.2,
    review_aroma=0.15,
    review_overall=0.3,
)

# Positional arguments: user column, item column, criterion weights.
model = MultiCriteriaALSModel('user_id', 'beer_id', review_weight)

# Train on the training split, then score the held-out split.
model.fit(training)
prediction = model.transform(test)

prediction.show()



+-------+-------+-----------------------+--------------------+----------------------------+------------------------+-----------------------+-------------------------+-----------------------+
|user_id|beer_id|prediction_review_taste|init_combined_rating|prediction_review_appearance|prediction_review_palate|prediction_review_aroma|prediction_review_overall|predict_combined_rating|
+-------+-------+-----------------------+--------------------+----------------------------+------------------------+-----------------------+-------------------------+-----------------------+
|  22107|    276|               3.551606|                4.15|                   3.6876032|               3.5517986|              3.3905306|                3.6639833|     3.5747961163520814|
|  17193|  17538|               4.053382|               4.675|                    3.908098|               3.8911304|               4.160838|                3.7921162|     3.9441419363021852|
|  11402|   7463|              3.9573524|  3.

In [None]:
# Show only the id columns next to the predicted and the initial combined rating.
rating_columns = ['user_id', 'beer_id', 'predict_combined_rating', 'init_combined_rating']
prediction.select(*rating_columns).show()

In [None]:
# Recommendations for the single encoded user id 11151.
# NOTE(review): the second argument (5) is presumably the number of items to
# recommend - confirm against MultiCriteriaALSModel.recommendForUser.
single_user_recs = model.recommendForUser(11151, 5)
single_user_recs.show()

In [5]:
# Recommendations for every user.
# NOTE(review): the argument (5) is presumably the maximum number of items per
# user - confirm against MultiCriteriaALSModel.recommendForAllUsers.
all_user_recs = model.recommendForAllUsers(5)
all_user_recs.show()

+-------+--------------------+--------------------+
|user_id|               items|             ratings|
+-------+--------------------+--------------------+
|      0|[42587, 10325, 22...|[3.56320463418960...|
|      7|         [135, 4700]|[4.09011514186859...|
|     25|[35409, 41899, 21...|[4.3664311170578,...|
|     26|      [73427, 50235]|[4.66140935420989...|
|     29|   [99, 42353, 8998]|[3.95344327688217...|
|     32|               [276]|[3.1124522805213926]|
|     57|[53913, 36185, 57...|[3.07213504314422...|
|     68|               [848]| [2.669730496406555]|
|     71|[1128, 727, 782, ...|[4.05605328083038...|
|     84|              [1745]| [3.867021989822388]|
|     95|              [6499]|[1.4767994821071624]|
|    116|       [3432, 30840]|[4.36471152305603...|
|    119|        [2684, 2093]|[3.76931705474853...|
|    126|[60780, 41778, 52...|[3.71307334899902...|
|    136|[36727, 808, 1042...|[3.69951361417770...|
|    145|[1658, 29145, 568...|[3.92948464155197...|
|    157|[13