In [1]:
import findspark

# Raw string: the Windows path contains backslashes ("\H", "\s") that a normal
# string literal treats as (invalid) escape sequences. The resulting value is
# the same as before, but without the invalid-escape warning.
SPARK_HOME_DIR = r'D:\Hadoop_Ecosystem\spark-3.5.0-bin-hadoop3'
findspark.init(SPARK_HOME_DIR)

# pyspark must be imported only after findspark.init() has run.
from pyspark import SparkContext
from pyspark.sql import SparkSession

# builder.getOrCreate() returns the already-running session when there is one,
# so this cell can be re-executed on a live kernel. Constructing
# SparkContext(...) directly fails on the second run because only one context
# may be active at a time.
ss = SparkSession.builder.appName("ALS").getOrCreate()

# Keep the `sc` name available for any code that uses the context directly.
sc = ss.sparkContext
sc.setLogLevel("ERROR")

# Last expression of the cell: display the session summary.
ss

In [2]:
# Read the review CSV; the first row is the header and column types are inferred.
reviews_raw = ss.read.csv('beer_reviews.csv', header=True, inferSchema=True)

# Columns kept for modelling: the beer id, the five rating criteria and the
# reviewer's profile name.
kept_columns = [
    'beer_beerid',
    'review_taste',
    'review_appearance',
    'review_palate',
    'review_aroma',
    'review_overall',
    'review_profilename',
]

# Project, rename the two id columns, and drop every row containing a null.
reviews_named = (
    reviews_raw
    .select(kept_columns)
    .withColumnRenamed('beer_beerid', 'beer_id')
    .withColumnRenamed('review_profilename', 'user_id')
    .dropna()
)

# Round-trip through pandas so each username is replaced by its integer
# category code (one unique number per username), then go back to Spark.
df_beer = ss.createDataFrame(
    reviews_named.toPandas().assign(
        user_id=lambda pdf: pdf['user_id'].astype('category').cat.codes
    )
)

# Number of distinct users after encoding.
df_beer.select('user_id').distinct().count()

33387

In [3]:
# 80/20 train/test split. The seed is fixed so that re-running the notebook
# yields the same partition instead of a different random split each time.
(training, test) = df_beer.randomSplit([0.8, 0.2], seed=42)

In [4]:
from ml_models import MultiCriteriaALSModel

# Weight assigned to each review criterion (they sum to 1.0). In the output
# shown below, predict_combined_rating matches the weighted sum of the
# per-criterion predictions under these weights.
review_weight = dict(
    review_taste=0.25,
    review_appearance=0.1,
    review_palate=0.2,
    review_aroma=0.15,
    review_overall=0.3,
)

# Positional arguments: user column name, item column name, criterion weights.
model = MultiCriteriaALSModel('user_id', 'beer_id', review_weight)

# Fit on the training split, then score the held-out split.
model.fit(training)

prediction = model.transform(test)
prediction.show()



+-------+-------+-----------------------+--------------------+----------------------------+------------------------+-----------------------+-------------------------+-----------------------+
|user_id|beer_id|prediction_review_taste|init_combined_rating|prediction_review_appearance|prediction_review_palate|prediction_review_aroma|prediction_review_overall|predict_combined_rating|
+-------+-------+-----------------------+--------------------+----------------------------+------------------------+-----------------------+-------------------------+-----------------------+
|  20396|   5785|              3.3563235|                4.85|                   4.0764475|               3.7865183|              3.5144548|                3.9560137|     3.7180016160011293|
|  32554|  36270|               3.210523|  3.5999999999999996|                   3.6166172|                3.058903|               3.666088|                 3.368969|     3.3366769433021544|
|  13795|   2626|              3.4066162|    

In [5]:
# Show the predicted combined rating next to init_combined_rating
# (presumably the combined rating derived from the actual reviews — confirm
# against ml_models).
# NOTE(review): the output below repeats the (11450, 6549) row many times with
# identical values; check for duplicate (user_id, beer_id) reviews in the input
# or a join fan-out inside model.transform.
rating_columns = ['user_id', 'beer_id', 'predict_combined_rating', 'init_combined_rating']
prediction.select(*rating_columns).show()

+-------+-------+-----------------------+--------------------+
|user_id|beer_id|predict_combined_rating|init_combined_rating|
+-------+-------+-----------------------+--------------------+
|  20396|   5785|     3.7180016160011293|                4.85|
|  32554|  36270|     3.3366769433021544|  3.5999999999999996|
|  13795|   2626|      3.360486197471619|                3.75|
|   5457|   1664|       3.36596794128418|               3.325|
|  15634|  25852|      4.025994157791138|                4.25|
|   7359|   3558|     3.8791313767433167|  3.7750000000000004|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999999995|
|  11450|   6549|      3.905474531650543|  4.2749999999

In [None]:
# model.recommendForUser(11151, 5).show()