In [1]:
from pyspark.sql import SparkSession
# Create (or reuse) a Spark session that runs on the local master.
spark = (
    SparkSession.builder
    .master('local')
    .appName('movie')
    .getOrCreate()
)

In [2]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

In [3]:
# Read the ratings CSV: the first row supplies the column names and the
# column types are inferred from the values.
data = (
    spark.read
    .option('header', True)
    .option('inferSchema', True)
    .csv('Meal_Info.csv')
)

In [4]:
# Print the column names, inferred types and nullability of the loaded frame.
data.printSchema()

root
 |-- movieId: integer (nullable = true)
 |-- rating: double (nullable = true)
 |-- userId: integer (nullable = true)
 |-- mealskew: double (nullable = true)
 |-- meal_name: string (nullable = true)



In [5]:
# Preview the first 20 rows; long string values are truncated for display.
data.show(n=20, truncate=True)

+-------+------+------+--------+--------------------+
|movieId|rating|userId|mealskew|           meal_name|
+-------+------+------+--------+--------------------+
|      2|   3.0|     0|     2.0|       Chicken Curry|
|      3|   1.0|     0|     3.0|Spicy Chicken Nug...|
|      5|   2.0|     0|     5.0|           Hamburger|
|      9|   4.0|     0|     9.0|       Taco Surprise|
|     11|   1.0|     0|    11.0|            Meatloaf|
|     12|   2.0|     0|    12.0|        Ceaser Salad|
|     15|   1.0|     0|    15.0|            BBQ Ribs|
|     17|   1.0|     0|    17.0|         Sushi Plate|
|     19|   1.0|     0|    19.0|Cheesesteak Sandw...|
|     21|   1.0|     0|    21.0|             Lasagna|
|     23|   1.0|     0|    23.0|      Orange Chicken|
|     26|   3.0|     0|    26.0|    Spicy Beef Plate|
|     27|   1.0|     0|    27.0|Salmon with Mashe...|
|     28|   1.0|     0|    28.0| Penne Tomatoe Pasta|
|     29|   1.0|     0|    29.0|        Pork Sliders|
|     30|   1.0|     0|    3

In [9]:
# Discard every row that contains a null in any column.
data = data.dropna()

In [10]:
# 80/20 train/test split. The fixed seed keeps the partition stable, so
# re-running the notebook on the same input reproduces the same model
# and the same evaluation numbers instead of a different split each time.
(training, testing) = data.randomSplit([0.8, 0.2], seed=42)

In [11]:
# Fit an ALS collaborative-filtering model on the training split.
# - seed pins the random factor initialisation so a re-run reproduces
#   the same model.
# - coldStartStrategy='drop' makes transform() omit rows whose user or
#   item was never seen during fit; the default ('nan') produces NaN
#   predictions for those rows, which turns the RMSE computed from the
#   predictions into NaN.
# NOTE(review): itemCol is the double-typed 'mealskew' column; in the rows
# displayed it mirrors the integer 'movieId' -- confirm which column is the
# intended item id.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='mealskew',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)
model = als.fit(training)

In [14]:
# Score the held-out split, then display the rows with the highest
# predicted rating first.
predictions = model.transform(testing)
ranked = predictions.sort(predictions['prediction'].desc())
ranked.show()

+-------+------+------+--------+--------------------+----------+
|movieId|rating|userId|mealskew|           meal_name|prediction|
+-------+------+------+--------+--------------------+----------+
|      2|   1.0|    26|     2.0|       Chicken Curry|  5.265823|
|     23|   1.0|    19|    23.0|      Orange Chicken| 4.7955775|
|     12|   3.0|     2|    12.0|        Ceaser Salad| 4.0295153|
|      0|   1.0|     6|     0.0|        Cheese Pizza| 3.8290668|
|     22|   4.0|    17|    22.0|   Pulled Pork Plate| 3.6180391|
|     24|   5.0|    26|    24.0|               Chili| 3.4270978|
|      7|   1.0|    28|     7.0|              Nachos| 3.0584855|
|     20|   1.0|    26|    20.0|     Southwest Salad| 3.0316865|
|     28|   1.0|     0|    28.0| Penne Tomatoe Pasta| 2.9707906|
|     18|   1.0|     8|    18.0|     Pepperoni Pizza| 2.9234595|
|      2|   3.0|     9|     2.0|       Chicken Curry| 2.8596983|
|     18|   4.0|     3|    18.0|     Pepperoni Pizza| 2.8463178|
|     24|   1.0|     9|  

In [17]:
# Measure the root-mean-square error between the actual 'rating' column
# and the model's 'prediction' column, then report it.
evaluator = RegressionEvaluator(
    labelCol='rating',
    predictionCol='prediction',
    metricName='rmse',
)
rmse = evaluator.evaluate(predictions)
print('Root Mean Square Error = ', rmse, sep='')

Root Mean Square Error = 1.5224437978111658
