# Deployment

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, desc, broadcast, concat, lit, to_timestamp, concat_ws, row_number, from_json, collect_list
from pyspark.sql.window import Window
from pyspark.ml.feature import Tokenizer, StopWordsRemover, HashingTF, IDF, Normalizer, PCA, IDFModel
from pyspark.sql.types import StringType, StructType, StructField
import numpy as np
import os

# Request the Kafka connector packages (built for Scala 2.12 / Spark 3.2.0) via
# the submit arguments; this is set before the session below is created.
# NOTE(review): the 3.2.0 coordinates should match the installed PySpark version - confirm.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.2.0,org.apache.spark:spark-sql-kafka-0-10_2.12:3.2.0 pyspark-shell'

# Initialize the Spark session.
# Driver and executor memory are set to 4g each, and the driver JVM is told to
# use UTF-8 as its file encoding.
spark_settings = (
    ("spark.driver.memory", "4g"),
    ("spark.executor.memory", "4g"),
    ("spark.driver.extraJavaOptions", "-Dfile.encoding=UTF-8"),
)

session_builder = SparkSession.builder.appName("Content-basedRecommendationSystem")
for setting_key, setting_value in spark_settings:
    session_builder = session_builder.config(setting_key, setting_value)

spark = session_builder.getOrCreate()

In [2]:
from google.colab import drive, files
# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): nothing in the visible cells reads from the mounted drive -
# confirm that a later cell needs it, otherwise this step can be dropped.
drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
# Upload the input file through Colab's file-upload prompt. This is an
# interactive step, so the cell waits for the user and cannot run unattended.
# The recorded run uploaded transactions_data.csv, which the loading cell
# reads by that same relative name.
uploaded = files.upload()

Saving transactions_data.csv to transactions_data.csv


#### Load Dataset and Select Relevant Columns

In [4]:
# Load the raw transactions file. header=True takes the column names from the
# first row; no schema is supplied, so every column is read as a string.
transactions_data = spark.read.csv("transactions_data.csv", header=True)

# Build one row per distinct product, with a single text feature that joins
# product_name, product_description and category with spaces.
#
# concat_ws is used rather than concat: concat returns NULL as soon as any
# input column is NULL, which would wipe the whole feature for a product that
# is merely missing a description or category. concat_ws skips NULL inputs
# (and yields an empty string if all three are NULL). For fully populated
# rows the result is identical to the previous concat-with-spaces expression.
df = (
    transactions_data
    .select(
        col("product_name"),
        col("product_description"),
        col("category"),
        concat_ws(
            " ",
            col("product_name"),
            col("product_description"),
            col("category"),
        ).alias("full_product_description"),
    )
    .distinct()
)

# Preview the first 10 products without truncating long text columns.
df.show(10, truncate=False)

+-------------+-------------------------------------------------+--------------------+----------------------------------------------------------------------+
|product_name |product_description                              |category            |full_product_description                                              |
+-------------+-------------------------------------------------+--------------------+----------------------------------------------------------------------+
|Paper Towels |Ultra-absorbent paper towels.                    |Household Essentials|Paper Towels Ultra-absorbent paper towels. Household Essentials       |
|Apples       |Fresh organic apples, rich in fiber and vitamins.|Fresh Produce       |Apples Fresh organic apples, rich in fiber and vitamins. Fresh Produce|
|Almonds      |Roasted almonds, a healthy snack.                |Snacks & Sweets     |Almonds Roasted almonds, a healthy snack. Snacks & Sweets             |
|Milk         |Whole organic milk, high in calcium a