# Content-Based Recommendation

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, desc, broadcast, concat, lit
from pyspark.ml.feature import Tokenizer, StopWordsRemover, HashingTF, IDF, Normalizer, PCA
import numpy as np

# Initialize the Spark session used by every later cell.
# Driver and executor memory are both set to 4 GB, and the driver JVM is told
# to use UTF-8 as its default file encoding.
# Fix: the executor setting was previously keyed as "sparl.executor.memory"
# (typo), so it was never applied under the real "spark.executor.memory" key.
spark = (
    SparkSession.builder
    .appName("Content-basedRecommendationSystem")
    .config("spark.driver.memory", "4g")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.extraJavaOptions", "-Dfile.encoding=UTF-8")
    # getOrCreate() returns an already-running session if one exists,
    # otherwise it builds a new one with the options above.
    .getOrCreate()
)

In [10]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime,
# which provides the google.colab package).
# NOTE(review): the load cell reads "transactions_data.csv" by relative path,
# not from the mounted drive -- confirm this mount is still needed.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [1]:
# Upload a local file into the Colab working directory. The recorded output
# shows transactions_data.csv being saved here, which is the file the load
# cell reads. `uploaded` keeps whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()

Saving transactions_data.csv to transactions_data.csv


#### Load Dataset and Select Relevant Columns

In [6]:
# Load the raw transactions; header=True takes column names from the first CSV row.
transactions_data = spark.read.csv("transactions_data.csv", header=True)

# Build the product catalogue: one row per distinct
# (product_name, product_description, category) combination, plus a single
# text feature that joins the three fields with single spaces.
# NOTE: concat() yields NULL when any of its inputs is NULL, so a product with
# a missing field ends up with a NULL full_product_description.
df = (
    transactions_data
    .select(
        col("product_name"),
        col("product_description"),
        col("category"),
        concat(
            col("product_name"), lit(" "),
            col("product_description"), lit(" "),
            col("category"),
        ).alias("full_product_description"),
    )
    .distinct()
)

# Preview the first 10 rows without truncating long string values.
df.show(10, truncate=False)

+-------------+-------------------------------------------------+--------------------+----------------------------------------------------------------------+
|product_name |product_description                              |category            |full_product_description                                              |
+-------------+-------------------------------------------------+--------------------+----------------------------------------------------------------------+
|Paper Towels |Ultra-absorbent paper towels.                    |Household Essentials|Paper Towels Ultra-absorbent paper towels. Household Essentials       |
|Apples       |Fresh organic apples, rich in fiber and vitamins.|Fresh Produce       |Apples Fresh organic apples, rich in fiber and vitamins. Fresh Produce|
|Almonds      |Roasted almonds, a healthy snack.                |Snacks & Sweets     |Almonds Roasted almonds, a healthy snack. Snacks & Sweets             |
|Milk         |Whole organic milk, high in calcium a