In [11]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

In [12]:
from modules.data_preprocessing import DataPreprocessing
from modules.models import ProcessModels
from modules.recommendation import Recommendation

In [13]:
# Spark bootstrap for the notebook.
#
# The Java system property must be set before the SparkContext is instantiated
# (see SparkContext.setSystemProperty). `dfs.client.use.datanode.hostname=true`
# is the Hadoop client option for connecting to datanodes by hostname.
SparkContext.setSystemProperty('spark.hadoop.dfs.client.use.datanode.hostname', 'true')

# Application-level settings: app name, driver result-size cap and executor memory.
conf = (
    SparkConf()
    .setAppName("recommendation_app")
    .set("spark.driver.maxResultSize", "2g")
    .set("spark.executor.memory", "4g")
)

# getOrCreate() keeps this cell idempotent: constructing SparkContext(conf=conf)
# directly raises "Cannot run multiple SparkContexts at once" when the cell is
# re-run or a context already exists in the kernel. Note that when a context
# already exists it is reused as-is, so `conf` only takes effect on first creation.
sc = SparkContext.getOrCreate(conf=conf)
spark = SparkSession.builder.config(conf=conf).getOrCreate()

In [14]:
# Display the SparkContext bound to `sc` as the cell output (sanity check of the setup).
sc

In [15]:
# Display the SparkSession bound to `spark` as the cell output (sanity check of the setup).
spark

In [16]:
# Set the directory under which Spark writes checkpoint data.
# Per the PySpark docs, this must be an HDFS path when running on a cluster;
# "checkpoints" is a relative path — TODO confirm where it resolves in this deployment.
# NOTE(review): presumably needed by the model training later in the notebook — confirm.
sc.setCheckpointDir("checkpoints")

In [17]:
# Assemble the recommender step by step: the preprocessing object is handed to
# ProcessModels (together with the active SparkSession), and that in turn is
# injected into Recommendation as its `process_model_module`.
data_preprocessing = DataPreprocessing()
process_models = ProcessModels(data_preprocessing, spark=spark)
recommendation = Recommendation(process_model_module=process_models)

In [18]:
# Get all unique ReviewerIDs and corresponding ReviewerNames, then display them
# as the cell output.
# NOTE(review): this constructs a new DataPreprocessing instance, separate from the
# one passed to ProcessModels when `recommendation` was built — confirm the
# duplication is intended.
# NOTE(review): the displayed output may contain reviewer names; check before sharing.
user_ids = DataPreprocessing().get_all_user_ids()
user_ids

In [19]:
# Training run of the recommender.
# The user-specific arguments are passed explicitly as None because they are
# not needed in training mode.
# NOTE(review): presumably the fitted model is written to `model_save_path` — confirm.
recommendation.pyspark_recommender(
    mode="training",
    model_save_path="saved_model/als_model",
    current_user_id=None,  # unused when training
    num_recommendations=None,  # unused when training
)

In [20]:
# Prediction run with the trained model for a single reviewer ("pooja s").
# The call returns a pair, unpacked here into the recommendations and the
# user's history; 10 recommendations are requested.
recommendations, user_history = recommendation.pyspark_recommender(
    mode="predict",
    model_save_path="saved_model/als_model",
    current_user_id="13d4a80599cf4c4f8f4c3b511e25a4d9",  # reviewer "pooja s"
    num_recommendations=10,
)

In [21]:
# Display the recommendations returned by the most recent predict call.
recommendations

In [22]:
# Display the user history returned by the most recent predict call.
user_history

## COMMENTS:
- About this user, Pooja: She’s a beginner and interested in beginner courses. Topics include Python, basic statistics, and Excel.
- The recommendations are a mix of beginner and intermediate courses. The intermediate courses are not suitable for her, but they also appear in other users' recommendations. The model may be biased towards these intermediate courses, possibly because of a lack of data or the choice of hyperparameters.
- The recommendations are still strong and relevant to the user's interests. For example, courses related to Python, basic statistics, and Excel are still recommended, such as "Prompt Engineering for ChatGPT", "What is Data Science?", and "Foundations: Data, Data, Everywhere".

In [23]:
# Prediction run with the trained model for a single reviewer ("Mark L").
# The call returns a pair, unpacked here into the recommendations and the
# user's history; 10 recommendations are requested.
recommendations, user_history = recommendation.pyspark_recommender(
    mode="predict",
    model_save_path="saved_model/als_model",
    current_user_id="03223df4cd824c43b765115fa2b822e7",  # reviewer "Mark L"
    num_recommendations=10,
)

In [24]:
# Display the recommendations returned by the most recent predict call.
recommendations

In [25]:
# Display the user history returned by the most recent predict call.
user_history

## COMMENTS:
- About this user, Mark: He's interested in beginner courses, deep learning, machine learning, and AI for medical care, as well as Excel.
- The recommendations are highly relevant to the user's interests. For example, courses related to the medical field are recommended, such as:
    - "Reverse and complement nucleic acid sequences (DNA, RNA) using Python"
    - "Advanced Clinical Data Science"

In [26]:
# Shut Spark down at the end of the notebook.
# SparkSession.stop() stops the underlying SparkContext (the same object as `sc`
# here, since the session was obtained via getOrCreate() after the context was
# created), so a separate sc.stop() call is redundant.
spark.stop()