# Collaborative Filtering (ALS)

In [None]:
!pip install huggingface_hub[hf_transfer] -U
!export HF_HUB_ENABLE_HF_TRANSFER=1
!huggingface-cli download HangenYuu/Steam_Games_Review processed/games_reviews.parquet --repo-type dataset --local-dir .

Collecting huggingface_hub[hf_transfer]
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting hf-transfer>=0.1.4 (from huggingface_hub[hf_transfer])
  Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)
Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-0.26.2-py3-none-any.whl (447 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m447.5/447.5 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: hf-transfer, huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.24.7
    Uninstalling huggingface-hub-0.24.7:
      Successfully uninstalled huggingface-hub-0.24.7
Successfully installed hf-transfer-0.1.8 huggingface_h

In [None]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.sql import functions as F

# Obtain the Spark session for this notebook (getOrCreate reuses an existing one
# if the kernel already has it).
spark = (
    SparkSession.builder
    .appName("RecommendationSystem")
    .getOrCreate()
)

In [None]:
# Load the downloaded reviews file. Parquet files embed their own schema and have
# no header row, so the CSV-only `header` / `inferSchema` options are not passed.
data = spark.read.parquet("/content/processed/games_reviews.parquet")

In [None]:
# Keep only the user and item identifiers plus the rating signal. The
# `recommendation` column is cast to int and exposed as `recommendation_numeric`
# (presumably a boolean flag becoming 0/1 -- confirm against the dataset schema).
data = data.select(
    'user_id',
    'game_id',
    F.col('recommendation').cast('int').alias('recommendation_numeric'),
)

In [None]:
# Configure the ALS collaborative-filtering estimator on the
# (user_id, game_id, recommendation_numeric) triples.
als = ALS(
    maxIter=10,
    regParam=0.1,
    userCol='user_id',
    itemCol='game_id',
    ratingCol='recommendation_numeric',
    # Drop rows whose prediction would be NaN (users/items unseen during fitting).
    coldStartStrategy='drop',
    # Fix the seed so factor initialisation, and therefore the fitted model and
    # the recommendations derived from it, are reproducible across runs.
    seed=42,
)

# Fit on the full prepared dataset; no hold-out split is made in this notebook.
model = als.fit(data)

In [None]:
# Top-10 item recommendations for every user known to the fitted model.
user_recommendations = model.recommendForAllUsers(10)

# Collapse to one partition so the output directory holds a single part file,
# replacing any output left by a previous run.
single_partition = user_recommendations.coalesce(1)
single_partition.write.mode("overwrite").parquet("/content/user_recommendations.parquet")

In [None]:
# Interactive step: prompts for a Hugging Face access token and stores it locally.
# The later upload pushes to a dataset repo, so a token with write permission is
# presumably required -- confirm for your account.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) 
Token is valid (permission: write).
The token `Write Token` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authent

In [None]:
!export HF_HUB_ENABLE_HF_TRANSFER=1
!huggingface-cli upload HangenYuu/Steam_Games_Review /content/user_recommendations.parquet/part-00000-674a66e6-9a1e-4855-b492-c2557d6577f1-c000.snappy.parquet ./results/user_recommendations.parquet --repo-type=dataset

Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
part-00000-674a66e6-9a1e-4855-b492-c2557d6577f1-c000.snappy.parquet: 100% 15.3M/15.3M [00:00<00:00, 34.3MB/s]
https://huggingface.co/datasets/HangenYuu/Steam_Games_Review/blob/main/./results/user_recommendations.parquet
