In [1]:
pip install pyspark

Collecting pyspark
  Downloading pyspark-3.4.1.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285387 sha256=23bc9db4c4337e68700df33a92ea3a587fcd8340d5e97a9dcebd6e9bf77782d8
  Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.1


In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, when
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Create the Spark session (or reuse one that is already running); every
# later cell reads and builds DataFrames through it.
spark = (
    SparkSession.builder
    .appName("Game_Recommendation_System")
    .getOrCreate()
)

In [17]:
# Load the two CSV files. The first line of each file is treated as the
# header, and column types are inferred from the data.
games = spark.read.csv(
    "/content/games.csv",
    header=True,
    inferSchema=True,
)
recom = spark.read.csv(
    "/content/recommendations.csv",
    header=True,
    inferSchema=True,
)

In [18]:
# Preview the first rows of the games table as loaded.
games.show(n=10)

+------+--------------------+------------+----+-----+-----+---------------+--------------+------------+-----------+--------------+--------+----------+
|app_id|               title|date_release| win|  mac|linux|         rating|positive_ratio|user_reviews|price_final|price_original|discount|steam_deck|
+------+--------------------+------------+----+-----+-----+---------------+--------------+------------+-----------+--------------+--------+----------+
| 13500|Prince of Persia:...|  2008-11-21|true|false|false|  Very Positive|            84|        2199|       9.99|          9.99|     0.0|      true|
| 22364|BRINK: Agents of ...|  2011-08-03|true|false|false|       Positive|            85|          21|       2.99|          2.99|     0.0|      true|
|113020|Monaco: What's Yo...|  2013-04-24|true| true| true|  Very Positive|            92|        3722|      14.99|         14.99|     0.0|      true|
|226560|  Escape Dead Island|  2014-11-18|true|false|false|          Mixed|            61|    

In [19]:
# Preview the first rows of the recommendations table as loaded.
recom.show(n=10)

+-------+-------+-----+-------------------+--------------+-----+-------+---------+
| app_id|helpful|funny|               date|is_recommended|hours|user_id|review_id|
+-------+-------+-----+-------------------+--------------+-----+-------+---------+
| 975370|      0|    0|2022-12-12 00:00:00|          true| 36.3|  47199|        0|
| 304390|      4|    0|2017-02-17 00:00:00|         false| 11.5|   2376|        1|
|1085660|      2|    0|2019-11-17 00:00:00|          true|336.5| 230757|        2|
| 703080|      0|    0|2022-09-23 00:00:00|          true| 27.4| 235736|        3|
| 526870|      0|    0|2021-01-10 00:00:00|          true|  7.9|  21721|        4|
| 306130|      0|    0|2021-10-10 00:00:00|          true|  8.6|  41543|        5|
| 238960|      0|    0|2017-11-25 00:00:00|          true|538.8|  80787|        6|
|    730|      0|    0|2021-11-30 00:00:00|         false|157.5|  57879|        7|
| 255710|      0|    0|2021-05-21 00:00:00|          true| 18.7| 321815|        8|
| 28

In [20]:
# Print the column names and inferred types of the games table.
games.printSchema()

root
 |-- app_id: integer (nullable = true)
 |-- title: string (nullable = true)
 |-- date_release: date (nullable = true)
 |-- win: boolean (nullable = true)
 |-- mac: boolean (nullable = true)
 |-- linux: boolean (nullable = true)
 |-- rating: string (nullable = true)
 |-- positive_ratio: integer (nullable = true)
 |-- user_reviews: integer (nullable = true)
 |-- price_final: double (nullable = true)
 |-- price_original: double (nullable = true)
 |-- discount: double (nullable = true)
 |-- steam_deck: boolean (nullable = true)



In [21]:
# Print the column names and inferred types of the recommendations table.
recom.printSchema()

root
 |-- app_id: integer (nullable = true)
 |-- helpful: integer (nullable = true)
 |-- funny: integer (nullable = true)
 |-- date: timestamp (nullable = true)
 |-- is_recommended: boolean (nullable = true)
 |-- hours: double (nullable = true)
 |-- user_id: integer (nullable = true)
 |-- review_id: integer (nullable = true)



In [22]:
# Columns that the rest of the notebook never reads, listed per table.
dropg = [
    'date_release',
    'discount',
    'steam_deck',
    'price_original',
]
dropr = [
    'helpful',
    'funny',
    'date',
    'review_id',
]

# DataFrame.drop takes the column names as separate arguments, hence the
# unpacking of each list.
games, recom = games.drop(*dropg), recom.drop(*dropr)

In [23]:
# Preview the games table after the unused columns were removed.
games.show(n=10)

+------+--------------------+----+-----+-----+---------------+--------------+------------+-----------+
|app_id|               title| win|  mac|linux|         rating|positive_ratio|user_reviews|price_final|
+------+--------------------+----+-----+-----+---------------+--------------+------------+-----------+
| 13500|Prince of Persia:...|true|false|false|  Very Positive|            84|        2199|       9.99|
| 22364|BRINK: Agents of ...|true|false|false|       Positive|            85|          21|       2.99|
|113020|Monaco: What's Yo...|true| true| true|  Very Positive|            92|        3722|      14.99|
|226560|  Escape Dead Island|true|false|false|          Mixed|            61|         873|      14.99|
|249050|Dungeon of the EN...|true| true|false|  Very Positive|            88|        8784|      11.99|
|250180|        METAL SLUG 3|true|false|false|  Very Positive|            90|        5579|       7.99|
|253980|             Enclave|true| true| true|Mostly Positive|           

In [24]:
# Preview the recommendations table after the unused columns were removed.
recom.show(n=10)

+-------+--------------+-----+-------+
| app_id|is_recommended|hours|user_id|
+-------+--------------+-----+-------+
| 975370|          true| 36.3|  47199|
| 304390|         false| 11.5|   2376|
|1085660|          true|336.5| 230757|
| 703080|          true| 27.4| 235736|
| 526870|          true|  7.9|  21721|
| 306130|          true|  8.6|  41543|
| 238960|          true|538.8|  80787|
|    730|         false|157.5|  57879|
| 255710|          true| 18.7| 321815|
| 289070|          true|397.5| 412440|
+-------+--------------+-----+-------+
only showing top 10 rows



In [25]:
# Count the missing values in every column of the games table.
# when() without otherwise() yields NULL for rows that do not match, and
# count() skips NULLs, so each aggregate is the number of NULL cells in
# that column.
null_games = games.select(
    [
        count(when(col(column_name).isNull(), column_name)).alias(column_name)
        for column_name in games.columns
    ]
)
null_games.show()

+------+-----+---+---+-----+------+--------------+------------+-----------+
|app_id|title|win|mac|linux|rating|positive_ratio|user_reviews|price_final|
+------+-----+---+---+-----+------+--------------+------------+-----------+
|     0|    0|  0|  0|    0|     0|             0|           0|          0|
+------+-----+---+---+-----+------+--------------+------------+-----------+



In [26]:
# Count the missing values in every column of the recommendations table.
# when() without otherwise() yields NULL for rows that do not match, and
# count() skips NULLs, so each aggregate is the number of NULL cells in
# that column.
null_recom = recom.select(
    [
        count(when(col(column_name).isNull(), column_name)).alias(column_name)
        for column_name in recom.columns
    ]
)
null_recom.show()

+------+--------------+-----+-------+
|app_id|is_recommended|hours|user_id|
+------+--------------+-----+-------+
|     0|             0|    0|      0|
+------+--------------+-----+-------+



In [27]:
# Demo inputs
# Grand Theft Auto V
# Dying Light 2 Stay Human
# Cyberpunk 2077
# Red Dead Redemption 2
# Jurassic World Evolution 2
# FINAL FANTASY XIV Online
# Farming Simulator 22

In [28]:
# Ask for a game title and resolve it to its app_id.
# The title has to match the `title` column of the games table exactly.
input_game_name = input("Enter the name of the game: ")

# first() returns None when no row matches, which lets an unknown title be
# reported clearly instead of failing with an IndexError on an empty list.
matching_game = games.filter(games.title == input_game_name).first()
if matching_game is None:
    raise ValueError(
        f"No game titled {input_game_name!r} was found in the games table."
    )

target_app_id = matching_game["app_id"]
target_app_id

Enter the name of the game: Rust


252490

In [29]:
# Show the games-table row(s) of the selected game.
filtered_games = games.where(games["app_id"] == target_app_id)
filtered_games.show()

+------+-----+----+----+-----+-------------+--------------+------------+-----------+
|app_id|title| win| mac|linux|       rating|positive_ratio|user_reviews|price_final|
+------+-----+----+----+-----+-------------+--------------+------------+-----------+
|252490| Rust|true|true|false|Very Positive|            87|      761516|      39.99|
+------+-----+----+----+-----+-------------+--------------+------------+-----------+



In [30]:
# Keep only the recommendation rows that refer to the selected game, i.e.
# the users who have an entry for it in the recommendations table.
target_user_ids = recom.where(recom["app_id"] == target_app_id)
target_user_ids.show(n=5)

+------+--------------+-----+--------+
|app_id|is_recommended|hours| user_id|
+------+--------------+-----+--------+
|252490|          true|  0.4| 1046893|
|252490|         false| 53.4| 1315766|
|252490|          true| 75.8| 4239702|
|252490|          true|698.9| 4538123|
|252490|          true|151.8|11963619|
+------+--------------+-----+--------+
only showing top 5 rows



In [31]:
# Rank the users of the selected game by hours played, longest first, and
# keep the top 1000. The working assumption is that users with more hours
# liked the game more, so the other games they play make a better
# recommendation signal.
usr_id = target_user_ids.sort(col("hours").desc()).limit(1000)
usr_id.show(n=5)

+------+--------------+-----+-------+
|app_id|is_recommended|hours|user_id|
+------+--------------+-----+-------+
|252490|          true|999.2|4900395|
|252490|          true|999.1|2634389|
|252490|         false|999.0|7567570|
|252490|          true|998.4|8953860|
|252490|          true|998.2|1002205|
+------+--------------+-----+-------+
only showing top 5 rows



In [32]:
# Bring the user_id values of the selected users back to the driver as a
# plain Python list.
users = [record["user_id"] for record in usr_id.select("user_id").collect()]

In [33]:
# Keep every recommendation row whose user_id is in the `users` list,
# whichever game the row refers to.
final_df = recom.where(recom["user_id"].isin(users))
final_df.show(n=5)

+------+--------------+-----+--------+
|app_id|is_recommended|hours| user_id|
+------+--------------+-----+--------+
|252490|          true|698.9| 4538123|
|252490|          true|780.3|12424417|
|252490|          true|622.0|12555877|
|252490|          true|678.3| 2357157|
|252490|          true|766.0|10612043|
+------+--------------+-----+--------+
only showing top 5 rows



In [34]:
# Count, per game, how many rows the selected users have for it, then keep
# the selected game plus the six games with the highest counts.
#
# Later cells skip the selected game with recom_games[1:], so it has to be
# row 0. Sorting by count alone leaves its position undefined whenever another
# game ties with it; sorting on the "is the selected game" flag first pins it
# to the top, and the remaining games follow in descending count order.
recom_games = (
    final_df.groupBy("app_id")
    .count()
    .orderBy((col("app_id") == target_app_id).desc(), col("count").desc())
    .limit(7)
    .collect()
)
recom_games

[Row(app_id=252490, count=1000),
 Row(app_id=386360, count=1),
 Row(app_id=284160, count=1),
 Row(app_id=1248130, count=1),
 Row(app_id=1938090, count=1),
 Row(app_id=218620, count=1),
 Row(app_id=1091500, count=1)]

In [35]:
# Collect the "count" value of every row except the first one (in the run
# shown, the first row is the selected game itself).
count_played = [recom_games[pos]["count"] for pos in range(1, len(recom_games))]
count_played

[1, 1, 1, 1, 1, 1]

In [36]:
# Collect the app_id of every row except the first one (in the run shown,
# the first row is the selected game itself).
apps = [recom_games[pos]["app_id"] for pos in range(1, len(recom_games))]
apps

[386360, 284160, 1248130, 1938090, 218620, 1091500]

In [37]:
# Turn the collected rows back into a Spark DataFrame whose two columns are
# named "app_id" and "count_played".
apps_count = spark.createDataFrame(
    data=recom_games,
    schema=["app_id", "count_played"],
)
apps_count.show()

+-------+------------+
| app_id|count_played|
+-------+------------+
| 252490|        1000|
| 386360|           1|
| 284160|           1|
|1248130|           1|
|1938090|           1|
| 218620|           1|
|1091500|           1|
+-------+------------+



In [38]:
# Fetch the games-table rows (title, platforms, rating, price, ...) for the
# app ids held in the `apps` list.
output = games.where(games["app_id"].isin(apps))
output.show()

+-------+--------------------+----+-----+-----+--------------------+--------------+------------+-----------+
| app_id|               title| win|  mac|linux|              rating|positive_ratio|user_reviews|price_final|
+-------+--------------------+----+-----+-----+--------------------+--------------+------------+-----------+
| 218620|            PAYDAY 2|true|false| true|       Very Positive|            89|      405728|       9.99|
| 284160|        BeamNG.drive|true|false|false|Overwhelmingly Po...|            97|      165241|      24.99|
| 386360|              SMITE®|true|false|false|     Mostly Positive|            79|      109933|        0.0|
|1091500|      Cyberpunk 2077|true|false|false|     Mostly Positive|            79|      541364|      59.99|
|1248130|Farming Simulator 22|true| true|false|       Very Positive|            91|       35359|      39.99|
|1938090|Call of Duty®: Mo...|true|false|false|               Mixed|            60|      389119|      69.99|
+-------+----------

In [39]:
# Attach count_played to every recommended game by joining on app_id.
#
# `output` is reassigned here, so running the cell a second time would join a
# duplicate count_played column onto the already-joined frame. Dropping the
# column first (a no-op when it is absent) keeps the cell re-runnable.
# A join does not guarantee any row order, so the result is ranked explicitly
# with the most frequently played games on top.
output = (
    output.drop("count_played")
    .join(apps_count, on="app_id", how="inner")
    .orderBy(col("count_played").desc())
)

output.show()

+-------+--------------------+----+-----+-----+--------------------+--------------+------------+-----------+------------+
| app_id|               title| win|  mac|linux|              rating|positive_ratio|user_reviews|price_final|count_played|
+-------+--------------------+----+-----+-----+--------------------+--------------+------------+-----------+------------+
| 386360|              SMITE®|true|false|false|     Mostly Positive|            79|      109933|        0.0|           1|
| 284160|        BeamNG.drive|true|false|false|Overwhelmingly Po...|            97|      165241|      24.99|           1|
|1248130|Farming Simulator 22|true| true|false|       Very Positive|            91|       35359|      39.99|           1|
|1938090|Call of Duty®: Mo...|true|false|false|               Mixed|            60|      389119|      69.99|           1|
| 218620|            PAYDAY 2|true|false| true|       Very Positive|            89|      405728|       9.99|           1|
|1091500|      Cyberpunk

In [40]:
# Optional filtering of the recommendations stored in `output`.
# The user chooses one of three modes:
#   os    - keep only games whose flag column for the chosen platform is true
#   price - keep only games whose price_final is at most the entered value
#   none  - show all recommendations unchanged
cond = input("Do you want to add filters(Os,Price,None): ").strip().lower()

if cond == 'os':
    # Named os_choice rather than `os` so it cannot be mistaken for (or later
    # shadow) the standard-library module of that name.
    os_choice = input("Enter preferred Os system(windows/mac/linux/any): ").strip().lower()
    # The Windows flag column in the games table is called "win".
    if os_choice == 'windows':
        os_choice = 'win'

    if os_choice == 'any':
        output.show(truncate=False)
    elif os_choice in ('win', 'mac', 'linux'):
        # The platform columns are booleans, so the column itself is the
        # filter predicate.
        filtered_titles = output.filter(output[os_choice])
        if filtered_titles.count() == 0:
            print("No Games found")
        else:
            filtered_titles.show(truncate=False)
    else:
        print("Please enter a valid OS option.")

elif cond == 'price':
    # The entered value is used as an upper bound on price_final.
    try:
        price = float(input("Enter price range: "))
    except ValueError:
        # Non-numeric input is reported instead of aborting the cell.
        print("Please enter a valid price.")
    else:
        filtered_titles = output.filter(output.price_final <= price)
        # Same empty-result message as the OS branch.
        if filtered_titles.count() == 0:
            print("No Games found")
        else:
            filtered_titles.show(truncate=False)

elif cond == 'none':
    output.show(truncate=False)

else:
    print("Please enter valid filter options.")

Do you want to add filters(Os,Price,None): none
+-------+---------------------------------+----+-----+-----+-----------------------+--------------+------------+-----------+------------+
|app_id |title                            |win |mac  |linux|rating                 |positive_ratio|user_reviews|price_final|count_played|
+-------+---------------------------------+----+-----+-----+-----------------------+--------------+------------+-----------+------------+
|386360 |SMITE®                           |true|false|false|Mostly Positive        |79            |109933      |0.0        |1           |
|284160 |BeamNG.drive                     |true|false|false|Overwhelmingly Positive|97            |165241      |24.99      |1           |
|1248130|Farming Simulator 22             |true|true |false|Very Positive          |91            |35359       |39.99      |1           |
|1938090|Call of Duty®: Modern Warfare® II|true|false|false|Mixed                  |60            |389119      |69.99      |

In [None]:
# Uncomment and run once the analysis is finished to shut down the Spark session.
#spark.stop()