# Bundle EDA

## Goal: Recommend games to play, given that a user has bought a bundle

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Raw bundle export; each row is one bundle, with its items stored as a
# stringified list of dicts in the "items" column.
fp = Path("data", "bundle.csv")

df = pd.read_csv(filepath_or_buffer=fp)
df.head(n=3)

Unnamed: 0,bundle_final_price,bundle_url,bundle_price,bundle_name,bundle_id,items,bundle_discount
0,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,"[{'genre': 'Adventure, Indie, RPG', 'item_id':...",10%
1,$20.15,http://store.steampowered.com/bundle/1473/?utm...,$25.87,"Naruto Shippuden Uncut Season 4, Vol. 3",1473,"[{'genre': '', 'item_id': '528360', 'discounte...",22%
2,$20.15,http://store.steampowered.com/bundle/1474/?utm...,$25.87,"Naruto Shippuden Uncut Season 4, Vol. 4",1474,"[{'genre': '', 'item_id': '528380', 'discounte...",22%


In [3]:
import ast


def _parse_items(raw):
    """Turn the stringified list of item dicts into a real Python list.

    Values that are not strings (e.g. lists produced by a previous run of
    this cell) are returned unchanged, so re-running the cell is harmless
    instead of raising ``ValueError`` from ``ast.literal_eval``.
    """
    if isinstance(raw, str):
        # literal_eval only evaluates Python literals, never arbitrary code.
        return ast.literal_eval(raw)
    return raw


df["items"] = df["items"].apply(_parse_items)
df.head(3)

Unnamed: 0,bundle_final_price,bundle_url,bundle_price,bundle_name,bundle_id,items,bundle_discount
0,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,"[{'genre': 'Adventure, Indie, RPG', 'item_id':...",10%
1,$20.15,http://store.steampowered.com/bundle/1473/?utm...,$25.87,"Naruto Shippuden Uncut Season 4, Vol. 3",1473,"[{'genre': '', 'item_id': '528360', 'discounte...",22%
2,$20.15,http://store.steampowered.com/bundle/1474/?utm...,$25.87,"Naruto Shippuden Uncut Season 4, Vol. 4",1474,"[{'genre': '', 'item_id': '528380', 'discounte...",22%


In [4]:
# Go from one row per bundle to one row per (bundle, item).
# explode() repeats the original index for every list element, so merging
# back on the index attaches each item to its bundle's columns.
items_long = df["items"].explode().rename("item").to_frame()

df = (
    df.drop(columns=["items"])
    .merge(items_long, left_index=True, right_index=True)
    .reset_index(drop=True)
)
df.head(3)

Unnamed: 0,bundle_final_price,bundle_url,bundle_price,bundle_name,bundle_id,bundle_discount,item
0,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ..."
1,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ..."
2,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ..."


In [5]:
# Field names are taken from the first item dict only.
# NOTE(review): assumes every item dict carries the same keys; confirm.
first_item = df["item"].iloc[0]
item_keys = [*first_item.keys()]
item_keys

['genre', 'item_id', 'discounted_price', 'item_url', 'item_name']

In [6]:
# Promote every field of the per-row item dict to its own column.
# The lambda is called immediately inside each iteration, so it always sees
# the current field name.
for field in item_keys:
    df[field] = df["item"].map(lambda entry: entry[field])

df.head(3)

Unnamed: 0,bundle_final_price,bundle_url,bundle_price,bundle_name,bundle_id,bundle_discount,item,genre,item_id,discounted_price,item_url,item_name
0,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ...","Adventure, Indie, RPG",326950,$8.99,http://store.steampowered.com/app/326950,Sword of Asumi
1,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ...","Adventure, Indie, RPG",331490,$2.99,http://store.steampowered.com/app/331490,Sword of Asumi - Soundtrack
2,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%,"{'genre': 'Adventure, Indie, RPG', 'item_id': ...","Adventure, Indie, RPG",331491,$1.99,http://store.steampowered.com/app/331491,Sword of Asumi - Graphic Novel


In [7]:
# Columns kept for the recommendation task.
# Price-related columns (bundle_price, bundle_final_price, bundle_discount,
# discounted_price) are deliberately left out: with only ~3.5k item rows
# available, they may lead to overfitting to individual bundles.
feature_candidates = ["bundle_id", "item_id", "item_name", "genre"]

bundle = df.loc[:, feature_candidates]
bundle.head(3)

Unnamed: 0,bundle_id,item_id,item_name,genre
0,450,326950,Sword of Asumi,"Adventure, Indie, RPG"
1,450,331490,Sword of Asumi - Soundtrack,"Adventure, Indie, RPG"
2,450,331491,Sword of Asumi - Graphic Novel,"Adventure, Indie, RPG"


In [None]:
# Persist the trimmed item-level table; index=False keeps the row index out
# of the CSV so it does not come back as an extra column on reload.
save_fp = Path("data").joinpath("bundle_task.csv")
bundle.to_csv(path_or_buf=save_fp, index=False)

In [5]:
# Sanity check: read the exported file back and display it.
import pandas as pd

pd.read_csv(filepath_or_buffer="data/bundle_task.csv")

Unnamed: 0,bundle_id,item_id,item_name,genre
0,450,326950,Sword of Asumi,"Adventure, Indie, RPG"
1,450,331490,Sword of Asumi - Soundtrack,"Adventure, Indie, RPG"
2,450,331491,Sword of Asumi - Graphic Novel,"Adventure, Indie, RPG"
3,450,331492,Sword of Asumi - Character Creator,"Adventure, Indie, RPG"
4,450,348540,Divine Slice of Life,"Adventure, Casual, Indie"
...,...,...,...,...
3520,588,467220,Dyna Bomb,"Action, Adventure, Casual, Indie"
3521,588,485090,Dyna Bomb - Soundtrack OST,"Action, Adventure, Casual, Indie"
3522,594,385230,Ninjahtic,"Action, Adventure, Indie"
3523,594,387880,Ninjahtic Mind Tricks,"Action, Adventure, Indie"
