# Video Game Dataset Preparation

# Setting Up Necessary Things

In [1]:
# Jupyter Notebook Magic Command - Auto Reloading
# (Re)load the autoreload extension; mode 2 re-imports edited modules before
# each cell runs, so changes to local .py files are picked up without a restart.
%reload_ext autoreload
%autoreload 2

# Jupyter Notebook Magic Command - Inline Plotting
# Render matplotlib figures inside the notebook output.
# NOTE(review): no plotting happens in the visible cells — confirm this is still needed.
%matplotlib inline

In [2]:
# Ignore All Warnings
# NOTE(review): the "ignore" action is installed with no category/module
# restriction, so every warning raised for the rest of the session is hidden.
# Consider narrowing it to the specific warning that was noisy.
import warnings
warnings.filterwarnings("ignore")

# Necessary Imports

In [3]:
# General
import os
from IPython.display import display

# Data
import pandas as pd

# Data Cleaning

In [4]:
# Peek at the untouched raw export (all columns) before any cleaning is applied
df_demo = pd.read_csv(filepath_or_buffer="../../data/raw/video_game/games.csv")
df_demo.head(n=5)

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.0,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [5]:
# Load the raw games CSV into a cleaned title/description/genres DataFrame
def get_dataframe(path):
    """Read the raw games CSV and return a null-free, de-duplicated frame.

    Only the ``Name``, ``About the game`` and ``Genres`` columns are parsed;
    they are returned as ``title``, ``description`` and ``genres``.

    Parameters
    ----------
    path : str or path-like
        Location of the raw CSV file.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``description``, ``genres`` (in that order). Rows
        with a missing value in any of the three columns are dropped, then
        duplicate titles and duplicate descriptions are dropped, keeping the
        first occurrence. The original row labels are kept (the index is not
        reset).

    Raises
    ------
    ValueError
        Raised by ``pandas.read_csv`` when one of the three source columns
        is absent from the file.
    """
    column_map = {
        "Name": "title",
        "About the game": "description",
        "Genres": "genres",
    }
    source_columns = list(column_map)

    # usecols avoids parsing the columns that would be discarded anyway.
    raw = pd.read_csv(path, usecols=source_columns)

    return (
        raw.loc[:, source_columns]  # usecols keeps file order; enforce ours
        .rename(columns=column_map, errors="raise")
        .dropna(axis=0, how="any")
        .drop_duplicates(subset="title", keep="first")
        .drop_duplicates(subset="description", keep="first")
    )

In [6]:
# Get DataFrame Information
def get_dataframe_info(df):
    """Print and display a quick quality summary of a cleaned frame.

    Reports the shape, the per-column null counts, the number of repeated
    values in the ``title`` and ``description`` columns, and the
    ``describe`` summary of the object-typed columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains at least ``title`` and ``description`` columns.

    Returns
    -------
    None
        Everything is emitted through ``print`` / ``display``.
    """
    print("Shape of the dataframe: ", df.shape)
    print("Null Values:")
    display(df.isna().sum())
    # Label fixed: the column holds game titles, not movie names.
    print("Duplicate Title Count: ", df["title"].duplicated().sum())
    print("Duplicate Description Count: ", df["description"].duplicated().sum())
    print("DataFrame Details:")
    display(df.describe(include="object"))

In [7]:
# Build the cleaned title/description/genres frame from the raw games CSV
video_game_df = get_dataframe("../../data/raw/video_game/games.csv")

In [8]:
# Sanity-check the cleaned frame: shape, null counts, duplicate counts, summary
get_dataframe_info(video_game_df)

Shape of the dataframe:  (73123, 3)
Null Values:


title          0
description    0
genres         0
dtype: int64

Duplicate Movie Name Count:  0
Duplicate Description Count:  0
DataFrame Details:


Unnamed: 0,title,description,genres
count,73123,73123,73123
unique,73123,73123,2260
top,Galactic Bowling,Galactic Bowling is an exaggerated and stylize...,"Casual,Indie"
freq,1,1,4240


## Primary Genre and Type

In [11]:
# Create Primary Genre Column From Genres
# The primary genre is the first entry of the comma-separated "genres" string.
# The vectorised .str accessor replaces a per-row Python loop over .iloc;
# n=1 stops splitting after the first comma because only the leading token is used.
primary_genre = video_game_df["genres"].str.split(",", n=1).str[0].tolist()

len(primary_genre), primary_genre[:5]

(73123, ['Casual', 'Action', 'Action', 'Adventure', 'Adventure'])

In [12]:
# Insert the primary genre values into the dataframe as a new "primary_genre" column.
# primary_genre is a plain list, so values are assigned to rows by position.
video_game_df["primary_genre"] = primary_genre

In [13]:
# Tag every row with its entertainment type ("video game").
# NOTE(review): presumably used to tell rows apart once combined with other
# media datasets — confirm against the downstream consumer.
video_game_df["type"] = "video game"

In [14]:
# Preview the first rows to confirm the "primary_genre" and "type" columns were added
video_game_df.head()

Unnamed: 0,title,description,genres,primary_genre,type
0,Galactic Bowling,Galactic Bowling is an exaggerated and stylize...,"Casual,Indie,Sports",Casual,video game
1,Train Bandit,THE LAW!! Looks to be a showdown atop a train....,"Action,Indie",Action,video game
2,Jolt Project,Jolt Project: The army now has a new robotics ...,"Action,Adventure,Indie,Strategy",Action,video game
3,Henosis™,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"Adventure,Casual,Indie",Adventure,video game
4,Two Weeks in Painland,ABOUT THE GAME Play as a hacker who has arrang...,"Adventure,Indie",Adventure,video game


# Dataframe to CSV

In [15]:
# Method to convert dataframe to CSV and save
def write_dataframe_to_csv(path, dataframe, overwrite=False):
    """Save ``dataframe`` as a CSV file at ``path`` without the index column.

    Parameters
    ----------
    path : str or path-like
        Destination file. Missing parent directories are created.
    dataframe : pandas.DataFrame
        Frame to serialise.
    overwrite : bool, default False
        When False, an existing file at ``path`` is left untouched and a
        notice is printed instead. Pass True to replace it, e.g. after the
        upstream cleaning steps have changed.

    Returns
    -------
    None
        The outcome is reported through ``print``.
    """
    if os.path.exists(path) and not overwrite:
        print(f"The file already exists ...! [Find the file in the location '{path}']")
        return

    # to_csv does not create directories; a bare filename has no parent to create.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    dataframe.to_csv(path, index=False)
    print("Dataframe saved successfully: ", path)

In [16]:
# Destination of the processed dataset; the helper leaves an existing file untouched
video_game_path = "../../data/processed/video_game/video_game.csv"

write_dataframe_to_csv(path=video_game_path, dataframe=video_game_df)

Dataframe saved successfully:  ../../data/processed/video_game/video_game.csv
