In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
INPUT_PATH = 'resources/board_games.csv'
CATEGORY_SEP = ','
GENRES = {
    'cat_kids': {"children's game",},
    'cat_war': {'modern warfare', 'wargame', 'american civil war', 'civil war', 'korean war',
            'world war ii', 'american revolutionary war', 'vietnam war', 'american indian wars',
            'world war i', },
}

In [3]:
# Load the CSV file 
df = pd.read_csv(INPUT_PATH).dropna()

# Display the first few rows and the summary of the DataFrame
print(df.columns)
print(df.shape)
df.head()

Index(['game_id', 'description', 'image', 'max_players', 'max_playtime',
       'min_age', 'min_players', 'min_playtime', 'name', 'playing_time',
       'thumbnail', 'year_published', 'artist', 'category', 'compilation',
       'designer', 'expansion', 'family', 'mechanic', 'publisher',
       'average_rating', 'users_rated'],
      dtype='object')
(163, 22)


Unnamed: 0,game_id,description,image,max_players,max_playtime,min_age,min_players,min_playtime,name,playing_time,...,artist,category,compilation,designer,expansion,family,mechanic,publisher,average_rating,users_rated
12,13,"In Catan (formerly The Settlers of Catan), pla...",//cf.geekdo-images.com/images/pic2419375.jpg,4,120,10,3,60,Catan,120,...,"Volkan Baga,Tanja Donner,Pete Fenlon,Jason Haw...",Negotiation,"CATAN 3D Collector's Edition,Catan: Big Box,Ca...",Klaus Teuber,"20 Jahre Darmstadt Spielt,Brettspiel Adventska...","Catan,Promotional Board Games","Dice Rolling,Hand Management,Modular Board,Rou...","KOSMOS,999 Games,Albi,Astrel Games,Bergsala En...",7.26569,67655
44,49,Designer Uwe Rosenberg has a talent for dreami...,//cf.geekdo-images.com/images/pic2824027.jpg,5,30,10,2,30,Mamma Mia!,30,...,Franz Vohwinkel,Card Game,Mamma Mia! con Ingredientes dobles,Uwe Rosenberg,"Mamma Mia: Double Ingredients,Mamma Mia: Joker...","Food / Cooking,Mamma Mia!","Hand Management,Memory","ABACUSSPIELE,Brain Games,cutia.ro,dV Giochi,G3...",6.48827,4824
79,93,"In this award-winning game, players take on th...",//cf.geekdo-images.com/images/pic180538.jpg,5,120,12,2,60,El Grande,120,...,Doris Matthäus,Renaissance,"El Grande Big Box,El Grande Decennial Edition","Wolfgang Kramer,Richard Ulrich","The El Grande Expansions,El Grande: Grandissim...","Country: Spain,El Grande","Area Control / Area Influence,Area Movement,Au...","Hans im Glück Verlags-GmbH,999 Games,Användbar...",7.80446,18423
151,181,"Possibly the most popular, mass market war gam...",//cf.geekdo-images.com/images/pic2920766.jpg,6,120,10,2,120,Risk,120,...,(Uncredited),"Negotiation,Territory Building,Wargame","Risk & Castle Risk,Spaß am Erfolg: Karriere, D...","Albert Lamorisse,Michael I. Levin","Concurrent,La Conquête du Monde revised,Euro R...","Components: Miniatures,Hasbro Nostalgia Series...","Area Control / Area Influence,Area Movement,Di...","(Unknown),Alga,Barnes & Noble,Borras Plana S.A...",5.57906,21836
226,278,Catan Card Game bears only a slight resemblanc...,//cf.geekdo-images.com/images/pic135066.jpg,2,90,10,2,90,Catan Card Game,90,...,"Tanja Donner,Pete Fenlon,Jason Hawkins,Matt Sc...","Card Game,City Building,Territory Building",Die Siedler von Catan: Das Kartenspiel – 10th ...,Klaus Teuber,"Catan Card Game: Artisans & Benefactors,Catan ...","Catan,Die Siedler von Catan: Das Kartenspiel /...","Card Drafting,Dice Rolling,Hand Management,Tra...","999 Games,Competo / Marektoy,Devir,Filosofia É...",6.77393,11105


In [4]:
# Check for missing values
missing_values = df.isnull().sum()
print(missing_values[missing_values > 0])

Series([], dtype: int64)


In [5]:
# Remove duplicates
df = df.drop_duplicates()

# Check the number of rows before and after
print("Number of rows after removing duplicates:", len(df))

Number of rows after removing duplicates: 163


In [6]:
# Example: Standardize categorical variables (e.g., lowercase the 'category' column)
df['category'] = df['category'].str.lower()

# Display unique categories to confirm changes
print(df['category'].unique())
df['category'].iloc[0]
   

['negotiation' 'card game' 'renaissance'
 'negotiation,territory building,wargame'
 'card game,city building,territory building'
 'bluffing,card game,city building,fantasy,medieval' 'ancient,wargame'
 'city building,medieval,territory building'
 'deduction,medieval,murder/mystery,religious' 'card game,mafia'
 'wargame,world war ii'
 'card game,collectible components,fantasy,fighting,medieval'
 'card game,fantasy,fighting,humor'
 'card game,collectible components,fighting,horror,political'
 'science fiction' 'abstract strategy,animals'
 'economic,political,wargame,world war ii'
 'city building,economic,farming' 'animals,bluffing,farming,puzzle'
 'animals,card game,humor,negotiation'
 'american west,bluffing,card game,deduction,fighting'
 'fighting,science fiction,wargame' 'fantasy,miniatures,wargame'
 'napoleonic,wargame' 'arabian,city building,medieval'
 'science fiction,wargame' 'vietnam war,wargame'
 'card game,fighting,humor'
 'action / dexterity,card game,party game,real-time' 'tra

'negotiation'

In [7]:
# get all orig  cat for to make GENRES dictionary
# orignal_categories = list(set(df.category.str.cat(sep=CATEGORY_SEP).split(CATEGORY_SEP)))
# orignal_categories

In [9]:
def get_genres(row):
    raw_categories = set(row['category'].split(CATEGORY_SEP))
    # Now use GENRES and raw_genres to produce a dictionary genres
    genres = {}
    for good_genre, original_genre_strings in GENRES.items():       
        genres[good_genre] = not original_genre_strings.isdisjoint(raw_categories)
       
    return genres.values()

In [11]:
df[list(GENRES.keys())] = df.apply(get_genres, axis='columns', result_type='expand')
print(df.columns)
print(df.shape)
df.head()

Index(['game_id', 'description', 'image', 'max_players', 'max_playtime',
       'min_age', 'min_players', 'min_playtime', 'name', 'playing_time',
       'thumbnail', 'year_published', 'artist', 'category', 'compilation',
       'designer', 'expansion', 'family', 'mechanic', 'publisher',
       'average_rating', 'users_rated', 'cat_kids', 'cat_war'],
      dtype='object')
(163, 24)


Unnamed: 0,game_id,description,image,max_players,max_playtime,min_age,min_players,min_playtime,name,playing_time,...,compilation,designer,expansion,family,mechanic,publisher,average_rating,users_rated,cat_kids,cat_war
12,13,"In Catan (formerly The Settlers of Catan), pla...",//cf.geekdo-images.com/images/pic2419375.jpg,4,120,10,3,60,Catan,120,...,"CATAN 3D Collector's Edition,Catan: Big Box,Ca...",Klaus Teuber,"20 Jahre Darmstadt Spielt,Brettspiel Adventska...","Catan,Promotional Board Games","Dice Rolling,Hand Management,Modular Board,Rou...","KOSMOS,999 Games,Albi,Astrel Games,Bergsala En...",7.26569,67655,False,False
44,49,Designer Uwe Rosenberg has a talent for dreami...,//cf.geekdo-images.com/images/pic2824027.jpg,5,30,10,2,30,Mamma Mia!,30,...,Mamma Mia! con Ingredientes dobles,Uwe Rosenberg,"Mamma Mia: Double Ingredients,Mamma Mia: Joker...","Food / Cooking,Mamma Mia!","Hand Management,Memory","ABACUSSPIELE,Brain Games,cutia.ro,dV Giochi,G3...",6.48827,4824,False,False
79,93,"In this award-winning game, players take on th...",//cf.geekdo-images.com/images/pic180538.jpg,5,120,12,2,60,El Grande,120,...,"El Grande Big Box,El Grande Decennial Edition","Wolfgang Kramer,Richard Ulrich","The El Grande Expansions,El Grande: Grandissim...","Country: Spain,El Grande","Area Control / Area Influence,Area Movement,Au...","Hans im Glück Verlags-GmbH,999 Games,Användbar...",7.80446,18423,False,False
151,181,"Possibly the most popular, mass market war gam...",//cf.geekdo-images.com/images/pic2920766.jpg,6,120,10,2,120,Risk,120,...,"Risk & Castle Risk,Spaß am Erfolg: Karriere, D...","Albert Lamorisse,Michael I. Levin","Concurrent,La Conquête du Monde revised,Euro R...","Components: Miniatures,Hasbro Nostalgia Series...","Area Control / Area Influence,Area Movement,Di...","(Unknown),Alga,Barnes & Noble,Borras Plana S.A...",5.57906,21836,False,True
226,278,Catan Card Game bears only a slight resemblanc...,//cf.geekdo-images.com/images/pic135066.jpg,2,90,10,2,90,Catan Card Game,90,...,Die Siedler von Catan: Das Kartenspiel – 10th ...,Klaus Teuber,"Catan Card Game: Artisans & Benefactors,Catan ...","Catan,Die Siedler von Catan: Das Kartenspiel /...","Card Drafting,Dice Rolling,Hand Management,Tra...","999 Games,Competo / Marektoy,Devir,Filosofia É...",6.77393,11105,False,False
