In [1]:
import pandas as pd
from google.colab import drive

import numpy as np
import random

randomSeed = 1
random.seed(randomSeed)
np.random.seed(randomSeed)

# drive.mount("/drive")
drive.mount('/content/gdrive')

df = pd.read_csv("gdrive/My Drive/board_games.csv")



# Initial cleaning steps within data

df = df.drop(['game_id', 'description', 'artist', 'publisher', 'compilation', 'designer', 'family', 'image', 'max_playtime', 'min_playtime', 'name', 'thumbnail', 'year_published'], axis=1)
df.dtypes

#Change Expaion To Boolean
df["expansion"] = df["expansion"].notnull().astype(int)

#Split Mechanic Collum On Each Type
catSeries = df['mechanic']
catDF = catSeries.str.get_dummies(sep = ",")

#Combine
df_combined = pd.merge(df, catDF, left_index=True, right_index=True, how='inner')

#Split Categoty Collum On Each Type
catSeries = df['category']
catDF = catSeries.str.get_dummies(sep = ",")

#Combine
df_combined = pd.merge(df_combined, catDF, left_index=True, right_index=True, how='inner')

# Drop Arrays
# df = df_combined.drop(['category', 'mechanic'], axis=1)

df

Mounted at /content/gdrive


Unnamed: 0,max_players,min_age,min_players,playing_time,category,expansion,mechanic,average_rating,users_rated
0,5,14,3,240,"Economic,Negotiation,Political",0,"Area Control / Area Influence,Auction/Bidding,...",7.66508,4498
1,4,12,3,30,"Card Game,Fantasy",0,Trick-taking,6.60815,478
2,4,10,2,60,"Abstract Strategy,Medieval",0,"Area Control / Area Influence,Hand Management,...",7.44119,12019
3,4,12,2,60,Ancient,0,"Action Point Allowance System,Area Control / A...",6.60675,314
4,6,12,3,90,Economic,0,"Hand Management,Stock Holding,Tile Placement",7.35830,15195
...,...,...,...,...,...,...,...,...,...
10527,2,12,1,480,"Vietnam War,Wargame",0,Hex-and-Counter,8.35333,75
10528,5,13,2,45,"Card Game,Fantasy,Fighting,Video Game Theme",1,"Deck / Pool Building,Hand Management,Variable ...",8.08780,82
10529,4,12,2,20,Card Game,0,"Hand Management,Set Collection",7.28016,63
10530,6,12,2,120,"Miniatures,Racing,Science Fiction",0,"Action / Movement Programming,Grid Movement,Mo...",7.45871,341


In [2]:
# Removing data points with lower then 250 reviews
minReviews = 250
df = df[df["users_rated"] > minReviews]
# df = df.drop(columns=["users_rated"])
df = df.drop(columns=["users_rated"])
# print(df.shape)
# df

# Count the occurrences of each mechanic
df_exploded = df['mechanic'].str.split(',').explode()
mechanic_counts = df_exploded.value_counts()
# Get the top 5 mechanics
top_5_mechanics = mechanic_counts.head(5)

# Count the occurrences of each category
df_exploded = df['category'].str.split(',').explode()
category_count = df_exploded.value_counts()
# Get the top 5 categories
top_5_categories = category_count.head(5)

# # Drop Arrays
df = df_combined.drop(['category', 'mechanic'], axis=1)
# df.dtypes
print(df.shape)
df

(10532, 141)


Unnamed: 0,max_players,min_age,min_players,playing_time,expansion,average_rating,users_rated,Acting,Action / Movement Programming,Action Point Allowance System,...,Transportation,Travel,Trivia,Video Game Theme,Vietnam War,Wargame,Word Game,World War I,World War II,Zombies
0,5,14,3,240,0,7.66508,4498,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,12,3,30,0,6.60815,478,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,10,2,60,0,7.44119,12019,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,12,2,60,0,6.60675,314,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,6,12,3,90,0,7.35830,15195,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10527,2,12,1,480,0,8.35333,75,0,0,0,...,0,0,0,0,1,1,0,0,0,0
10528,5,13,2,45,1,8.08780,82,0,0,0,...,0,0,0,1,0,0,0,0,0,0
10529,4,12,2,20,0,7.28016,63,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10530,6,12,2,120,0,7.45871,341,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# descriptive statistics before cleaning

!pip install prettytable
from prettytable import PrettyTable
import numpy as np

# Columns of interest
columns_of_interest = ['max_players', 'min_players', 'playing_time', 'min_age']

# initilize table
stats_table = PrettyTable()
stats_table.field_names = ['Descriptive Statistics', 'Max Players', 'Min Players', 'Average Play Time', 'Min Age'] # Declare column names

# Calculate the mean for the specified columns
means = round(df[columns_of_interest].mean(), 3)
# add to table
stats_table.add_row(["Mean", means[0], means[1], means[2], means[3]])

# Calculate the median for the specified columns
median = round(df[columns_of_interest].median(), 3)
# add to table
stats_table.add_row(["Median", median[0], median[1], median[2], median[3]])

# Calculate the standard deviation for the specified columns
std_devs = round(df[columns_of_interest].std(),3)
# add to table
stats_table.add_row(["Standard Deviation", std_devs[0], std_devs[1], std_devs[2], std_devs[3]])

# Calculate the quartiles for the specified columns
quartiles = round(df[columns_of_interest].quantile([0.25, 0.5, 0.75]),3)
# add to table
for quartile in [0.25, 0.5, 0.75]:
    quartile_values = quartiles.loc[quartile].tolist()
    name = str(int(quartile * 100)) + "th quartile"
    stats_table.add_row([name, quartile_values[0],  quartile_values[1],  quartile_values[2],  quartile_values[3]])

print(stats_table)
print("\nTop 5 Mechanics:")
print(top_5_mechanics)
print("\nTop 5 Categories:")
print(top_5_categories)

+------------------------+-------------+-------------+-------------------+---------+
| Descriptive Statistics | Max Players | Min Players | Average Play Time | Min Age |
+------------------------+-------------+-------------+-------------------+---------+
|          Mean          |    5.657    |    2.071    |       91.341      |  9.715  |
|         Median         |     4.0     |     2.0     |        45.0       |   10.0  |
|   Standard Deviation   |    18.884   |    0.664    |      659.754      |  3.451  |
|     25th quartile      |     4.0     |     2.0     |        30.0       |   8.0   |
|     50th quartile      |     4.0     |     2.0     |        45.0       |   10.0  |
|     75th quartile      |     6.0     |     2.0     |        90.0       |   12.0  |
+------------------------+-------------+-------------+-------------------+---------+

Top 5 Mechanics:
Hand Management                  1187
Dice Rolling                     1079
Set Collection                    705
Variable Player Po

In [4]:
#import matplotlib.pyplot as plt
#import missingno as msno
#msno.matrix(df)
#plt.show()
#df.isna().sum()

In [5]:
# X = df2.iloc[:,:-1] #X
y = df.pop("average_rating")
X = df
X.head()
# y.head()

Unnamed: 0,max_players,min_age,min_players,playing_time,expansion,users_rated,Acting,Action / Movement Programming,Action Point Allowance System,Area Control / Area Influence,...,Transportation,Travel,Trivia,Video Game Theme,Vietnam War,Wargame,Word Game,World War I,World War II,Zombies
0,5,14,3,240,0,4498,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,4,12,3,30,0,478,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,10,2,60,0,12019,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,4,12,2,60,0,314,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,6,12,3,90,0,15195,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
from sklearn import model_selection

X_train,X_test,y_train,y_test = model_selection.train_test_split(X,y,test_size=0.20, random_state = 1, shuffle = True)
# X_train,X_test,y_train,y_test = model_selection.train_test_split(X,y,test_size=0.10, random_state = 1, shuffle = True)

X_train.head()

Unnamed: 0,max_players,min_age,min_players,playing_time,expansion,users_rated,Acting,Action / Movement Programming,Action Point Allowance System,Area Control / Area Influence,...,Transportation,Travel,Trivia,Video Game Theme,Vietnam War,Wargame,Word Game,World War I,World War II,Zombies
2294,2,12,2,120,1,113,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
5531,4,0,2,45,0,100,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1933,10,13,4,30,0,62,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
7018,0,12,2,240,1,211,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
4543,5,10,2,45,0,132,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
from sklearn import svm
from prettytable import PrettyTable
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
import numpy as np

# used for finding best parameters

# Parameter tuning with GridSearchCV
#parameters = {'C': [1, 10, 100], 'gamma': [0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']}
#svc = svm.SVR()
#clf = GridSearchCV(svc, parameters, cv=5)
#clf.fit(X_train, y_train)
#print(clf.best_params_)
# Use the best estimator found
#regr = clf.best_estimator_
#y_pred = regr.predict(X_test)
#y_test = y_test.values

regr = svm.SVR(kernel = "rbf", C=100,gamma=0.0001)
regr.fit(X_train, y_train)

y_pred = regr.predict(X_test)

y_test = y_test.values
error = abs(y_test - y_pred)
# print(y_test.values , y_pred)

print(f"Median: {np.median(error)}")
df_describe = pd.DataFrame(error)
df_describe.describe()

# Errors
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)


## Print them to visually see how they match
example_table = PrettyTable() # Initialize Table
example_table.field_names = ["Y Pred", "Y True", "ERROR"] # Declare column names
for i in range(len(y_pred)):
    example_table.add_row([f"{y_pred[i]}",f"{y_test[i]}", error[i]])
    if i == 10:
      break

## See them Side By Side
print(example_table)



Median: 0.4297872006179473
Mean Absolute Error: 0.537267149057672
Mean Squared Error: 0.4977876285914228
+--------------------+---------+----------------------+
|       Y Pred       |  Y True |        ERROR         |
+--------------------+---------+----------------------+
| 6.990367497761544  | 6.46364 |  0.5267274977615442  |
| 6.058436105128782  | 6.06983 | 0.011393894871218002 |
| 5.984612544552708  |  6.6028 |  0.6181874554472921  |
| 7.006820063832483  | 7.84689 |  0.8400699361675175  |
| 6.598851909847276  | 6.17857 | 0.42028190984727676  |
| 7.132379276452942  | 7.33194 | 0.19956072354705867  |
| 5.8501812208506125 | 5.48525 |  0.3649312208506128  |
| 6.698355962375257  | 6.90227 | 0.20391403762474258  |
| 5.749136944830572  |   4.34  |  1.4091369448305722  |
| 5.9609629367381105 | 5.11987 |  0.8410929367381108  |
| 5.933941170394374  | 5.18627 |  0.7476711703943737  |
+--------------------+---------+----------------------+


In [8]:
# import random

maxes = df.describe().loc["max"]
# maxes = df["playing_time"].median()

#exDist = np.random.default_rng().binomial(n= ((5.44)+4)/2 * val, p= 1/val, size=n)

#MED: 4.0,10.0,2.0,45.0

#'''

# np.random.seed(0)

numGames = 100

bestGame = None
bestRating = 0

totalMechs = 51
totalCats = 83

val = 10
mechDist = np.random.default_rng(seed = randomSeed).binomial(n= 2.274 * val, p= 1/val, size=numGames)
val = 5
cataDist = np.random.default_rng(seed = randomSeed).binomial(n= 2.603 * val, p= 1/val, size=numGames)


val = 10
numberCols = ["max_players", "min_age", "min_players", "playing_time"]
dists = {}
for numberCol in numberCols:
    dists[numberCol] = np.random.default_rng(seed = randomSeed).binomial(n=((df[numberCol].mean())+df[numberCol].median())/2 * val, p= 1/val, size=numGames)


limitAllVars = True
limitNumCats = True or limitAllVars

def generateRandomGame():
    gameDict = {}

    for collumn in df.columns:
        randDistIndex = int(random.random() * numGames)

        if(collumn in numberCols and limitAllVars):
            gameDict[collumn] = [int(dists[collumn][randDistIndex])]
            # print(collumn)
        else:
        # print(collumn)
            gameDict[collumn] = [int(random.random() * maxes[collumn])] #0's Basically
        # print(int(random.random() * maxes[collumn]))

    # print(mechDist[i], cataDist[i])
    if(limitNumCats):
        numMech = mechDist[randDistIndex]
        numCats = cataDist[randDistIndex]
        # print(f"COUNTS: {numMech}, {numCats}")
        mechs = []
        cats = []

        for i in range(numMech):
            mech = int(random.random() * totalMechs)
            while(mech in mechs):
                mech = int(random.random() * totalMechs)
            mechs.append(mech)

        for i in range(numCats):
            cat = int(random.random() * totalCats)
            while(cat in cats):
                cat = int(random.random() * totalCats)
            cats.append(cat)
    else:
        for collumn in df.columns:
            if(collumn in numberCols):
                pass
                # print(collumn)
            else:
                gameDict[collumn] = [int(random.random() * 2)]

    # print(mechs, cats)

    collumns = df.columns


    for i, mech in enumerate(mechs):
        # print(f"MECH: {collumns[mech + 5]}")
        gameDict[collumns[mech + 5]] = 1

    for i, cat in enumerate(cats):
        # print(f"CAT: {collumns[cat + 5 + totalMechs]}")
        gameDict[collumns[cat + 5 + totalMechs]] = 1

    gameDF = pd.DataFrame.from_dict(gameDict)
    # print(gameDF)
    return gameDF



for c in range(numGames):

    gameDF = generateRandomGame()
    # printGame(gameDF)

    gameRating = regr.predict(gameDF)
    print(gameRating)
    if(gameRating > bestRating):
        bestRating = gameRating
        bestGame = gameDF

#'''

# maxes


#Gamma Allowed:
#Unvaried Input = Varied Score (lower) (6.88)
#Varied Games = Constant Score (Low) (6.803 ==)


#Gamma Taken Out
#Unvaried Input = Low Score (6.86)
#Varied Games = Higher Score (7.92)


#1.38, 5.8, 6,4, 6.94, 9.00


[7.57872399]
[7.57872399]
[7.57871087]
[7.57872399]
[7.57872399]
[7.57872399]
[7.45392677]
[7.61411207]
[7.57872399]
[6.9669582]
[7.57872399]
[7.49138983]
[7.57872399]
[5.32983085]
[7.57914684]
[7.57872392]
[7.57872399]
[7.57864475]
[7.57872399]
[7.96998624]
[7.57872399]
[5.55023977]
[7.57872399]
[7.57872399]
[5.47686716]
[7.57872399]
[7.57872112]
[7.11728227]
[7.57872399]
[6.76464974]
[7.46984807]
[6.25442056]
[7.57872399]
[7.57871144]
[7.57872399]
[7.57870286]
[7.61355863]
[7.46721984]
[7.57872399]
[5.52578221]
[7.15024351]
[7.49487804]
[7.69128567]
[7.57872399]
[7.76316497]
[7.57872399]
[7.82477204]
[7.80777759]
[7.57872399]
[7.57872399]
[7.57875064]
[6.63805648]
[7.57872399]
[7.57872399]
[7.57872401]
[7.57872399]
[7.56786861]
[7.53814542]
[7.58915267]
[7.57872399]
[7.57872399]
[7.57872399]
[7.57872399]
[7.57872399]
[7.60170117]
[7.13208025]
[7.22018239]
[7.578724]
[7.57872399]
[7.26100868]
[7.42163767]
[7.74741386]
[6.70327562]
[5.56308735]
[6.90338199]
[7.38022613]
[7.08777602]
[6

In [9]:
def printGame(bestGame):
    gameInfo = bestGame.iloc[:, : 5]
    print(gameInfo)

    mechanics = bestGame.iloc[:, 5 : 5+51]
    mechanics

    categories =  bestGame.iloc[:, 5+51+0: ]
    categories

    listOfMechaincs = np.where(mechanics.values[0] == 1)
    listOfMechaincs = mechanics.columns.values[listOfMechaincs[0]]
    print(listOfMechaincs)


    # print(categories.columns.values, categories.values[0])
    listOfCategories = np.where(categories.values[0] == 1)
    listOfCategories = categories.columns.values[listOfCategories[0]]
    print(listOfCategories)

    rating = regr.predict(bestGame)
    print(f"Rating: {rating}")

printGame(bestGame)
printGame(pd.DataFrame(X_test.iloc[4]).transpose())
print(type(bestGame), type(X_test))


   max_players  min_age  min_players  playing_time  expansion
0            5        9            0            65          0
['Grid Movement' 'Pick-up and Deliver']
['Renaissance' 'Trivia']
Rating: [8.02514894]
      max_players  min_age  min_players  playing_time  expansion
2874            2       12            2           240          0
['Dice Rolling' 'Hex-and-Counter']
['American Civil War' 'Wargame']
Rating: [6.59885191]
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'>


In [10]:
def isReasonableGame(game: pd.DataFrame) -> bool:
    for i, column in enumerate(game.columns):
        # print(i,column)
        row = game.iloc[0]
        # print(row)
        # print(f"GAME: {game[column]} ENDGAME")
        # print(f"ROW: {row[column]} ENDROW")
        if(i == 0):
            #max_players
            if(row[column] > 11 or row[column] < 2):
                return False
        elif(i == 1):
            #min_age
            if(row[column] > 18 or row[column] < 5):
                return False
        elif(i == 2):
            #min_players
            if(row[column] > 5 or row[column] < 1):
                return False
        elif(i == 3):
            #playing_time
            if(row[column] > 300 or row[column] < 10):
                return False
        else:
            #Mechanics and Categories
            if(row[column] != 0 and row[column] != 1):
                return False

    numMechs = game.iloc[0,5:56].sum()
    numCats  = game.iloc[0,56:].sum()

    valid = numMechs > 0 and numMechs < 6 and numCats > 0 and numCats < 6 and game["max_players"][0] > game["min_players"][0]
    return valid



def gameNeighbors(game: pd.DataFrame) -> list[pd.DataFrame]:
    neighbors = []

    for i, column in enumerate(game.columns):
        # print(i,column)
        # row = game.iloc[0]
        # print(row)
        # print(f"GAME: {game[column]} ENDGAME")
        # print(f"ROW: {row[column]} ENDROW")

        addNei = game.copy(deep=True)
        row = addNei.iloc[0]
        row[column] += 1
        # print(row, game.iloc[0])
        if(isReasonableGame(addNei)):
            neighbors.append(addNei)

        subNei = game.copy(deep=True)
        row = subNei.iloc[0]
        row[column] -= 1
        if(isReasonableGame(subNei)):
            neighbors.append(subNei)

    return neighbors

def generatorNeighbors(game: pd.DataFrame):
    for i, column in enumerate(game.columns):
        # print(i,column)
        # row = game.iloc[0]
        # print(row)
        # print(f"GAME: {game[column]} ENDGAME")
        # print(f"ROW: {row[column]} ENDROW")

        addNei = game.copy(deep=True)
        row = addNei.iloc[0]
        row[column] += 1
        # print(row, game.iloc[0])
        if(isReasonableGame(addNei)):
            # neighbors.append(addNei)
            yield addNei

        subNei = game.copy(deep=True)
        row = subNei.iloc[0]
        row[column] -= 1
        if(isReasonableGame(subNei)):
            # neighbors.append(subNei)
            yield subNei


def scoreState(state) -> float:
    return regr.predict(state)

def hillClimbingStep(currentGame: pd.DataFrame):
    currentScore = scoreState(currentGame)
    #print(f"CS: {currentScore}")

    currentBestNeighbor = None

    # neighbors = gameNeighbors(currentGame)
    # for i in range(len(neighbors)):
    #     neighbor = neighbors[i]

    for neighbor in generatorNeighbors(currentGame):

        neighborScore = scoreState(neighbor)
        # print(f"NP: {neighbor}, NS: {neighborScore}")
        # print(f"NS: {neighborScore}, CS: {currentScore}")
        if(neighborScore >= currentScore):
            return neighbor
            #CheckAll
            currentScore = neighborScore
            currentBestNeighbor = neighbor
    # if(currentBestInd == -1):
    if(type(currentBestNeighbor) == type(None)):
        #print("NO MORE STEPS TO TAKE")
        return currentGame #currentScore
    else:
        return currentBestNeighbor

# print(bestGame)
# gameNeighbors(bestGame)
# isReasonableGame(pd.DataFrame(X_test.iloc[4]).transpose())

# gen = generatorNeighbors(bestGame)
# gen = gameNeighbors(bestGame)
# print(next(gen))
# print(next(gen))
# print(gen)

# bestGame["max_players"][0]
# bestGame.iloc[0,5:56].sum()
# bestGame.iloc[0,56:].sum()



In [11]:
restarts = 7
maxScore = -1

finalBestGame = None
finalBestScore = -1

for j in range(restarts):

    currentGame = generateRandomGame()
    # printGame(currentGame)

    steps = 1000

    lastScore = None
    nextGameScore = None
    for i in range(0,steps):
        #print(f"CurrentPath: {currentPath}, Score = {g.pathCost(currentPath)}")
        nextGame = hillClimbingStep(currentGame)

        lastScore = nextGameScore
        nextGameScore = scoreState(nextGame)
        # print(f"Score: {nextGameScore}")
        if(lastScore == nextGameScore):
            break
        # if(i == 1):
            # printGame(nextGame)
        '''
        if(nextGame == currentGame):
            break #Early
        #'''
        currentGame = nextGame

    localMaximaScore = scoreState(currentGame)

    if(localMaximaScore > maxScore):
        maxScore = localMaximaScore

    sc = scoreState(currentGame)
    # print(sc)
    # printGame(currentGame)
    if(sc > finalBestScore):
        finalBestScore = sc
        finalBestGame = currentGame

    print(f"Game Score At Local Minima: {localMaximaScore}, MaxScore {finalBestScore}")


print(f"Final Game")
printGame(finalBestGame)
# print(f"Best Score: {finalBestScore}")

#Generator
#For Valid Game
#Max Players > min Players + 1
#Mechanics and Attributes Counts Capped

#Seems to always go to Max and mins

Game Score At Local Minima: [7.57872399], MaxScore [7.57872399]
Game Score At Local Minima: [7.57872399], MaxScore [7.57872399]
Game Score At Local Minima: [7.57872399], MaxScore [7.57872399]
Game Score At Local Minima: [7.00132667], MaxScore [7.57872399]
Game Score At Local Minima: [7.58937061], MaxScore [7.58937061]
Game Score At Local Minima: [7.57872399], MaxScore [7.58937061]
Game Score At Local Minima: [7.56219315], MaxScore [7.58937061]
Final Game
   max_players  min_age  min_players  playing_time  expansion
0            5       10            1            56          0
['Campaign / Battle Card Driven' 'Chit-Pull System' 'Trading']
['Modern Warfare' 'Sports' 'Wargame']
Rating: [7.58937061]
