In [5]:
import pandas as pd

In [6]:
# Load the raw BGG export (fields are ';'-delimited) and preview the first rows.
orig_data = pd.read_csv(
    '../data/BGG_Data_Set.csv',
    sep=';',
)
orig_data.head()

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Mechanics,Domains
0,174430.0,Gloomhaven,2017.0,1,4,120.0,14,42055,879244.0,1,38604,68323.0,"Action Queue, Action Retrieval, Campaign / Bat...","Strategy Games, Thematic Games"
1,161936.0,Pandemic Legacy: Season 1,2015.0,2,4,60.0,13,41643,861278.0,2,28405,65294.0,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games"
2,224517.0,Brass: Birmingham,2018.0,2,4,120.0,14,19217,866337.0,3,39129,28785.0,"Hand Management, Income, Loans, Market, Networ...",Strategy Games
3,167791.0,Terraforming Mars,2016.0,1,5,120.0,12,64864,843254.0,4,32406,87099.0,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games
4,233078.0,Twilight Imperium: Fourth Edition,,3,6,,14,13468,,5,42219,16831.0,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games"


In [7]:
# Load the cleaned dataset (includes the per-game "Clusters" column) and preview it.
# NOTE: read_pickle unpickles arbitrary objects, so this file must come from a
# trusted source.
data = pd.read_pickle(
    '../data/cleaned_data_with_clusters.pkl',
)
data.head()

Unnamed: 0,Name,Year Published Bins,Min Players,Max Players,Play Time Bins,Min Age,Mechanics,Domains,Rating Average,Complexity Average,Popularity Score,Clusters
0,Gloomhaven,10s,1.0,4.0,2 hrs,14.0,"[Action Queue, Action Retrieval, Campaign / Ba...","[Strategy Games, Thematic Games]",8.79244,3.8604,10.0,"[2, 3, 4, 5, 6]"
1,Pandemic Legacy: Season 1,10s,2.0,4.0,1 hr,13.0,"[Action Points, Cooperative Game, Hand Managem...","[Strategy Games, Thematic Games]",8.61278,2.8405,10.0,"[2, 3, 4, 5]"
2,Brass: Birmingham,10s,2.0,4.0,2 hrs,14.0,"[Hand Management, Income, Loans, Market, Netwo...",[Strategy Games],8.66337,3.9129,10.0,"[1, 2, 4, 5, 6]"
3,Terraforming Mars,10s,1.0,5.0,2 hrs,12.0,"[Card Drafting, Drafting, End Game Bonuses, Ha...",[Strategy Games],8.43254,3.2406,10.0,"[1, 2, 4, 5, 6]"
5,Gloomhaven: Jaws of the Lion,10s,1.0,4.0,2 hrs,14.0,"[Action Queue, Campaign / Battle Card Driven, ...","[Strategy Games, Thematic Games]",8.87363,3.5472,10.0,"[1, 2, 3, 4, 5, 6]"


In [8]:
from pathlib import Path
from typing import Dict
from pydantic import BaseModel, Field
from enum import Enum
import joblib
import json
import os


class Mechanic(str, Enum):
    """Game mechanics accepted in a prediction request.

    Each member is also a ``str`` equal to its display name, so members can be
    compared with (and looked up by) the plain mechanic string.
    """

    # Values are plain strings: a trailing comma after a value would turn it
    # into a one-element tuple, so none is used here.
    action_points = "Action Points"
    cooperative_game = "Cooperative Game"
    hand_management = "Hand Management"
    legacy_game = "Legacy Game"
    point_to_point_movement = "Point to Point Movement"
    set_collection = "Set Collection"
    trading = "Trading"
    variable_player_powers = "Variable Player Powers"


class Domain(str, Enum):
    """Game domains accepted in a prediction request.

    Each member is also a ``str`` equal to its display name.
    """

    # Values are plain strings: a trailing comma after a value would turn it
    # into a one-element tuple, so none is used here.
    strategy_games = "Strategy Games"
    thematic_games = "Thematic Games"


class BoardGameDTO(BaseModel):
    """Validated description of a board game submitted for prediction.

    The numeric bounds below are enforced by pydantic when the model is
    constructed; all fields are required.
    """

    # Publication year; the negative lower bound presumably admits BCE years.
    year_published: int = Field(..., ge=-3600, lt=2030)
    # Minimum number of players.
    min_players: int = Field(..., ge=1, le=8)
    # Maximum number of players. The lower bound must be spelled ``ge``:
    # a misspelled constraint name is not applied as a validation rule.
    max_players: int = Field(..., ge=1, le=100)
    # Play time (the dataset preview suggests minutes — TODO confirm).
    play_time: int = Field(..., gt=0, lt=660)
    # Minimum recommended player age.
    min_age: int = Field(..., ge=2, le=21)
    # Mechanics and domains, restricted to the known enum members.
    mechanics: list[Mechanic]
    domains: list[Domain]


class PredictionDTO(BaseModel):
    """Predicted scores for one board game; every field is required and range-checked."""

    # Complexity score, constrained to the closed interval [0, 5].
    average_complexity: float = Field(
        ...,
        ge=0,
        le=5,
    )
    # Rating score, constrained to the closed interval [0, 10].
    average_rating: float = Field(
        ...,
        ge=0,
        le=10,
    )
    # Popularity score, constrained to the closed interval [0, 10].
    popularity_score: float = Field(
        ...,
        ge=0,
        le=10,
    )



In [9]:
from typing import Any


class FeatureTransfomer:
    """Encode a ``BoardGameDTO`` into model features and run the three predictors.

    All fitted artifacts (bin edges and labels, the mechanic -> cluster mapping,
    the label binarizers, the scaler and the prediction models) are loaded once
    from ``artifacts_path`` at construction time.

    NOTE: the class name keeps its existing spelling so current references to it
    keep working.
    """

    def __init__(self, artifacts_path: str):
        """Load every artifact needed for feature encoding and prediction.

        Args:
            artifacts_path: Directory containing ``data_transformation.json`` and
                the joblib-serialized binarizers, scaler and models. A trailing
                path separator is tolerated.

        Raises:
            FileNotFoundError: If ``data_transformation.json`` does not exist.
            ValueError: If ``data_transformation.json`` is not valid JSON.
        """
        # pathlib joins avoid doubled separators when artifacts_path ends in one.
        base = Path(artifacts_path)

        # Load from file
        transfs = self._load_json(artifacts_path, "data_transformation.json")

        # Bins
        self.decade_bins = transfs.get("decade_bins")
        self.decade_labels = transfs.get("decade_labels")

        self.play_time_bins = transfs.get("play_time_bins")
        self.play_time_labels = transfs.get("play_time_labels")

        # Mechanic Clusters
        self.mechanic_cluster_mapping = transfs.get("mechanic_cluster_mapping")

        # NOTE(security): joblib.load unpickles arbitrary objects, so
        # artifacts_path must point at a trusted directory.

        # MultiLabelBinarizers
        self.domains_mlb = joblib.load(base / "Domains_MLB.pkl")
        self.clusters_mlb = joblib.load(base / "Clusters_MLB.pkl")

        # Normalisation Scaler
        self.scaler = joblib.load(base / "Scaler.pkl")

        # Prediction Models
        self.rating_average_mdl = joblib.load(base / "Rating_Average_Model.pkl")
        self.complexity_average_mdl = joblib.load(base / "Complexity_Average_Model.pkl")
        self.popularity_score_mdl = joblib.load(base / "Popularity_Score_Model.pkl")

    @staticmethod
    def _load_json(folder_path: str, file_name: str) -> Dict[str, Any]:
        """Read and parse ``file_name`` inside ``folder_path``.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file content is not valid JSON (the original
                ``JSONDecodeError`` is chained as the cause).
        """
        path = Path(folder_path) / file_name
        if not path.is_file():
            raise FileNotFoundError(f"JSON file not found at: {path}")

        try:
            with path.open("r", encoding="utf-8") as file:
                return json.load(file)
        except json.JSONDecodeError as e:
            raise ValueError(f"Error decoding JSON file at {path}: {e}") from e

    @staticmethod
    def _bin(value: int, bins: list[int], labels: list[str]) -> int:
        """Return the categorical code of the bin that ``value`` falls into.

        The code is the position of the matching label within ``labels``.
        A value outside every bin yields ``-1`` (the categorical code pandas
        uses for a missing value).
        """
        codes = pd.cut([value], bins=bins, labels=labels).astype('category').codes
        # Convert the numpy integer to a plain int, as the annotation promises.
        return int(codes[0])

    @staticmethod
    def _map_to_cluster(mechanics: "list[Mechanic]", clusters: Dict[str, int]) -> list[list[int]]:
        """Map mechanics to their cluster ids, wrapped as a single-sample list.

        Mechanics without an entry in ``clusters`` are skipped rather than
        emitted as ``None``, which is not a real cluster label.
        """
        mapped = (clusters.get(mechanic) for mechanic in mechanics)
        return [[cluster for cluster in mapped if cluster is not None]]

    @staticmethod
    def _as_scalar(prediction: Any) -> float:
        """Return the first element of a model prediction as a Python float.

        Works for any array nesting, so no implicit array-to-scalar coercion is
        left to the caller.
        """
        # Local import so the class does not rely on a module-level numpy import.
        import numpy as np

        return float(np.asarray(prediction).ravel()[0])

    def _transform(self, dto: "BoardGameDTO") -> pd.DataFrame:
        """Build the single-row feature frame for ``dto``.

        Columns are created in this order: binned year, player counts, binned
        play time, minimum age, one ``Domains_*`` column per binarizer class,
        then one ``Clusters_*`` column per binarizer class. The order is
        presumably the one the models were trained with — keep it stable.
        """
        # Binning
        year_bin = self._bin(dto.year_published, self.decade_bins, self.decade_labels)
        play_time_bin = self._bin(dto.play_time, self.play_time_bins, self.play_time_labels)

        # One-row frame holding the directly mapped features
        prediction_df = pd.DataFrame([{
            "Year Published Bins": year_bin,
            "Min Players": dto.min_players,
            "Max Players": dto.max_players,
            "Play Time Bins": play_time_bin,
            "Min Age": dto.min_age
        }])

        # Scaling
        to_scale_columns = ["Min Players", "Max Players", "Min Age"]
        prediction_df[to_scale_columns] = self.scaler.transform(prediction_df[to_scale_columns])

        # Domains: one indicator column per class known to the binarizer
        domains_enc = self.domains_mlb.transform([dto.domains])
        for i, class_name in enumerate(self.domains_mlb.classes_):
            prediction_df[f"Domains_{class_name}"] = domains_enc[:, i].tolist()

        # Clusters: mechanics are first mapped to cluster ids, then binarized
        clusters = self._map_to_cluster(dto.mechanics, self.mechanic_cluster_mapping)
        clusters_enc = self.clusters_mlb.transform(clusters)
        for i, class_name in enumerate(self.clusters_mlb.classes_):
            prediction_df[f"Clusters_{class_name}"] = clusters_enc[:, i].tolist()

        return prediction_df

    def predict(self, dto: "BoardGameDTO") -> "PredictionDTO":
        """Predict complexity, rating and popularity for ``dto``.

        Each model's ``predict`` output is reduced to a plain float before it is
        placed into the ``PredictionDTO``.
        """
        encoded = self._transform(dto)

        return PredictionDTO(
            average_complexity=self._as_scalar(self.complexity_average_mdl.predict(encoded)),
            average_rating=self._as_scalar(self.rating_average_mdl.predict(encoded)),
            popularity_score=self._as_scalar(self.popularity_score_mdl.predict(encoded)),
        )
        


# Sample request; the values appear to mirror the "Pandemic Legacy: Season 1"
# row shown in the raw-data preview earlier in the notebook.
test = BoardGameDTO(
    year_published=2015, min_players=2, max_players=4, play_time=60, min_age=13,
    mechanics=[
        Mechanic.action_points, Mechanic.cooperative_game,
        Mechanic.hand_management, Mechanic.legacy_game,
        Mechanic.point_to_point_movement, Mechanic.set_collection,
        Mechanic.trading, Mechanic.variable_player_powers,
    ],
    domains=[Domain.strategy_games, Domain.thematic_games],
)

# Load the fitted artifacts, then display the prediction for the sample request.
ft = FeatureTransfomer(artifacts_path="../artifacts/")
ft.predict(test)

PredictionDTO(average_complexity=2.5849189921366746, average_rating=7.223395802309839, popularity_score=9.324642337090344)