In [1]:
import numpy as np
import pickle
import os

def _require(condition,message):
    '''Raise AssertionError carrying ``message`` unless ``condition`` is truthy.'''
    # Raised explicitly instead of using the ``assert`` statement so that the
    # validation still runs under ``python -O``; the exception type is kept as
    # AssertionError so callers that already catch it keep working.
    if not condition:
        raise AssertionError(message)


def _load_pickle(path):
    '''Return the object deserialized from the pickle file at ``path``.'''
    # SECURITY: unpickling can execute arbitrary code. Only point this at
    # model/encoder files that come from a trusted source.
    with open(path,"rb") as f:
        return pickle.load(f)


def predict_sales(platform,genre,publisher,region,model_type,models_and_encoder_dir):
    '''
    This function predicts the sales of a video game for the given platform,
    genre, publisher, and region. The type of model used in the prediction is also 
    selected, and a directory containing these models and the label encoder must
    also be specified.
    
    Valid regions are 'na','eu','jp', and 'global' (not case-sensitive)
    Valid models are 'rf','knn','dt', and 'xg' (not case-sensitive)
    
    The directory is expected to contain 'encoder.pkl', 'feature_values.pkl' and
    the file of the selected model: 'rf_model.pkl', 'knn_model.pkl', 'dt_model.pkl',
    or, for 'xg', 'xg_model_<region>.pkl' (one file per lower-cased region).
    
    :param platform: Platform on which the game is published i.e. 'GBA','PS2',etc.
    :type platform: str
    :param genre: Genre of the game i.e. 'Sports','Action',etc.
    :type genre: str
    :param publisher: Company that publishes the game i.e. 'Nintendo','Sega',etc.
    :type publisher: str
    :param region: Region where sales will be predicted 
    :type region: str
    :param model_type: Key of the model used for the prediction ('rf','knn','dt' or 'xg')
    :type model_type: str
    :param models_and_encoder_dir: Directory containing the pickled files of the models and encoder
    :type models_and_encoder_dir: str
    
    :return sales: Predicted sales in millions of $
    :type sales: float
    
    :raises AssertionError: If an argument is not a string, the region or model
        type is not recognised, 'encoder.pkl' or 'feature_values.pkl' is missing,
        or platform/genre/publisher is not among the stored feature values.
    :raises OSError: If the selected model file cannot be opened (its existence
        is not checked beforehand).
    '''
    
    # Validate that all inputs are strings (checked by name rather than through
    # locals(), so adding a local variable above cannot change what is checked)
    arguments = (('platform',platform),
                 ('genre',genre),
                 ('publisher',publisher),
                 ('region',region),
                 ('model_type',model_type),
                 ('models_and_encoder_dir',models_and_encoder_dir))
    for name,value in arguments:
        _require(isinstance(value,str),
                 "{} must be a str, got {}".format(name,type(value).__name__))
    
    # Validate that a known region and model type have been given
    region = region.lower()
    regions = ['na','eu','jp','global']
    model_type = model_type.lower()
    models_d = {'rf':'rf_model.pkl',
                'knn':'knn_model.pkl',
                'dt':'dt_model.pkl',
                'xg':'xg_model_'} # xg requires a different model for each region
    _require(region in regions,
             "Unknown region {!r}; valid regions are {}".format(region,regions))
    _require(model_type in models_d,
             "Unknown model type {!r}; valid models are {}".format(model_type,sorted(models_d)))
    
    # Validate that the pickle files for the encoder and the feature values exist
    encoder_file = os.path.join(models_and_encoder_dir,"encoder.pkl")
    feature_values_file = os.path.join(models_and_encoder_dir,"feature_values.pkl")
    for required_file in (encoder_file,feature_values_file):
        _require(os.path.exists(required_file),
                 "Required file not found: {}".format(required_file))
    
    # Load the label encoders and the possible values for the features
    le = _load_pickle(encoder_file)
    feature_values = _load_pickle(feature_values_file)
    
    # Validate that the given parameters are viable inputs to the models
    features = (('Platform',platform),('Genre',genre),('Publisher',publisher))
    for column,value in features:
        _require(value in feature_values[column],
                 "{} {!r} is not among the known feature values".format(column,value))
    
    # Encode the labels for the new input as a single row of three features
    encoded = [le[column].transform([value])[0] for column,value in features]
    x_new = np.array(encoded).reshape(-1,3)
    
    if model_type == 'xg':
        # Load specific regional model for xg
        model_file = os.path.join(models_and_encoder_dir,models_d[model_type]+region+".pkl")
        model = _load_pickle(model_file)
        
        # Make sales prediction (only outputs sales for given region)
        sales = model.predict(x_new)[0]
    else:
        # Load the specified model
        model_file = os.path.join(models_and_encoder_dir,models_d[model_type])
        model = _load_pickle(model_file)
        
        # Make sales prediction; the first output row is indexed by the region's
        # position in ``regions``.
        # NOTE(review): this assumes the model's output columns are ordered
        # NA, EU, JP, Global - confirm against the training code.
        sales = model.predict(x_new)[0][regions.index(region)]
    
    return sales
    

In [2]:
# Examples
# 'rf' model, 'JP' region (region and model type are matched case-insensitively)
predict_sales(
    platform='GC',
    genre='Action',
    publisher='Nintendo',
    region='JP',
    model_type="RF",
    models_and_encoder_dir="models_and_encoder",
)

0.00036993849563654054

In [3]:
# Same game as the previous example, predicted with the 'xg' model instead
predict_sales(
    platform='GC',
    genre='Action',
    publisher='Nintendo',
    region='JP',
    model_type="XG",
    models_and_encoder_dir="models_and_encoder",
)

0.1227409355159892

In [4]:
# 'rf' model again, this time for the 'Sports' genre
predict_sales(
    platform='GC',
    genre='Sports',
    publisher='Nintendo',
    region='JP',
    model_type="RF",
    models_and_encoder_dir="models_and_encoder",
)

0.0003430882057027266

In [5]:
# 'knn' model with a different platform, genre and publisher
predict_sales(
    platform='DS',
    genre='Racing',
    publisher='Sega',
    region='JP',
    model_type="KNN",
    models_and_encoder_dir="models_and_encoder",
)

0.002