In [29]:
# import only required libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
# function to identify numerical columns within a dataframe
def get_numerical_columns(df):
    """Return the labels of the numeric columns of ``df`` as a list.

    Only the dtypes listed below count as numeric: signed integers and
    floats of 16, 32, or 64 bits. Columns of any other dtype (including
    ``int8`` and unsigned integers, which are not in the list) are left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    list
        Column labels, in their original order, whose dtype is accepted.
    """
    accepted_dtypes = [
        'int16', 'int32', 'int64',
        'float16', 'float32', 'float64',
    ]
    numeric_part = df.select_dtypes(include=accepted_dtypes)
    return numeric_part.columns.tolist()

# function to apply log transformation technique on dataset
def apply_log_transformation(df_original):
    """Return a copy of ``df_original`` with every column log-transformed.

    Each value ``v`` in every column is replaced by ``np.log(v)`` when
    ``v > 0`` and by ``0`` otherwise. The input frame is not modified.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Frame to transform. Every column is processed, so each value must
        support the comparison ``value > 0``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns holding the transformed values.
    """
    def _log_or_zero(value):
        # Non-positive values have no real logarithm; they are mapped to 0.
        if value > 0:
            return np.log(value)
        return 0

    transformed = df_original.copy()  # work on a copy so the caller's frame is untouched
    for name in transformed.columns.to_list():
        transformed[name] = transformed[name].map(_log_or_zero)
    return transformed

# function to scale an entire dataset, returns only numeric columns scaled
def standard_scale_dataset(df, scaler_path):
    """Scale the numeric columns of ``df`` with a scaler loaded from disk.

    The numeric columns are picked with ``get_numerical_columns``, the scaler
    object is loaded from ``scaler_path`` via ``load_model``, and its
    ``transform`` method is applied to the numeric values.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; non-numeric columns are dropped from the result.
    scaler_path : str
        Path of the pickled scaler (presumably an already-fitted
        ``StandardScaler`` -- confirm against the training notebook).

    Returns
    -------
    pandas.DataFrame
        The transformed numeric columns, labelled with their original column
        names. The frame is built without an explicit index, so it carries a
        default index rather than the index of ``df``.
    """
    numeric_names = get_numerical_columns(df)
    fitted_scaler = load_model(scaler_path)

    numeric_frame = df[numeric_names]
    # The scaler receives a bare array, not a frame with column labels.
    scaled_values = fitted_scaler.transform(numeric_frame.to_numpy())
    return pd.DataFrame(scaled_values, columns=numeric_frame.columns.to_list())

# function to extract polynomial features from a dataset
def extract_polynomial_features(df, degree=2, test_size=0.3):
    """Expand ``df`` into polynomial features up to ``degree``.

    A ``PolynomialFeatures`` transformer is created with no bias column and
    with ``interaction_only=False``, then fitted and applied to ``df`` in
    one step.

    Parameters
    ----------
    df : pandas.DataFrame or array-like
        Feature matrix to expand (there is no target involved).
    degree : int, default 2
        Degree passed to ``PolynomialFeatures``.
    test_size : float, default 0.3
        Not used by this function; kept in the signature so existing
        callers that pass it keep working.

    Returns
    -------
    pandas.DataFrame
        The expanded features. No column labels or index are supplied, so
        the frame carries pandas' default labels for both.
    """
    expander = PolynomialFeatures(
        degree=degree,
        interaction_only=False,
        include_bias=False,
    )
    expanded = expander.fit_transform(df)
    return pd.DataFrame(expanded)

# function to load a model from a pickle file
def load_model(path):
    """Load and return the object stored in the pickle file at ``path``.

    Parameters
    ----------
    path : str or path-like
        Location of the pickle file, opened in binary read mode.

    Returns
    -------
    object
        Whatever object was pickled into the file (in this notebook: a
        scaler or a trained model).

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    pickle.UnpicklingError
        If the file content is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only call this on pickle files that come from a trusted source.
    # The context manager guarantees the handle is closed even if
    # unpickling fails; the previous bare open() never closed it.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)

In [30]:
def make_prediction(df, model_path, scaler_path):
    """Preprocess ``df`` and return the predictions of a pickled model.

    The steps run in this order:

    1. ``apply_log_transformation`` -- log-transform every column.
    2. ``standard_scale_dataset`` -- scale the numeric columns with the
       scaler stored at ``scaler_path`` (non-numeric columns are dropped).
    3. ``extract_polynomial_features`` -- expand to degree-2 polynomial
       features.
    4. ``load_model`` -- load the model stored at ``model_path`` and call
       its ``predict`` method on the expanded features.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw feature rows to predict on.
    model_path : str
        Path of the pickled model.
    scaler_path : str
        Path of the pickled scaler.

    Returns
    -------
    object
        Whatever ``model.predict`` returns for the prepared features.
    """
    logged = apply_log_transformation(df)
    scaled = standard_scale_dataset(logged, scaler_path)
    poly_features = extract_polynomial_features(scaled, degree=2)

    estimator = load_model(model_path)
    return estimator.predict(poly_features)

# Build a one-row sample frame to run the prediction pipeline on.
columns = [
    'team1_rating',
    'team2_rating',
    'team1_hero3_wins',
    'team1_hero5_wins',
    'team2_hero1_wins',
    'team2_hero2_wins',
    'team2_hero3_wins',
    'team2_hero4_wins',
    'team2_hero5_wins',
]
data = [
    [1441.97, 1035.8, 950, 950, 0, 0, 0, 0, 0],
]
df_test = pd.DataFrame(data=data, columns=columns)
display(df_test)

# Pickled artifacts, given as paths relative to the working directory.
scaler_path = 'StandardScaler.pkl'
model_path = '3_GradientBoostingClassifier.pkl'

# Last expression of the cell: the prediction is shown as the cell output.
make_prediction(df_test, model_path, scaler_path)

Unnamed: 0,team1_rating,team2_rating,team1_hero3_wins,team1_hero5_wins,team2_hero1_wins,team2_hero2_wins,team2_hero3_wins,team2_hero4_wins,team2_hero5_wins
0,1441.97,1035.8,950,950,0,0,0,0,0


array([1], dtype=int64)