In [None]:
# ignore warnings
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accept any arguments, do nothing."""
    return None


# Route every later ``warnings.warn(...)`` lookup to the no-op above.
warnings.warn = warn

# Models
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import xgboost as xgb 

## Pre-Processing
from sklearn.model_selection import train_test_split, KFold, cross_val_score #train/test split & k-fold cross validation
from sklearn.preprocessing import StandardScaler #scaler
from sklearn.decomposition import PCA #principal component analysis

## Scoring 
import scipy.stats as stats
from sklearn import metrics
from sklearn.metrics import confusion_matrix

In [None]:
# Game Outcome Predictors 

# Target is the 'outcome' column; every other column is a predictor.
y = classification_df['outcome']
x = classification_df.drop(columns='outcome')

# Creating the Train and Test Split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, random_state=42, test_size=0.30)

# Length of Values

# One-row frame of split sizes, melted into (variable, value) rows for display
pd.DataFrame(
    dict(
        zip(
            ("x_train count", "x_test count", "y_train count", "y_test count"),
            map(len, (x_train, x_test, y_train, y_test)),
        )
    ),
    index=[0],
).melt()

# Building, Training, and Testing the Model Stack
## Building each model with some fun inheritance! 

class model_development:
    """Helpers for naming, cross-validating, and querying a set of models.

    ``ensemble``, ``evaluate`` and ``predict`` do not use instance state; they
    are static methods so they can be called on the class (as the rest of this
    file does) or on an instance.
    """

    def __init__(self, model, predictors, target):
        """Store a model together with its predictor and target data."""
        self.model = model
        self.predictors = predictors
        self.target = target

    @staticmethod
    def ensemble(names: List[str], models: List) -> Dict[str, Any]:
        """Create a dictionary mapping each model name to its model.

        Args:
            names: Model names, in the same order as ``models``.
            models: Model objects, one per name.

        Returns:
            ``{name: model}`` built by pairing the two sequences positionally.

        Raises:
            ValueError: If ``names`` and ``models`` differ in length (a
                positional pairing would otherwise drop or miss entries).
        """
        if len(names) != len(models):
            raise ValueError(
                f"names and models must have the same length "
                f"(got {len(names)} and {len(models)})"
            )
        return dict(zip(names, models))

    @staticmethod
    def evaluate(models, x_train, y_train, x_test, *, n_components=10, n_splits=5) -> Dict[str, Any]:
        """Cross-validate each model on PCA-reduced training data.

        1) Puts a PCA step in front of each model, so the reduction is
           actually applied and is re-fitted on the training part of every
           fold.
        2) Runs shuffled k-fold cross-validation once per model and returns
           the mean accuracy, precision, and recall.

        Args:
            models: ``{name: estimator}`` dictionary, e.g. from ``ensemble``.
            x_train: Training predictors.
            y_train: Training target.
            x_test: Unused; kept so existing calls keep working.
            n_components: Number of principal components to keep.
            n_splits: Number of cross-validation folds.

        Returns:
            ``{name: {'Accuracy': ..., 'Precision': ..., 'Recall': ...}}``
            with each value averaged over the folds.
        """
        # Local import: only cross_val_score is imported at file level.
        from sklearn.model_selection import cross_validate

        cv = KFold(n_splits=n_splits, random_state=0, shuffle=True)
        metric_labels = {
            'accuracy': 'Accuracy',
            'precision': 'Precision',
            'recall': 'Recall',
        }

        summary = {}
        for name, estimator in models.items():
            # The pipeline is cloned per fold, so the caller's estimator is
            # left unfitted, as with cross_val_score.
            reduced_model = make_pipeline(PCA(n_components=n_components), estimator)
            # One pass scores all metrics; no need to refit per metric.
            fold_scores = cross_validate(
                reduced_model, x_train, y_train, scoring=list(metric_labels), cv=cv
            )
            summary[name] = {
                label: fold_scores[f'test_{metric}'].mean()
                for metric, label in metric_labels.items()
            }

        return summary

    @staticmethod
    def predict(model, x_test):
        """Return ``model.predict(x_test)`` for an already-fitted model."""
        return model.predict(x_test)

## Pass in the models we wish to stack 

# Name -> estimator dictionary of the base models to compare
models = model_development.ensemble(
    ['logit', 'svm'],
    [LogisticRegression(), SVC(kernel='rbf', gamma=0.05)],
)

models

## Process, Train, Evaluate

# evaluate() returns {model name: {metric: mean score}}; load it into a frame for display
summary = pd.DataFrame.from_records(
    model_development.evaluate(models, x_train, y_train, x_test)
)
summary

# Training and Implementing the Stacking Model
# Train and implement stacking model 

def stacking_model(x_train, y_train, x_test, n_folds):
    """Fit an XGBoost classifier on the training data and return it.

    Side effect: (re)binds the module-level ``cv`` to a shuffled ``KFold``
    with ``n_folds`` splits and ``random_state=0``; the splitter is not used
    for the fit itself.

    Args:
        x_train: Training predictors.
        y_train: Training target.
        x_test: Unused by this function.
        n_folds: Number of splits for the global ``cv`` splitter.

    Returns:
        The fitted ``XGBClassifier``.
    """
    global cv
    cv = KFold(n_splits=n_folds, shuffle=True, random_state=0)

    booster_settings = dict(
        n_estimators=2000,
        max_depth=4,
        min_child_weight=2,
        gamma=0.9,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        scale_pos_weight=1,
    )
    classifier = xgb.XGBClassifier(**booster_settings)
    classifier.fit(x_train, y_train)
    return classifier

def scores():
    """Cross-validate the global ``xgb`` model on the training data.

    Reads the module-level names ``xgb`` (the estimator), ``x_train``,
    ``y_train`` and ``cv`` (the splitter set by ``stacking_model``).

    All three metrics are computed in a single cross-validation pass, so each
    fold is fitted once rather than once per metric.

    Returns:
        ``[accuracy, precision, recall]``, each averaged over the folds.
    """
    # Local import: only cross_val_score is imported at file level.
    from sklearn.model_selection import cross_validate

    metrics_in_order = ('accuracy', 'precision', 'recall')
    fold_results = cross_validate(
        xgb, x_train, y_train, scoring=list(metrics_in_order), cv=cv
    )

    return [fold_results[f'test_{metric}'].mean() for metric in metrics_in_order]

# NOTE: this rebinds `xgb` from the xgboost module alias to the fitted
# classifier. scores() and the prediction step below read the classifier from
# this name; stacking_model() looks up xgb.XGBClassifier, so calling it again
# after this line would need the module re-imported first.
xgb = stacking_model(x_train, y_train, x_test, n_folds=10)


# scores() returns [accuracy, precision, recall]
# NOTE(review): assumes the summary rows are in that same order — confirm
summary['stacked_model'] = scores()
summary

The XGBoost classifier achieves a high accuracy score on the data; however, to guard against overfitting, we use it to stack our other algorithms for the final output:
clf_stack = StackingClassifier(
    classifiers=list(models.values()),  # `models` is a name -> estimator dict; pass the estimators, not the dict
    meta_classifier=lr,  # NOTE(review): `lr` is not defined in the visible source — confirm it exists
    use_probas=True,
    use_features_in_secondary=True,
)

# Generating Predictions from the Stacked Model
# Generate Predictions. Predictions will favor the away team (1: Win, 0: Loss)

# Predict over the full predictor frame `x` (training and test rows alike)
games['predictions'] = model_development.predict(model=xgb, x_test=x)


# Side-by-side class counts for the predicted and the actual outcomes
pd.DataFrame({
    label: games[column].value_counts()
    for label, column in (('Predicted', 'predictions'), ('Actual', 'outcome'))
})

# Rows are actual outcomes, columns are predicted outcomes
conf_matrix = confusion_matrix(games['outcome'], games['predictions'])

plt.figure(figsize=(16,5))

# sklearn orders a binary matrix as [[TN, FP], [FN, TP]] with labels sorted
# ascending, so the flattened cells are TN, FP, FN, TP. The positive class is
# 1 (away win), matching the 'Home'/'Away' tick labels below.
# NOTE(review): assumes `outcome` only takes the values 0 and 1 — confirm
group_names = ['True Neg', 'False Pos', 'False Neg', 'True Pos']

# Per-cell annotation: cell name, raw count, and share of all games
counts = ["{0:0.0f}".format(value) for value in conf_matrix.flatten()]
percentage = ["{0:.2%}".format(value) for value in conf_matrix.flatten()/np.sum(conf_matrix)]

labels = [f"{i}\n{j}\n{k}" for i, j, k in zip(group_names, counts, percentage)]
labels = np.asarray(labels).reshape(2,2)

# fmt='' because the annotations are pre-formatted strings, not numbers
ax = sns.heatmap(conf_matrix, annot=labels, cmap='Greens', fmt='')
ax.set_title('Confusion Matrix for Predicted Outputs')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.xaxis.set_ticklabels(['Home', 'Away'])
ax.yaxis.set_ticklabels(['Home', 'Away'])

# Assessing Output Predictions

Per the classification of the categorical game outcome, predictions are: 
- _*Home Win*_ : 0
- _*Away Win*_ : 1

The confusion matrix will show the: 
- True Positives
- False Positives
- True Negatives
- False Negatives
# Evaluation
In summary, the models trained especially well with the test data, and similar results were obtained from training them on the original data. The random forest algorithm is overfit, with 100% accuracy, and appears to have been confused by the binary inputs and classification variables, so some cross-validation and better preprocessing will need to be done.