In [2]:
%matplotlib inline

# General libraries
import pandas as pd
import numpy as np
import os
import copy
import warnings
import statsmodels.api as sm
from scipy import stats

# Plotting and printing libraries
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pprint

# Model-building libraries
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import normalize, MinMaxScaler, StandardScaler, RobustScaler, Normalizer, scale

# SK-learn libraries for learning
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC

# SK-learn libraries for evaluation
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics

  from pandas.core import datetools


In [3]:
# Class label (as a string key) -> human-readable name.  CrossValidateModel
# uses this mapping to title the per-class rows of its printed report.
label_names = dict(zip(
    '1234567',
    (
        'Spruce/Fir',
        'Lodgepole Pine',
        'Ponderosa Pine',
        'Cottonwood/Willow',
        'Aspen',
        'Douglas Fir',
        'Krummholz',
    ),
))

In [4]:
def CrossValidateModel(model, X, y, name="model", folds=5, verbose=False):
    '''Cross-validate a model with stratified K-fold splits and print averaged metrics.

    For every fold the model is re-fitted on the training rows, used to predict
    the validation rows, and scored with ``metrics.classification_report``.
    The per-fold precision, recall, f1-score and support are then averaged
    across folds and printed.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.  It is fitted in
        place once per fold.
    X : feature set; must support positional row selection through ``.iloc``.
    y : label set; must support positional selection through ``.iloc``.
    name : text printed in the report title.
    folds : number of stratified folds passed to ``StratifiedKFold``.
    verbose : when False only the 'weighted avg' row is printed; when True
        every collected row (classes and averages) is printed in sorted key
        order, the fold count is added to the title, and a blank line follows.

    Returns
    -------
    None.  The results are printed only.
    '''
    # Seed NumPy's global generator: StratifiedKFold below is created without
    # its own random_state, so the shuffling is only repeatable because of this.
    np.random.seed(10)

    reportFields = ('precision', 'recall', 'f1-score', 'support')
    # report row key -> {metric name -> list holding one value per fold}
    generalReport = {}

    skf = StratifiedKFold(n_splits=folds, shuffle=True)
    for train_indexes, valid_indexes in skf.split(X, y):
        foldXTrain, foldYTrain = X.iloc[train_indexes], y.iloc[train_indexes]
        foldXValid, foldYValid = X.iloc[valid_indexes], y.iloc[valid_indexes]

        model.fit(foldXTrain, foldYTrain)
        foldValidPred = model.predict(foldXValid)
        # classification_report takes (y_true, y_pred).  Passing them the other
        # way round swaps precision with recall and counts support on the
        # predictions instead of on the true labels.
        foldReport = metrics.classification_report(foldYValid, foldValidPred, output_dict=True)

        for key, rowMetrics in foldReport.items():
            # Some report entries are plain numbers (e.g. 'accuracy') rather
            # than a precision/recall/f1/support mapping; they have no place
            # in this table, so skip them instead of failing.
            if not isinstance(rowMetrics, dict):
                continue
            # Rows are created on first sight so labels that are missing from
            # label_names no longer raise KeyError.
            collected = generalReport.setdefault(key, {field: [] for field in reportFields})
            for outputField in reportFields:
                collected[outputField].append(rowMetrics[outputField])

    title = f'Model: {name}'
    if verbose:
        title += f', with {folds} folds'
    print(title)

    fields = sorted(generalReport) if verbose else ['weighted avg']
    for field in fields:
        collected = generalReport.get(field)
        if collected is None:
            # Nothing was recorded for this row; averaging an empty list would
            # only print nan and emit a RuntimeWarning.
            continue
        # Known class keys are shown with their readable name; every other
        # row (averages, unknown labels) keeps its raw key.
        fieldLabel = label_names.get(field, field)
        output = f'\t{fieldLabel:<20} | '
        for outputField in reportFields:
            output += f'{outputField}: {np.mean(collected[outputField]):>5.2f} | '
        print(output)
    if verbose:
        print()