<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
# Define function to fit our models and predict on the training and test sets

def clf_pred(models):
    '''
    Fit each classifier in `models`, predict on its train and test data,
    and collect predictions, scores and metrics into one DataFrame.

    Parameters
    ----------
    models : pandas.DataFrame
        One row per classifier. Every row is read for the columns
        'clf_name', 'clfs', 'X_train', 'X_test', 'y_train' and 'y_test'.
        The object in 'clfs' must expose `fit`, `predict` and
        `predict_proba`. The index of `models` may be anything; rows are
        processed in iteration order.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in the same order, with a fresh 0..n-1
        index and the columns listed in `columns` below: the classifier
        name and fitted model, fit runtime in seconds, train/test
        predictions and probability scores, ROC curve points,
        precision/recall/F1/accuracy/ROC AUC, and the X/y data that was
        used.

    Notes
    -----
    * Column 1 of `predict_proba` is used as the positive-class score,
      which assumes a binary classification problem -- TODO confirm
      against the callers.
    * `fit` is called on the object stored in `models`; for estimators
      that return `self` from `fit`, that object is fitted in place.
    '''

    # Column names, in output order. The result matrix is sized from this
    # list so the two can never drift apart.
    columns = ['Classifier',
               'Model',
               'Runtime',
               'Test Preds',
               'Train Preds',
               'Test y-Score',
               'Train y-Score',
               'Test FPR',
               'Test TPR',
               'Train FPR',
               'Train TPR',
               'Test Precision',
               'Train Precision',
               'Test Recall',
               'Train Recall',
               'Test F1',
               'Train F1',
               'Test Accuracy',
               'Train Accuracy',
               'Test ROC AUC',
               'Train ROC AUC',
               'X_train',
               'X_test',
               'y_train',
               'y_test'
              ]

    # Object-dtype matrix so each cell can hold an array, a model or a scalar
    results = np.zeros((len(models), len(columns)), dtype=object)

    # enumerate() provides the positional row number; the iterrows() label
    # is ignored because it is not guaranteed to be 0..n-1.
    for row, (_, model) in enumerate(models.iterrows()):

        ### Assign Variables ###
        # Data is taken per row because XGBoost takes
        # np.array format unlike the other models
        X_train = model['X_train']
        X_test = model['X_test']
        y_train = model['y_train']
        y_test = model['y_test']

        ### Fit (timed) ###
        # perf_counter() is monotonic, so the measured duration is not
        # affected by system clock adjustments.
        start_time = time.perf_counter()
        classifier = model['clfs'].fit(X_train, y_train)
        runtime = time.perf_counter() - start_time

        ### Predict ###
        y_pred_test = classifier.predict(X_test)
        y_pred_train = classifier.predict(X_train)

        # Class probabilities
        y_score_test = classifier.predict_proba(X_test)
        y_score_train = classifier.predict_proba(X_train)

        # Positive-class scores (column 1), shared by ROC curve and ROC AUC
        pos_score_test = y_score_test[:, 1]
        pos_score_train = y_score_train[:, 1]

        # False & True Positive Rates (thresholds are not stored)
        fpr_test, tpr_test, _ = roc_curve(y_test, pos_score_test)
        fpr_train, tpr_train, _ = roc_curve(y_train, pos_score_train)

        record = {
            # Classifier Names & Models
            'Classifier': model['clf_name'],
            'Model': model['clfs'],
            'Runtime': runtime,
            'Test Preds': y_pred_test,
            'Train Preds': y_pred_train,
            'Test y-Score': y_score_test,
            'Train y-Score': y_score_train,
            'Test FPR': fpr_test,
            'Test TPR': tpr_test,
            'Train FPR': fpr_train,
            'Train TPR': tpr_train,
            'Test Precision': precision_score(y_test, y_pred_test),
            'Train Precision': precision_score(y_train, y_pred_train),
            'Test Recall': recall_score(y_test, y_pred_test),
            'Train Recall': recall_score(y_train, y_pred_train),
            'Test F1': f1_score(y_test, y_pred_test),
            'Train F1': f1_score(y_train, y_pred_train),
            'Test Accuracy': accuracy_score(y_test, y_pred_test),
            'Train Accuracy': accuracy_score(y_train, y_pred_train),
            # AUC is computed from the probability scores, not the hard
            # label predictions, so it matches the stored ROC curve.
            'Test ROC AUC': roc_auc_score(y_test, pos_score_test),
            'Train ROC AUC': roc_auc_score(y_train, pos_score_train),
            # X & y values kept alongside the results to have everything
            # in one place (these are class balanced/resampled)
            'X_train': X_train,
            'X_test': X_test,
            'y_train': y_train,
            'y_test': y_test,
        }

        # Assign cell by cell: scalar indexing stores arrays as single
        # objects instead of broadcasting them across the row.
        for col, name in enumerate(columns):
            results[row, col] = record[name]

    ### Create DataFrame ###
    clf_df = pd.DataFrame(results, columns=columns)

    return clf_df