In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

### Helper functions for using machine learning algorithms

In [5]:
def cross_validation(model, training_features, training_labels, metric, kfolds):
    '''
    Run k-fold cross validation for an estimator and summarise the result.

    input:
            model (sklearn machine learning algorithm api): estimator to evaluate, e.g. LogisticRegression, XGBoost
            training_features (DataFrame): (X_train); its `.values` array is handed to sklearn
            training_labels (DataFrame): (y_train); its `.values` are flattened with `ravel()` first
            metric (String, sklearn scoring metrics api): forwarded as the `scoring` argument
            kfolds (int): number of splits, forwarded as the `cv` argument

    output:
            scores: the per-split scores exactly as returned by `cross_val_score`
            average_score (float): `np.mean` of those scores

    '''
    feature_matrix = training_features.values
    # ravel() turns a single-column label frame into the flat vector passed as `y`.
    label_vector = training_labels.values.ravel()

    fold_scores = cross_val_score(
        estimator=model,
        X=feature_matrix,
        y=label_vector,
        scoring=metric,
        cv=kfolds,
    )
    return fold_scores, np.mean(fold_scores)

In [6]:
def hyper_tuning(model, training_features, training_labels, metric, hyper_params, cv):
    '''
    Grid-search the hyper parameters of a machine learning algorithm.

    *Most useful for algorithms that expose many hyper parameters.*

    input:
            model (sklearn machine learning algorithm api): estimator to tune, e.g. LogisticRegression, XGBoost
            training_features (DataFrame): (X_train); its `.values` array is used for fitting
            training_labels (DataFrame): (y_train); its `.values` are flattened with `ravel()` before fitting
            metric (String, sklearn scoring metrics api): forwarded as the `scoring` argument
            hyper_params (list): a list containing a dictionary whose keys are hyper parameter names of the
                                 model and whose values are the candidates to test (forwarded as `param_grid`)
            cv (int): cross validation splitting strategy (3-fold, 5-fold). *for faster performance choose 3 fold cv*

    output:
            best_find (machine learning model): the search's `best_estimator_`
            best_score (float): the search's `best_score_`

    '''
    # n_jobs=-1 is passed through unchanged so the search may run in parallel.
    searcher = GridSearchCV(
        model,
        param_grid=hyper_params,
        scoring=metric,
        cv=cv,
        n_jobs=-1,
    )

    feature_matrix = training_features.values
    label_vector = training_labels.values.ravel()
    searcher.fit(feature_matrix, label_vector)

    return searcher.best_estimator_, searcher.best_score_

In [7]:
def model_performance(trained_model, testing_features, testing_labels, metrics=None):
    '''
    A function that applies multiple scoring metrics given by the user on the testing set

    input:
            trained_model (sklearn machine learning algorithm api): An already trained machine learning algorithm
            testing_features (DataFrame): (X_test); its `.values` array is passed to `predict`
            testing_labels (DataFrame): (y_test); its `.values` are flattened with `ravel()` before scoring
            metrics (iterable of sklearn metric api): callables invoked as `metric(y_true, y_pred)`.
                                                      Defaults to no metrics. `f1_score` is special-cased
                                                      and called with `average='micro'`.

    output:
            scores (dictionary): maps the 1-based position of each metric in `metrics` to the score it
                                 returned, e.g. {1: <first metric's score>, 2: <second metric's score>}

    '''
    # None replaces the former mutable `[]` default; both mean "no metrics".
    metrics = [] if metrics is None else list(metrics)

    # Predictions are computed once, even when no metrics are requested (unchanged behaviour).
    predictions = trained_model.predict(testing_features.values)
    if not metrics:
        return {}

    # Loop-invariant: flatten the labels once rather than once per metric.
    true_labels = testing_labels.values.ravel()

    scores = {}
    for position, metric in enumerate(metrics, start=1):
        if metric is f1_score:
            # Identity check replaces the fragile str(metric) == str(f1_score) comparison.
            scores[position] = metric(true_labels, predictions, average='micro')
        else:
            scores[position] = metric(true_labels, predictions)

    return scores