# Columbia REU 2018 - Classifiers
- Currently the plan is to build an indoor localization classifier from multiple weak learners.
- 1) Wi-Fi localization classifier
- 2) Bluetooth localization classifier
- 3) Miscellaneous classifier (altitude, luminosity, magnetic field, etc.). This will probably be the weakest classifier, because the only feature expected to change much is altitude.
- 4) Confirm with Henning whether this should be room only or room/building.

We will be using the following classifiers as suggested by Gabriel Young.
i)   K-NN (Try 3-NN, it supposedly is an unusually powerful classifier).
ii)  penalized logistic regression
iii) random forest 
iv)  neural network
v)   radial basis kernel 
 
This code was written to be as generic as possible so that it can be reused in other machine learning projects.

# Begin Loading all Libraries

In [4]:
# Generic
import numpy as np
import random
import time
import matplotlib.pyplot as plt
from sklearn.metrics import brier_score_loss, classification_report, accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale, LabelEncoder, StandardScaler
from mlxtend.plotting import plot_decision_regions

# How to tune
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# LDA/QDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Neural Network
from sklearn.neural_network import MLPClassifier


# Random Forest
from sklearn.ensemble import RandomForestClassifier

# SVM
from sklearn import svm

from sklearn.decomposition import PCA

# Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV

# Logistic Regression
from sklearn.linear_model import LogisticRegression

# KNN
from sklearn.neighbors import KNeighborsClassifier

# Should read this...
# https://towardsdatascience.com/fine-tuning-a-classifier-in-scikit-learn-66e048c21e65

# Define Functions to Load your data set and build CV Test
- Will Consider making a global Label Encoder to already support plotting ahead of time?

In [7]:
# This is only for this specific case...
def read_data_set(filename, isnumeric=False):
    """Load a comma-separated data set whose first column holds the label.

    The first line of the file is skipped as a header.

    Args:
        filename: Path of the CSV file to read.
        isnumeric: When True the labels are taken from the numeric parse of
            column 0; otherwise column 0 is read a second time as strings.

    Returns:
        A ``(x, y)`` tuple: ``x`` is the numeric table without its first
        column and ``y`` holds the labels taken from that first column.
    """
    table = np.genfromtxt(filename, delimiter=',', skip_header=1)

    if not isnumeric:
        # Labels are text here, so column 0 is parsed again with a str dtype.
        labels = np.genfromtxt(filename, delimiter=',', skip_header=1, dtype=str, usecols=0)
    else:
        labels = table[:, 0]

    # Everything after the label column is feature data.
    features = table[:, 1:]
    return features, labels


def get_cv_set(training_set, test_set, percentile=0.2):
    """Randomly split off a hold-out subset of the rows.

    Args:
        training_set: 2-D array; rows are sampled from it.
        test_set: Array indexed by the same row numbers as ``training_set``.
        percentile: Fraction of the rows moved into the hold-out subset.

    Returns:
        ``(training_set, test_set, cv_train, cv_test)``: the first two are the
        inputs with the sampled rows removed, the last two contain only the
        sampled rows (in the order they were drawn).
    """
    n_rows = np.shape(training_set)[0]
    n_cols = np.shape(training_set)[1]
    held_out = random.sample(range(n_rows), int(percentile * n_rows))

    # Pull the sampled rows out of both arrays.
    cv_train = training_set[held_out, 0:n_cols]
    cv_test = test_set[held_out]

    # Row numbers that were not sampled, in ascending order.
    remaining = np.setdiff1d(np.arange(n_rows), held_out)
    return training_set[remaining, 0:n_cols], test_set[remaining], cv_train, cv_test

# Some Machine Learning situations are ok with getting first n answers as your guess. In Indoor Localization, an emergency worker will try to look at another room if the first guess is wrong.
- KNN
- Naive Bayes 
- Random Forest
- Combined Classifier
# DO NOT have a decision function!

In [21]:
def top(clf, test_x, test_y, extra_rooms=1):
    """Report how often the true label is among the classifier's best guesses.

    Each test sample's classes are ranked by score (highest first) and the
    sample counts as a success when its true label appears among the first
    ``extra_rooms`` classes.

    Args:
        clf: Fitted classifier exposing ``classes_`` and either
            ``predict_proba`` or ``decision_function``.
        test_x: Feature rows to score.
        test_y: True labels, one per row of ``test_x``.
        extra_rooms: Number of top-ranked classes that count as a hit.

    Returns:
        The fraction of samples whose true label was among the top guesses
        (0.0 when ``test_y`` is empty). The same value is printed.

    Raises:
        AttributeError: If ``clf`` offers neither scoring method.
        ValueError: If the score matrix does not have one column per class.
    """
    n_samples = len(test_y)
    if n_samples == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        print("Test Accuracy for " + str(extra_rooms) + " Rooms: 0.0")
        return 0.0

    # Score the whole test set once instead of once per sample.
    if hasattr(clf, 'predict_proba'):
        scores = np.asarray(clf.predict_proba(test_x))
    elif hasattr(clf, 'decision_function'):
        scores = np.asarray(clf.decision_function(test_x))
        if scores.ndim == 1:
            # Binary case: a single signed score per sample, where positive
            # values favour classes_[1]. Expand it to one column per class.
            scores = np.column_stack((-scores, scores))
    else:
        raise AttributeError("classifier has neither predict_proba nor decision_function")

    classes = clf.classes_
    if scores.shape[1] != len(classes):
        raise ValueError("expected one score column per class, got "
                         + str(scores.shape[1]) + " columns for "
                         + str(len(classes)) + " classes")

    # Never look at more guesses than there are classes.
    n_guesses = max(0, min(int(extra_rooms), len(classes)))

    success = 0
    for truth, row in zip(test_y, scores):
        # Highest score first; ties fall back to comparing the class labels.
        ranked = sorted(zip(row, classes), reverse=True)[:n_guesses]
        if any(label == truth for _, label in ranked):
            success += 1

    accuracy = success / n_samples
    print("Test Accuracy for " + str(extra_rooms) + " Rooms: " + str(accuracy))
    return accuracy


# Plot CV Error from GridSearch/RandomSearch

In [9]:
def plot_grid_search(cv_results, grid_param, name_param):
    """Plot the mean cross-validation score against one searched parameter.

    Args:
        cv_results: Mapping with a ``'mean_test_score'`` entry holding one
            score per value in ``grid_param``.
        grid_param: Parameter values, used as the x-axis.
        name_param: Label for the x-axis.
    """
    # One mean test score per entry of grid_param.
    mean_scores = np.array(cv_results['mean_test_score']).reshape(len(grid_param))

    figure_and_axes = plt.subplots(1, 1)
    axes = figure_and_axes[1]

    # A single curve: parameter value on x, mean CV score on y.
    axes.plot(grid_param, mean_scores, label="CV-Curve")

    axes.set_title("Grid Search Scores", fontsize=20, fontweight='bold')
    axes.set_xlabel(name_param, fontsize=16)
    axes.set_ylabel('CV Average Score', fontsize=16)
    axes.legend(loc="best", fontsize=15)
    axes.grid('on')
    plt.show()


# Build both Scaling and Scale & PCA functions

In [12]:
def scale(train_x, test_x):
    """Standardize both sets using a scaler fitted on the training set only.

    Args:
        train_x: Training features; the ``StandardScaler`` is fitted on these.
        test_x: Test features; transformed with the already-fitted scaler.

    Returns:
        ``(X_train, X_test)``, the transformed training and test features.
    """
    # Fitting on the training data alone keeps test information out of the scaler.
    standardizer = StandardScaler()
    standardizer.fit(train_x)
    return standardizer.transform(train_x), standardizer.transform(test_x)


def scale_and_pca(train_x, test_x):
    """Scale both sets, then project them with a PCA fitted on the training set.

    The PCA is configured with ``n_components=0.99`` and the full SVD solver.

    Args:
        train_x: Training features; scaler and PCA are fitted on these.
        test_x: Test features; only transformed.

    Returns:
        ``(train, test)`` after scaling and PCA projection.
    """
    train_scaled, test_scaled = scale(train_x, test_x)

    projector = PCA(n_components=0.99, svd_solver='full')
    projector.fit(train_scaled)

    reduced_train = projector.transform(train_scaled)
    reduced_test = projector.transform(test_scaled)
    return reduced_train, reduced_test


# KNN MODULE

In [13]:
# https://www.pyimagesearch.com/2016/08/15/how-to-tune-hyperparameters-with-python-and-scikit-learn/
def tune_knn(train_x, train_y, test_x, test_y):
    """Grid-search ``n_neighbors`` for a K-NN classifier and report its scores.

    Odd values of k starting at 3 are tried. The CV curve is plotted, then the
    best parameters, training/testing accuracy, a classification report and
    the top-3 hit rate are printed.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted ``GridSearchCV`` object.

    Raises:
        ValueError: If there are too few training samples to try any k.
    """
    n_samples = np.shape(train_x)[0]
    print("There are " + str(n_samples) + " training samples")

    # k is capped at 101 AND kept below half the sample count: each CV fold
    # trains on only part of the data, and k may not exceed that fold's size.
    max_k = min(101, int((n_samples / 2) - 1))
    print("Highest value of k is: " + str(max_k))

    n = np.arange(3, max_k, 2)
    if n.size == 0:
        raise ValueError("need more training samples to search n_neighbors "
                         "(got " + str(n_samples) + ")")

    param_grid = {'n_neighbors': n}
    model = KNeighborsClassifier()
    start = time.time()
    # Exhaustive search over every candidate k.
    best_knn = GridSearchCV(model, param_grid, n_jobs=-1)
    best_knn.fit(train_x, train_y)
    # Stop the clock before plotting so the figure does not count as search time.
    elapsed = time.time() - start

    # Plot the CV-Curve
    plot_grid_search(best_knn.cv_results_, n, 'n_neighbors')

    # Evaluate the best model found on the testing data
    print("[INFO] KNN-Best Parameters: " + str(best_knn.best_params_))
    print("[INFO] grid search took {:.2f} seconds".format(elapsed))
    print("Training Score is: " + str(best_knn.score(train_x, train_y)))
    predictions = best_knn.predict(test_x)
    print("Testing Score is: " + str(accuracy_score(test_y, predictions)))
    # The report is a string and must be printed to be seen. Passing labels
    # keeps it aligned with target_names even if test_y lacks some classes.
    print(classification_report(test_y, predictions, labels=best_knn.classes_,
                                target_names=[str(c) for c in best_knn.classes_]))
    top(best_knn, test_x, test_y, 3)
    return best_knn


# Logistic Regression Module

In [15]:
def logistic_linear(train_x, train_y, test_x, test_y):
    """Grid-search the ``C`` penalty of a logistic regression and report scores.

    Values of ``C`` come from ``np.logspace(-3, 3)``. The CV curve is plotted,
    then the best parameters, training/testing accuracy, a classification
    report and the top-3 hit rate are printed.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    start = time.time()
    n = np.logspace(-3, 3)
    param_grid = {'C': n}
    base_model = LogisticRegression(warm_start=False)
    log_model = GridSearchCV(base_model, param_grid, n_jobs=-1)
    log_model.fit(train_x, train_y)
    # Stop the clock before plotting so the figure does not count as search time.
    elapsed = time.time() - start
    plot_grid_search(log_model.cv_results_, n, 'C')

    print("[INFO] Logistic Regression-Best Parameters: " + str(log_model.best_params_))
    print("[INFO] grid search took {:.2f} seconds".format(elapsed))
    print("Training Score is: " + str(log_model.score(train_x, train_y)))

    predictions = log_model.predict(test_x)
    print("Testing Score is: " + str(accuracy_score(test_y, predictions)))
    # The keyword is `target_names`; the report is a string and must be printed.
    print(classification_report(test_y, predictions, labels=log_model.classes_,
                                target_names=[str(c) for c in log_model.classes_]))
    top(log_model, test_x, test_y, 3)
    return log_model

# Random Forest Module

In [17]:
def get_forest(train_x, train_y, test_x, test_y):
    """Tune a random forest with ``tune_forest`` and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted search object returned by ``tune_forest``.
    """
    start_time = time.time()
    best_forest = tune_forest(train_x, train_y)
    print("--- Best Parameter Random Forest Time: %s seconds ---" % (time.time() - start_time))
    print("Best Random Forest Parameters: " + str(best_forest.best_params_))
    print("Training Mean Test Score: " + str(best_forest.score(train_x, train_y)))
    y_hat = best_forest.predict(test_x)
    # accuracy_score is imported directly; no `metrics` module is in scope.
    print("Testing Mean Test Score " + str(accuracy_score(test_y, y_hat)))

    top(best_forest, test_x, test_y)
    return best_forest


# Citation:
# https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py
# https://towardsdatascience.com/random-forest-in-python-24d0893d51c0
def tune_forest(train_features, train_labels):
    """Tune a random forest: plot per-parameter sweeps, then run a random search.

    Step 1 sweeps each hyper-parameter on its own (all others left at their
    defaults) and plots the CV curve. The last step runs a randomized search
    over the combined grid.

    Args:
        train_features: Training features.
        train_labels: Training labels.

    Returns:
        The fitted ``RandomizedSearchCV`` over the combined grid.
    """
    # Number of trees in random forest
    n_estimators = np.arange(10, 510, 10)
    # Number of features to consider at every split. For a classifier 'auto'
    # was only an alias of 'sqrt' (and is rejected by newer scikit-learn), so
    # 'log2' is searched as the alternative instead.
    max_features = ['sqrt', 'log2']
    # Maximum number of levels in tree
    max_depth = np.arange(3, 20, 1)
    # Minimum number of samples required to split a node
    min_samples_split = np.arange(5, 20, 1)
    # Minimum number of samples required at each leaf node
    min_samples_leaf = np.arange(5, 20, 1)

    random_grid = {
        'n_estimators': n_estimators,
        'max_features': max_features,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        }

    # Step 1: sweep each parameter alone and plot its CV curve.
    # A grid search is used here (not a randomized one): every list is shorter
    # than 100 candidates, and the plot needs the scores in the same order as
    # the parameter values.
    for name, values in random_grid.items():
        sweep = GridSearchCV(estimator=RandomForestClassifier(warm_start=False),
                             param_grid={name: values}, cv=3, verbose=2, n_jobs=-1)
        sweep.fit(train_features, train_labels)
        plot_grid_search(sweep.cv_results_, values, name)

    # -----------------LAST STEP!-------------------
    # Random search of parameters, using 3 fold cross validation,
    # search across 100 different combinations, and use all available cores
    rf = RandomForestClassifier(warm_start=False)
    rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid,
                                   n_iter=100, cv=3, verbose=2, random_state=42, n_jobs=-1)

    # Fit the random search model
    rf_random.fit(train_features, train_labels)
    # TODO: IF I ADD MORE "Features", by definition I must increase number of estimators!!
    return rf_random

# Neural Network Module

In [None]:
# http://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_alpha.html#sphx-glr-auto-examples-neural-networks-plot-mlp-alpha-py
# http://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_training_curves.html#sphx-glr-auto-examples-neural-networks-plot-mlp-training-curves-py
def get_brain(train_x, train_y, test_x, test_y):
    """Tune a neural network with ``tune_brain`` and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted search object returned by ``tune_brain``.
    """
    start_time = time.time()
    clf = tune_brain(train_x, train_y)
    print("--- Best Parameter NN Generation: %s seconds ---" % (time.time() - start_time))
    # Print Training and Test Error
    # best_params_ holds the winning combination; get_params() would only
    # list the search object's own configuration.
    print("Best NN Parameters: " + str(clf.best_params_))
    print("Training Mean Test Score: " + str(clf.score(train_x, train_y)))
    predictions = clf.predict(test_x)
    print("Testing Mean Test Score: " + str(accuracy_score(test_y, predictions)))
    # The report is a string and must be printed to be seen.
    print(classification_report(test_y, predictions, labels=clf.classes_,
                                target_names=[str(c) for c in clf.classes_]))
    top(clf, test_x, test_y)
    return clf


# Note alpha needs to grow exponentially!
def tune_brain(train_x, train_y):
    """Tune an ``MLPClassifier``: plot per-parameter sweeps, then search the full grid.

    Each of ``alpha``, ``hidden_layer_sizes`` and ``solver`` is first swept on
    its own (others left at their defaults) and plotted. The final grid search
    covers every combination.

    Args:
        train_x: Training features.
        train_y: Training labels.

    Returns:
        The fitted ``GridSearchCV`` over the full grid.
    """
    # 5 values from 1e-5 to 1 on an exponential scale
    alphas = np.logspace(start=-5, stop=0, endpoint=True, num=5)
    hidden_layer = np.arange(3, 10, 1)
    # A list, not a set: grid values must be an ordered sequence so the
    # scores line up with the values on the plot.
    solvers = ['lbfgs', 'adam']
    param_grid = {'alpha': alphas, 'hidden_layer_sizes': hidden_layer, 'solver': solvers}

    model = MLPClassifier(warm_start=False)

    # (parameter name, candidate values, x-axis label) for each 1-D sweep.
    sweeps = (
        ('alpha', alphas, 'alpha'),
        ('hidden_layer_sizes', hidden_layer, 'hidden_layer'),
        ('solver', solvers, 'solver'),
    )
    for param_name, values, axis_label in sweeps:
        # Search only this parameter so there is exactly one score per value.
        sweep = GridSearchCV(model, {param_name: values}, n_jobs=-1, cv=3)
        sweep.fit(train_x, train_y)
        plot_grid_search(sweep.cv_results_, values, axis_label)

    # ----------------Final---------------------
    clf = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)
    clf.fit(train_x, train_y)
    return clf


# SVM (Linear and RBF) Module

In [4]:

# Default is 10...
def svc_rbf_param_selection(x, y, n_folds=2):
    """Search ``C`` and ``gamma`` for an RBF-kernel SVC.

    ``C`` and ``gamma`` are each swept alone and plotted, then a grid search
    over both together is fitted and returned.

    Args:
        x: Training features.
        y: Training labels.
        n_folds: Number of cross-validation folds.

    Returns:
        The fitted ``GridSearchCV`` over the combined ``C`` / ``gamma`` grid.
    """
    cost_values = np.arange(0.01, 1, 0.01)
    gamma_values = np.arange(0.01, 1, 0.01)
    estimator = svm.SVC(kernel='rbf')

    # One-parameter sweeps: cost first, then gamma.
    for param_name, candidates in (('C', cost_values), ('gamma', gamma_values)):
        sweep = GridSearchCV(estimator, param_grid={param_name: candidates},
                             cv=n_folds, n_jobs=-1)
        sweep.fit(x, y)
        plot_grid_search(sweep.cv_results_, candidates, param_name)

    # Final search over both parameters at once.
    combined_grid = {'C': cost_values, 'gamma': gamma_values}
    full_search = GridSearchCV(estimator, param_grid=combined_grid, cv=n_folds, n_jobs=-1)
    full_search.fit(x, y)
    return full_search


# Default is 10...
# Should take about 6 minutes
def svc_linear_param_selection(x, y, n_folds=2):
    """Grid-search ``C`` for a linear-kernel SVC and plot the CV curve.

    Args:
        x: Training features.
        y: Training labels.
        n_folds: Number of cross-validation folds.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    cost_values = np.arange(0.01, 1, 0.01)
    search = GridSearchCV(svm.SVC(kernel='linear'), {'C': cost_values},
                          cv=n_folds, n_jobs=-1)
    search.fit(x, y)
    plot_grid_search(search.cv_results_, cost_values, 'C')
    return search


# This is always TRUTH, PREDICTION
# http://scikit-learn.org/stable/modules/model_evaluation.html
def svm_linear(train_x, train_y, test_x, test_y):
    """Tune a linear SVM with ``svc_linear_param_selection`` and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted search object.
    """
    start_time = time.time()
    svm_line = svc_linear_param_selection(train_x, train_y)
    print("--- Best Parameter Linear SVM: %s seconds ---" % (time.time() - start_time))
    print("Best Linear Parameters: " + str(svm_line.best_params_))
    print("Linear SVM, Training Mean Test Score: " + str(svm_line.score(train_x, train_y)))
    predictions = svm_line.predict(test_x)
    # Metric arguments are always (truth, prediction).
    print("Linear SVM, Testing Mean Test Score: " + str(accuracy_score(test_y, predictions)))
    # The report is a string and must be printed to be seen.
    print(classification_report(test_y, predictions, labels=svm_line.classes_,
                                target_names=[str(c) for c in svm_line.classes_]))
    top(svm_line, test_x, test_y, 3)
    return svm_line


def svm_rbf(train_x, train_y, test_x, test_y):
    """Tune an RBF SVM with ``svc_rbf_param_selection`` and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted search object.
    """
    start_time = time.time()
    svm_radial = svc_rbf_param_selection(train_x, train_y)
    print("--- Best Parameter RBF: %s seconds ---" % (time.time() - start_time))
    print("Best RBF Parameters: " + str(svm_radial.best_params_))
    print("RBF SVM, Training Mean Test Score: " + str(svm_radial.score(train_x, train_y)))
    y_hat = svm_radial.predict(test_x)
    print("RBF SVM, Testing Mean Test Score " + str(accuracy_score(test_y, y_hat)))
    # The report is a string and must be printed to be seen.
    print(classification_report(test_y, y_hat, labels=svm_radial.classes_,
                                target_names=[str(c) for c in svm_radial.classes_]))
    top(svm_radial, test_x, test_y, 3)
    return svm_radial

# LDA/QDA
- Ask Professor Verma how to fix Collinearity?

In [None]:
def discriminant_line(train_x, train_y, test_x, test_y):
    """Fit a linear discriminant analysis classifier and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted ``LinearDiscriminantAnalysis`` model.
    """
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    lda.fit(train_x, train_y)
    print("Training Score (LDA): " + str(lda.score(train_x, train_y)))
    predictions = lda.predict(test_x)
    print("Prediction Score is (LDA): " + str(accuracy_score(test_y, predictions)))

    # The report is a string and must be printed to be seen.
    print(classification_report(test_y, predictions, labels=lda.classes_,
                                target_names=[str(c) for c in lda.classes_]))
    top(lda, test_x, test_y, 3)
    return lda


def discriminant_quad(train_x, train_y, test_x, test_y):
    """Fit a quadratic discriminant analysis classifier and print its scores.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` model.
    """
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    qda.fit(train_x, train_y)
    print("Training Score is (QDA): " + str(qda.score(train_x, train_y)))
    predictions = qda.predict(test_x)
    print("Prediction Score is (QDA): " + str(accuracy_score(test_y, predictions)))

    # The report is a string and must be printed to be seen.
    print(classification_report(test_y, predictions, labels=qda.classes_,
                                target_names=[str(c) for c in qda.classes_]))
    top(qda, test_x, test_y, extra_rooms=1)
    return qda

# Bayesian Module

In [18]:
# http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration.html#sphx-glr-auto-examples-calibration-plot-calibration-py
def naive_bayes(x_train, y_train, x_test, y_test):
    """Fit Gaussian naive Bayes (plain and calibrated) and print their scores.

    Three models are compared: the uncalibrated classifier, an isotonic
    calibration and a sigmoid calibration of it. For each one the Brier score
    and a classification report on the test data are printed.

    Args:
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: True labels for ``x_test``.

    Returns:
        The fitted, uncalibrated ``GaussianNB`` classifier.
    """
    # Gaussian Naive-Bayes with no calibration
    clf = GaussianNB()
    clf.fit(x_train, y_train)  # GaussianNB itself does not support sample-weights

    # Gaussian Naive-Bayes with isotonic calibration
    clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_isotonic.fit(x_train, y_train)

    # Gaussian Naive-Bayes with sigmoid calibration
    clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
    clf_sigmoid.fit(x_train, y_train)

    print("Brier scores: (the smaller the better)")

    candidates = (
        ("No calibration", clf),
        ("With isotonic calibration", clf_isotonic),
        ("With sigmoid calibration", clf_sigmoid),
    )
    for caption, model in candidates:
        proba = model.predict_proba(x_test)
        print("%s: %1.3f" % (caption, _brier_score(y_test, proba, model.classes_)))
        # The report needs predicted labels, not probabilities, and must be
        # printed to be seen.
        print(classification_report(y_test, model.predict(x_test), labels=model.classes_,
                                    target_names=[str(c) for c in model.classes_]))
    return clf


def _brier_score(y_true, proba, classes):
    """Return the Brier score of ``proba`` (one column per entry of ``classes``).

    With two classes this is the binary Brier score of the second class.
    With any other number of classes it is the mean over samples of the
    squared distance between the probability row and the one-hot true label.
    """
    proba = np.asarray(proba)
    if len(classes) == 2:
        return brier_score_loss(y_true, proba[:, 1], pos_label=classes[1])
    # One-hot encode the truth in the same column order as `classes`.
    one_hot = (np.asarray(y_true)[:, None] == np.asarray(classes)[None, :]).astype(float)
    return float(np.mean(np.sum((proba - one_hot) ** 2, axis=1)))

# K-Means Module

In [19]:
def indic(train_x):
    """Standardize ``train_x`` and project it onto its first two principal components.

    Args:
        train_x: Feature rows to project.

    Returns:
        The 2-component PCA projection of the standardized data.
    """
    # The `scale` function defined in this file takes (train_x, test_x) and
    # cannot be called with a single array, so standardize directly here.
    scaled_x = StandardScaler().fit_transform(train_x)
    pca = PCA(n_components=2)
    return pca.fit_transform(scaled_x)


def k_means(train_x, train_y):
    """Cluster ``train_x`` with K-Means and plot the clusters in 2-D.

    One cluster is requested per distinct label in ``train_y``. Each sample's
    cluster number and true label are printed, and the samples are plotted on
    the 2-D projection returned by ``indic``, coloured by cluster.

    Args:
        train_x: Feature rows to cluster.
        train_y: True labels, one per row of ``train_x``.
    """
    # Imported here because the file's import cell does not provide them.
    from matplotlib import cm
    import matplotlib.patches as mpatches
    from sklearn.cluster import KMeans

    # One cluster per distinct label (not per distinct feature row).
    num_classes = len(np.unique(train_y))
    kmeans = KMeans(n_clusters=num_classes)
    kmeans.fit(train_x)
    labels = kmeans.labels_

    # Project High dimensional data to 2-D
    # https://stackoverflow.com/questions/27930413/how-to-plot-a-multi-dimensional-data-point-in-python
    pr_comp = indic(train_x)
    x = pr_comp[:, 0]
    y = pr_comp[:, 1]
    plt.title("Clusters")

    # One colour per cluster index.
    colors = cm.rainbow(np.linspace(0, 1, num_classes))

    # Print Label and feature
    for i, (cluster, truth) in enumerate(zip(labels, train_y)):
        print(str(cluster) + " " + str(truth))
        plt.plot(x[i], y[i], color=colors[cluster], marker='o', linestyle='-', markersize=10)

    # One legend entry per cluster that actually received samples.
    patchlist = [mpatches.Patch(color=colors[cluster], label=cluster)
                 for cluster in np.unique(labels)]

    plt.legend(loc='best', handles=patchlist)
    plt.show()

# Combined Classifier

In [20]:
from sklearn.ensemble import VotingClassifier

# Overall Purpose...
# I will have 3 Classifiers
# Wifi, Bluetooth and Misc. Rooms
# I can combine them using the Voting Classifier
# Reference: http://scikit-learn.org/stable/modules/ensemble.html
# I am assuming the three input classifiers are fitted...
def combined_classifier(room_clf, wifi_clf, blue_clf, train_x, train_y, test_x=None, test_y=None):
    """Combine three classifiers in a soft-voting ensemble and fit it.

    Args:
        room_clf: Classifier registered under the name ``'dt'`` (weight 2).
        wifi_clf: Classifier registered under the name ``'knn'`` (weight 1).
        blue_clf: Classifier registered under the name ``'svc'`` (weight 2).
        train_x: Training features the ensemble is fitted on.
        train_y: Training labels.
        test_x: Optional test features. When given together with ``test_y``
            the ensemble's top-guess hit rate is reported via ``top``.
        test_y: Optional test labels.

    Returns:
        The fitted ``VotingClassifier``.
    """
    # The type of classifier will depend on preliminary results...
    # NOTE(review): all three estimators are fitted on the same train_x here;
    # confirm that is intended for the Wi-Fi / Bluetooth / misc. feature sets.
    clf = VotingClassifier(estimators=[('dt', room_clf), ('knn', wifi_clf),
                                       ('svc', blue_clf)], voting='soft', weights=[2, 1, 2])
    clf.fit(train_x, train_y)
    # Evaluate only when the caller supplied held-out data.
    if test_x is not None and test_y is not None:
        top(clf, test_x, test_y)
    return clf


# Read the Dataset!

In [17]:
def read_data_set(filename, isnumeric=False):
    """Load a comma-separated data set whose first column holds the label.

    The first line of the file is skipped as a header.

    Args:
        filename: Path of the CSV file to read.
        isnumeric: When True the labels are taken from the numeric parse of
            column 0; otherwise the file is read again as strings and column 0
            of that text table is used.

    Returns:
        A ``(x, y)`` tuple: ``x`` is the numeric table without its first
        column and ``y`` holds the labels taken from that first column.
    """
    numeric_table = np.genfromtxt(filename, delimiter=',', skip_header=1)

    if not isnumeric:
        # Labels are text here, so the file is parsed again with a str dtype.
        text_table = np.genfromtxt(filename, delimiter=',', skip_header=1, dtype=str)
        labels = text_table[:, 0]
    else:
        labels = numeric_table[:, 0]

    # Everything after the label column is feature data.
    return numeric_table[:, 1:], labels


# THIS IS THE MAIN METHOD!

In [None]:
# Load the Bluetooth and Wi-Fi data sets (label in the first column).
blue_x, blue_y = read_data_set('./blue.csv')
wifi_x, wifi_y = read_data_set('./wifi.csv')

# Build your CV sets here
# get_cv_set returns (remaining x, remaining y, held-out x, held-out y).
blue_train_x, blue_train_y, blue_test_x, blue_test_y = get_cv_set(blue_x, blue_y)
wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y = get_cv_set(wifi_x, wifi_y)

# Have a Scaled and PCA form of your data
fixed_blue_train_x, fixed_blue_test_x = scale_and_pca(blue_train_x, blue_test_x)
fixed_wifi_train_x, fixed_wifi_test_x = scale_and_pca(wifi_train_x, wifi_test_x)

# NOTE: blue_clf / wifi_clf are reassigned by several sections below; after
# the whole cell has run they hold the linear SVM results.

#-------------Bayes-------------
# The last argument must be the held-out labels that match the test features.
blue_clf = naive_bayes(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
wifi_clf = naive_bayes(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------------LDA/QDA-----------
# LDA runs on the scaled + PCA data, QDA on the raw features.
blue_lda = discriminant_line(fixed_blue_train_x, blue_train_y, fixed_blue_test_x, blue_test_y)
blue_qda = discriminant_quad(blue_train_x, blue_train_y, blue_test_x, blue_test_y)

wifi_lda = discriminant_line(fixed_wifi_train_x, wifi_train_y, fixed_wifi_test_x, wifi_test_y)
wifi_qda = discriminant_quad(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------------KNN--------------
blue_knn = tune_knn(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
wifi_knn = tune_knn(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------------Logistic Regression-----------
blue_clf = logistic_linear(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
wifi_clf = logistic_linear(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------------Neural Network---------------
blue_brain = get_brain(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
wifi_brain = get_brain(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------------Random Forest----------------
blue_forest = get_forest(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
wifi_forest = get_forest(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)

#-------SVM-----------------
blue_clf = svm_linear(blue_train_x, blue_train_y, blue_test_x, blue_test_y)
blue_clf_rbf = svm_rbf(blue_train_x, blue_train_y, blue_test_x, blue_test_y)

wifi_clf = svm_linear(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)
wifi_clf_rbf = svm_rbf(wifi_train_x, wifi_train_y, wifi_test_x, wifi_test_y)
